From ff5063298483d14ef8e1e1f9615ca46d87eb0a5c Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Tue, 18 Mar 2025 18:40:09 +0100 Subject: [PATCH 01/12] towards combining everything in one arc.cwl --- arc-job.yml | 66 +++++++++++++++++++++++++++++++++++++ arc.cwl | 80 +++++++++++++++++++++++++++++++++++++++++++++ runs/sleuth/run.cwl | 4 +-- 3 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 arc-job.yml create mode 100644 arc.cwl diff --git a/arc-job.yml b/arc-job.yml new file mode 100644 index 0000000..610a61c --- /dev/null +++ b/arc-job.yml @@ -0,0 +1,66 @@ +################################################################################ +### runs/kallisto +################################################################################ + +## Genome file to build kallisto Index +kallisto_IndexInput: + - class: File + path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa + format: edam:format_1929 # FASTA + +## Fastq files to be mapped +# The `readsOfOneSample` looks more complicated than needed +# It's an array of records (each with one or mupltiple files and a sample name) to generically allow multiple fastq files per sample + +kallisto_sampleRecord: + - readsOfOneSample: + - class: File + path: ../../assays/RNASeq/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_097' + - readsOfOneSample: + - class: File + path: ../../assays/RNASeq/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_163' + - readsOfOneSample: + - class: File + path: ../../assays/RNASeq/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_099' + - readsOfOneSample: + - class: File + path: ../../assays/RNASeq/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_103' + - readsOfOneSample: + - class: File + path: 
../../assays/RNASeq/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_161' + - readsOfOneSample: + - class: File + path: ../../assays/RNASeq/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_165' + +### Kallisto quant Parameters +kallisto_isSingle: true +kallisto_FragmentLength: 200 +kallisto_StandardDeviation: 20 +kallisto_BootstrapSamples: 30 +kallisto_resultsFolder: kallisto_results + +$namespaces: + edam: https://edamontology.org/ + +################################################################################ +### runs/isaSampleToRawDataSeq +################################################################################ + +isaSampleToRawDataSeq_arcPath: + class: Directory + path: ../../ +isaSampleToRawDataSeq_assayName: "RNASeq" +isaSampleToRawDataSeq_outName: rnaseq-samples +isaSampleToRawDataSeq_startingNodeNum: 0 diff --git a/arc.cwl b/arc.cwl new file mode 100644 index 0000000..0043dcf --- /dev/null +++ b/arc.cwl @@ -0,0 +1,80 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + SubworkflowFeatureRequirement: {} + ScatterFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + +inputs: + #### runs/kallisto + kallisto_IndexInput: File[] + kallisto_sampleRecord: + type: + type: array + items: + type: record + fields: + readsOfOneSample: + type: File[] + sampleName: + type: string? + kallisto_isSingle: boolean + kallisto_FragmentLength: double? + kallisto_StandardDeviation: double? + kallisto_BootstrapSamples: int? 
+ kallisto_resultsFolder: string + + #### runs/isaSampleToRawDataSeq + isaSampleToRawDataSeq_arcPath: Directory + isaSampleToRawDataSeq_assayName: string + isaSampleToRawDataSeq_startingNodeNum: int + isaSampleToRawDataSeq_outName: string + + #### runs/sleuth +# sleuth_inKallistoResults: Directory +# sleuth_inMetadataFile: File + sleuth_inMetadataSample: string + sleuth_inMetadataFactorList: string[] + sleuth_inMetadataDataCol: string + sleuth_outFolder: string + +steps: + kallisto: + run: runs/kallisto/run.cwl + in: + IndexInput: kallisto_IndexInput + sampleRecord: kallisto_sampleRecord + isSingle: kallisto_isSingle + FragmentLength: kallisto_FragmentLength + StandardDeviation: kallisto_StandardDeviation + BootstrapSamples: kallisto_BootstrapSamples + resultsFolder: kallisto_resultsFolder + out: [finalOut] + + + isaSampleToRawDataSeq: + run: runs/isaSampleToRawDataSeq/run.cwl + in: + arcPath: isaSampleToRawDataSeq_arcPath + assayName: isaSampleToRawDataSeq_assayName + startingNodeNum: isaSampleToRawDataSeq_startingNodeNum + outName: isaSampleToRawDataSeq_outName + out: [output] + + sleuth: + run: runs/sleuth/run.cwl + in: + inKallistoResults: kallisto/finalOut + inMetadataFile: isaSampleToRawDataSeq/output + inMetadataSample: sleuth_inMetadataSample + inMetadataFactorList: sleuth_inMetadataFactorList + inMetadataDataCol: sleuth_inMetadataDataCol + outFolder: sleuth_outFolder + out: [outdir] + +outputs: + + outdir: + type: Directory[] + outputSource: sleuth/outdir \ No newline at end of file diff --git a/runs/sleuth/run.cwl b/runs/sleuth/run.cwl index b214342..e00ce51 100644 --- a/runs/sleuth/run.cwl +++ b/runs/sleuth/run.cwl @@ -12,7 +12,7 @@ inputs: outFolder: string steps: - collectResults: + sleuth: run: ../../workflows/sleuth/workflow.cwl in: inKallistoResults: inKallistoResults @@ -26,4 +26,4 @@ steps: outputs: outdir: type: Directory[] - outputSource: collectResults/outdir + outputSource: sleuth/outdir -- GitLab From 3439c44059da4983b666a9c72f5018be0781361c 
Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 14:16:28 +0100 Subject: [PATCH 02/12] add job parameters sleuth --- arc-job.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arc-job.yml b/arc-job.yml index 610a61c..ee1ebad 100644 --- a/arc-job.yml +++ b/arc-job.yml @@ -64,3 +64,13 @@ isaSampleToRawDataSeq_arcPath: isaSampleToRawDataSeq_assayName: "RNASeq" isaSampleToRawDataSeq_outName: rnaseq-samples isaSampleToRawDataSeq_startingNodeNum: 0 + +################################################################################ +### runs/sleuth +################################################################################ + +inMetadataSample: "Input [Source Name]" +inMetadataFactorList: + - "Factor [Photosynthesis mode]" +inMetadataDataCol: "Output [Data]" +outFolder: results -- GitLab From 00fa74c216e589dae0998b6beb58a356f0991da0 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 15:00:20 +0100 Subject: [PATCH 03/12] improve logic to gather output directories from kallisto scatter step --- .cwl/cwl-plots/runs/kallisto.svg | 10 +++++----- .cwl/cwl-plots/workflows/kallisto.svg | 10 +++++----- arc.cwl | 4 ++-- .../isaSampleToRawDataSeq/rnaseq-samples.xlsx | Bin 7183 -> 7184 bytes runs/isaSampleToRawDataSeq/run.cwl | 13 ++++++++++--- runs/kallisto/run.cwl | 8 ++++---- workflows/isaSampleToRawDataSeq/workflow.cwl | 8 ++++++-- ...d-dirInDestination.cwl => gather-dirs.cwl} | 6 +++--- workflows/kallisto/kallisto-quant.cwl | 2 +- workflows/kallisto/workflow.cwl | 13 ++++++------- 10 files changed, 42 insertions(+), 32 deletions(-) rename workflows/kallisto/{yield-dirInDestination.cwl => gather-dirs.cwl} (64%) diff --git a/.cwl/cwl-plots/runs/kallisto.svg b/.cwl/cwl-plots/runs/kallisto.svg index 45f69bc..241535b 100644 --- a/.cwl/cwl-plots/runs/kallisto.svg +++ b/.cwl/cwl-plots/runs/kallisto.svg @@ -109,15 +109,15 @@ <path fill="none" stroke="black" 
d="M79.18,-185.43C89.76,-174.74 104.84,-161.76 121,-155 157.69,-139.66 267.22,-133.56 330.49,-131.27"/> <polygon fill="black" stroke="black" points="330.73,-134.77 340.61,-130.93 330.49,-127.77 330.73,-134.77"/> </g> -<!-- finalOut --> +<!-- kallistoOutDir --> <g id="node9" class="node"> -<title>finalOut</title> +<title>kallistoOutDir</title> <polygon fill="#94ddf4" stroke="black" points="346.06,-38.5 346.06,-74.5 407.94,-74.5 407.94,-38.5 346.06,-38.5"/> -<text text-anchor="middle" x="377" y="-52.3" font-family="Times,serif" font-size="14.00">finalOut</text> +<text text-anchor="middle" x="377" y="-52.3" font-family="Times,serif" font-size="14.00">kallistoOutDir</text> </g> -<!-- kallisto->finalOut --> +<!-- kallisto->kallistoOutDir --> <g id="edge8" class="edge"> -<title>kallisto->finalOut</title> +<title>kallisto->kallistoOutDir</title> <path fill="none" stroke="black" d="M377,-110.93C377,-103.17 377,-93.78 377,-85.06"/> <polygon fill="black" stroke="black" points="380.5,-84.9 377,-74.9 373.5,-84.9 380.5,-84.9"/> </g> diff --git a/.cwl/cwl-plots/workflows/kallisto.svg b/.cwl/cwl-plots/workflows/kallisto.svg index 9e81b88..930b4ca 100644 --- a/.cwl/cwl-plots/workflows/kallisto.svg +++ b/.cwl/cwl-plots/workflows/kallisto.svg @@ -37,15 +37,15 @@ <path fill="none" stroke="black" d="M491.95,-193.08C456.78,-182.13 394.44,-162.71 348.49,-148.4"/> <polygon fill="black" stroke="black" points="349.25,-144.97 338.66,-145.34 347.16,-151.66 349.25,-144.97"/> </g> -<!-- finalOut --> +<!-- kallistoOutDir --> <g id="node11" class="node"> -<title>finalOut</title> +<title>kallistoOutDir</title> <polygon fill="#94ddf4" stroke="black" points="260.06,-38.5 260.06,-74.5 321.94,-74.5 321.94,-38.5 260.06,-38.5"/> -<text text-anchor="middle" x="291" y="-52.3" font-family="Times,serif" font-size="14.00">finalOut</text> +<text text-anchor="middle" x="291" y="-52.3" font-family="Times,serif" font-size="14.00">kallistoOutDir</text> </g> -<!-- collectResults->finalOut --> +<!-- 
collectResults->kallistoOutDir --> <g id="edge12" class="edge"> -<title>collectResults->finalOut</title> +<title>collectResults->kallistoOutDir</title> <path fill="none" stroke="black" d="M291,-111.31C291,-103.29 291,-93.55 291,-84.57"/> <polygon fill="black" stroke="black" points="294.5,-84.53 291,-74.53 287.5,-84.53 294.5,-84.53"/> </g> diff --git a/arc.cwl b/arc.cwl index 0043dcf..e42113e 100644 --- a/arc.cwl +++ b/arc.cwl @@ -50,7 +50,7 @@ steps: StandardDeviation: kallisto_StandardDeviation BootstrapSamples: kallisto_BootstrapSamples resultsFolder: kallisto_resultsFolder - out: [finalOut] + out: [kallistoOutDir] isaSampleToRawDataSeq: @@ -65,7 +65,7 @@ steps: sleuth: run: runs/sleuth/run.cwl in: - inKallistoResults: kallisto/finalOut + inKallistoResults: kallisto/kallistoOutDir inMetadataFile: isaSampleToRawDataSeq/output inMetadataSample: sleuth_inMetadataSample inMetadataFactorList: sleuth_inMetadataFactorList diff --git a/runs/isaSampleToRawDataSeq/rnaseq-samples.xlsx b/runs/isaSampleToRawDataSeq/rnaseq-samples.xlsx index 1c05d363efc105474dc4887ac57f94beba0ac181..e73d722501815a8b129f9f7a95b3b0af60c45ab0 100644 GIT binary patch delta 748 zcmeCTm|!6s;LXe;!oa}6!Qhi$9OeCNO3&em!fN$5G8-MHw0MVhYQN&2Q878<#=R4- zxBRS$negCLZd#dXRj|DH`y={p+eD-^u5q!n_4V*NGZ()KueR9sHRx~Pp16*6O8?o{ zO=CMYWto@PJGU>huA3yD*gkL3b<Nur9`ls44l5^p@~hL>Iyvq(!~fdvQ|5cIyxrLN z{^m~k+Cq*;a<21z(wGFbXCK{pXO@ut)nx`p6N6Xv{A%cYe<0xgZNB|0X4|%XS<v~S z<kaEHEyB$ca&_ZVKAG;?%ZTEgjeq<FnDUD^`wF(OumgSP^0LNl^9@mHMl%CT3v+|C zq%=!I0}B(&)Knz}eLEZdpwyhilFa<P;*89KVmlP0qT-GTI#1RVSFgWd&3DLvhvC9% zmwPLNHADh}#l*snur9jtx5KP+(v<TzB`k!(6_OZL*wjw%SHIbRd+&{yws~_;{5+Sz zn0csiV%V}*CCUjrUsE_|Z@Lkz?>y&2Zn|x^_ATxV=MC8|>!YNW`1h#FT5m0E<}obg zjau2aNpVimK8Np_2ezN&3-|u-(XniuwQKa^V~r(07$4u<u{ghzX+v`fFDqlk%loPQ z>nErMvTa+<^5%2p)XP2TnN!zonsZz{A@afXvrkduda}60Yf#{@NNO>1z@mP#nUpC| zaB{AcG~<WKT#VA%0p5&EBFu;svS;GSQz0x23^T+T7=WU1u%yv(@_8v0IcP9zFfuTJ zFgIKQ1H+QW<C6uYr9nzSp}89<r2|wVg`z}avX``$JWz>K!4-}*Kq+yc5@i%6+!B-3 
lBvdEwkmdy$_2_4T$WNer91{bBFp9b#lOIc~u_=Mg2LS2$5NZGb delta 769 zcmbPW(QhFf;LXe;!oa}6!Qhjf7q$Gxzr%+n3aiyyTv*Y$NF_#VNnBw40gLRU#A@aB z&YyqI2$-t9edf)pFMO*b_6VEz<Y<;`xGkn&kRg?`qr^ac^U8-8wtIi}{#d29tLu;a zjz}ws;8{|dk^L`3^D_@k$ZuN|u6es+qK#_^bAQK4>z(>t{x7HUes~cb{4ZQdqhD4g z{dwJ$n~EK#F8}0=ggb8B(DC~!XTNIhs#BLlQe(4zG<Uu~Fu^eFvG~iI$sYc*I{cQ} z%yG9i<hZ&0ldD?%;@{RNzS;QKUw|n)Z?m6Z3ky5Ya~FP{y1MzMs5GOQg`tH(ibYb2 zajK<}NlL1jl7haSjebySPGU)BeqM1#W<jwXicwKwJGQe<))H5*KX1)<$bg6K!D^R# zD{lumw$3U`OmvM=;PQ*!>bpo~&$(j<ChlMo<PJ$n*!1T9KNb7;{i}BRDO*qey!WEa zCKXn<s9DMDqxqIE5K}cN-MT!mWyiza_o~GFw>-blnqb}$S7yRFU&7l;?bHs>mK(do z)=p^7;QVY7yYRPJLf)psXD@l!FA7{WeT(TQlZkxx340{WfAncESaVztdeCtFqw%*N zdXYk@3A>fsYiC|An%h|#xMGV2Hz&_)u5|nB`Y1k{EFtk46gaGsT8td9n4fGeWeOCW zoF^sC_<k}sqqKH_HzSh>Ga`ZPnRxP42nz$l3~>eqpeP(HX>^!;K}sbY8q6Au3=AO5 z4OhUxu%z*rC<8-$QEE=Hz8;7~HyaejyMY>XfEuMxG>T95me!I7n(b6@g<}m+N*t&} s8AS<~#AI~|)yX@hdF6oW9{ns3`3aPdV`5+sMp5^D@)Kz_Hbt=c0AigIApigX diff --git a/runs/isaSampleToRawDataSeq/run.cwl b/runs/isaSampleToRawDataSeq/run.cwl index 8f00f6a..3df9da4 100644 --- a/runs/isaSampleToRawDataSeq/run.cwl +++ b/runs/isaSampleToRawDataSeq/run.cwl @@ -1,6 +1,9 @@ cwlVersion: v1.2 class: Workflow +requirements: + MultipleInputFeatureRequirement: {} + inputs: arcPath: Directory assayName: string @@ -15,10 +18,14 @@ steps: assayName: assayName startingNodeNum: startingNodeNum outName: outName - out: [output] + out: + - sampleseqCsv + - sampleseqXlsx outputs: - output: + sampleseqs: type: File[] - outputSource: isaSampleToRawDataSeq/output + outputSource: + - isaSampleToRawDataSeq/sampleseqCsv + - isaSampleToRawDataSeq/sampleseqXlsx diff --git a/runs/kallisto/run.cwl b/runs/kallisto/run.cwl index 3ec2a22..d702ed4 100644 --- a/runs/kallisto/run.cwl +++ b/runs/kallisto/run.cwl @@ -35,9 +35,9 @@ steps: StandardDeviation: StandardDeviation BootstrapSamples: BootstrapSamples resultsFolder: resultsFolder - out: [finalOut] + out: [kallistoOutDir] outputs: - finalOut: - type: Directory[] - outputSource: 
kallisto/finalOut + kallistoOutDir: + type: Directory + outputSource: kallisto/kallistoOutDir diff --git a/workflows/isaSampleToRawDataSeq/workflow.cwl b/workflows/isaSampleToRawDataSeq/workflow.cwl index 9cace45..f9db3ee 100644 --- a/workflows/isaSampleToRawDataSeq/workflow.cwl +++ b/workflows/isaSampleToRawDataSeq/workflow.cwl @@ -39,9 +39,13 @@ inputs: position: 4 outputs: - output: - type: File[] + sampleseqCsv: + type: File outputBinding: glob: - "*.csv" + sampleseqXlsx: + type: File + outputBinding: + glob: - "*.xlsx" diff --git a/workflows/kallisto/yield-dirInDestination.cwl b/workflows/kallisto/gather-dirs.cwl similarity index 64% rename from workflows/kallisto/yield-dirInDestination.cwl rename to workflows/kallisto/gather-dirs.cwl index bc90cb8..c7a13f5 100644 --- a/workflows/kallisto/yield-dirInDestination.cwl +++ b/workflows/kallisto/gather-dirs.cwl @@ -1,18 +1,18 @@ cwlVersion: v1.2 class: ExpressionTool doc: | - Takes a directory (e.g. from a workflow step) and yields it in a desired directory. + Takes an array of directories (e.g. from a workflow step) and yields them in a destination directory. 
requirements: - class: InlineJavascriptRequirement inputs: - inDir: Directory + inDirs: Directory[] destinationDir: string expression: | ${ return {"outDir": { "class": "Directory", "basename": inputs.destinationDir, - "listing": [inputs.inDir] + "listing": inputs.inDirs } }; } outputs: diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 026f7e1..692be58 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -126,7 +126,7 @@ arguments: [ "--output-dir", $(inputs.QuantOutfolder) ] outputs: - outFolder: + kallistoQuantOutDir: type: Directory outputBinding: glob: $(runtime.outdir)/$(inputs.QuantOutfolder) diff --git a/workflows/kallisto/workflow.cwl b/workflows/kallisto/workflow.cwl index 4ce5617..56e1400 100644 --- a/workflows/kallisto/workflow.cwl +++ b/workflows/kallisto/workflow.cwl @@ -51,17 +51,16 @@ steps: FragmentLength: FragmentLength StandardDeviation: StandardDeviation BootstrapSamples: BootstrapSamples - out: [outFolder] - + out: [kallistoQuantOutDir] collectResults: - run: ./yield-dirInDestination.cwl - scatter: inDir + run: ./gather-dirs.cwl in: - inDir: quant/outFolder + inDirs: quant/kallistoQuantOutDir destinationDir: resultsFolder out: [outDir] outputs: - finalOut: - type: Directory[] + kallistoOutDir: + type: Directory outputSource: collectResults/outDir + -- GitLab From d707aa14a9e4d235a023da33a24d8cb5f6478239 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 15:05:46 +0100 Subject: [PATCH 04/12] fix separating sampleseq output --- runs/isaSampleToRawDataSeq/run.cwl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/runs/isaSampleToRawDataSeq/run.cwl b/runs/isaSampleToRawDataSeq/run.cwl index 3df9da4..aca29e7 100644 --- a/runs/isaSampleToRawDataSeq/run.cwl +++ b/runs/isaSampleToRawDataSeq/run.cwl @@ -1,9 +1,6 @@ cwlVersion: v1.2 class: Workflow -requirements: - 
MultipleInputFeatureRequirement: {} - inputs: arcPath: Directory assayName: string @@ -23,9 +20,9 @@ steps: - sampleseqXlsx outputs: - sampleseqs: - type: File[] - outputSource: - - isaSampleToRawDataSeq/sampleseqCsv - - isaSampleToRawDataSeq/sampleseqXlsx - + sampleseqCsv: + type: File + outputSource: isaSampleToRawDataSeq/sampleseqCsv + sampleseqXlsx: + type: File + outputSource: isaSampleToRawDataSeq/sampleseqXlsx \ No newline at end of file -- GitLab From b938dde09b889dab501439a6a9f8080f8d7422f6 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 15:06:03 +0100 Subject: [PATCH 05/12] arc.cwl --- arc.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arc.cwl b/arc.cwl index e42113e..e0a4ce9 100644 --- a/arc.cwl +++ b/arc.cwl @@ -60,13 +60,13 @@ steps: assayName: isaSampleToRawDataSeq_assayName startingNodeNum: isaSampleToRawDataSeq_startingNodeNum outName: isaSampleToRawDataSeq_outName - out: [output] + out: [sampleseqCsv, sampleseqXlsx] sleuth: run: runs/sleuth/run.cwl in: inKallistoResults: kallisto/kallistoOutDir - inMetadataFile: isaSampleToRawDataSeq/output + inMetadataFile: isaSampleToRawDataSeq/sampleseqCsv inMetadataSample: sleuth_inMetadataSample inMetadataFactorList: sleuth_inMetadataFactorList inMetadataDataCol: sleuth_inMetadataDataCol -- GitLab From 66c246ac2afdb7cb28fb8db72bd1491d30c21f4d Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 15:07:43 +0100 Subject: [PATCH 06/12] fix arc.cwl job --- arc-job.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arc-job.yml b/arc-job.yml index ee1ebad..f197ce5 100644 --- a/arc-job.yml +++ b/arc-job.yml @@ -69,8 +69,8 @@ isaSampleToRawDataSeq_startingNodeNum: 0 ### runs/sleuth ################################################################################ -inMetadataSample: "Input [Source Name]" -inMetadataFactorList: +sleuth_inMetadataSample: "Input [Source Name]" 
+sleuth_inMetadataFactorList: - "Factor [Photosynthesis mode]" -inMetadataDataCol: "Output [Data]" -outFolder: results +sleuth_inMetadataDataCol: "Output [Data]" +sleuth_outFolder: results -- GitLab From 65040513604d2bc3b993ed287f39c02a1bdae344 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Wed, 19 Mar 2025 15:08:43 +0100 Subject: [PATCH 07/12] replace paths to arc root --- arc-job.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arc-job.yml b/arc-job.yml index f197ce5..9b36300 100644 --- a/arc-job.yml +++ b/arc-job.yml @@ -5,7 +5,7 @@ ## Genome file to build kallisto Index kallisto_IndexInput: - class: File - path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa + path: ./studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa format: edam:format_1929 # FASTA ## Fastq files to be mapped @@ -15,32 +15,32 @@ kallisto_IndexInput: kallisto_sampleRecord: - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz + path: ./assays/RNASeq/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_097' - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz + path: ./assays/RNASeq/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_163' - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz + path: ./assays/RNASeq/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_099' - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz + path: ./assays/RNASeq/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_103' - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz + path: 
./assays/RNASeq/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_161' - readsOfOneSample: - class: File - path: ../../assays/RNASeq/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz + path: ./assays/RNASeq/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz format: edam:format_1930 sampleName: 'DB_165' @@ -60,7 +60,7 @@ $namespaces: isaSampleToRawDataSeq_arcPath: class: Directory - path: ../../ + path: ./ isaSampleToRawDataSeq_assayName: "RNASeq" isaSampleToRawDataSeq_outName: rnaseq-samples isaSampleToRawDataSeq_startingNodeNum: 0 -- GitLab From 203806461973fe7803424eaeed2deb8c4d46942d Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 20 Mar 2025 15:59:13 +0100 Subject: [PATCH 08/12] try fix indentation --- workflows/kallisto/workflow.cwl | 94 ++++++++++++++++----------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/workflows/kallisto/workflow.cwl b/workflows/kallisto/workflow.cwl index 56e1400..3120f96 100644 --- a/workflows/kallisto/workflow.cwl +++ b/workflows/kallisto/workflow.cwl @@ -8,56 +8,56 @@ requirements: InlineJavascriptRequirement: {} inputs: - IndexInput: File[] - sampleRecord: - type: - type: array - items: - type: record - fields: - readsOfOneSample: - type: File[] - sampleName: - type: string? - isSingle: boolean - FragmentLength: double? - StandardDeviation: double? - BootstrapSamples: int? - resultsFolder: string + IndexInput: File[] + sampleRecord: + type: + type: array + items: + type: record + fields: + readsOfOneSample: + type: File[] + sampleName: + type: string? + isSingle: boolean + FragmentLength: double? + StandardDeviation: double? + BootstrapSamples: int? 
+ resultsFolder: string steps: - index: - run: kallisto-index.cwl - in: - InputFiles: IndexInput - IndexName: - source: IndexInput - valueFrom: $(self[0].nameroot) - out: [index] + index: + run: kallisto-index.cwl + in: + InputFiles: IndexInput + IndexName: + source: IndexInput + valueFrom: $(self[0].nameroot) + out: [index] - quant: - run: kallisto-quant.cwl - scatter: [InputReads, QuantOutfolder] - scatterMethod: dotproduct - in: - InputReads: - source: sampleRecord - valueFrom: $(self.readsOfOneSample) - QuantOutfolder: - source: sampleRecord - valueFrom: $(self.sampleName) - Index: index/index - isSingle: isSingle - FragmentLength: FragmentLength - StandardDeviation: StandardDeviation - BootstrapSamples: BootstrapSamples - out: [kallistoQuantOutDir] - collectResults: - run: ./gather-dirs.cwl - in: - inDirs: quant/kallistoQuantOutDir - destinationDir: resultsFolder - out: [outDir] + quant: + run: kallisto-quant.cwl + scatter: [InputReads, QuantOutfolder] + scatterMethod: dotproduct + in: + InputReads: + source: sampleRecord + valueFrom: $(self.readsOfOneSample) + QuantOutfolder: + source: sampleRecord + valueFrom: $(self.sampleName) + Index: index/index + isSingle: isSingle + FragmentLength: FragmentLength + StandardDeviation: StandardDeviation + BootstrapSamples: BootstrapSamples + out: [kallistoQuantOutDir] + collectResults: + run: ./gather-dirs.cwl + in: + inDirs: quant/kallistoQuantOutDir + destinationDir: resultsFolder + out: [outDir] outputs: kallistoOutDir: -- GitLab From 2fb885ed8273f84c8f804386cbf9ca15a8ebe507 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 20 Mar 2025 16:10:02 +0100 Subject: [PATCH 09/12] add metadata to kallisto --- workflows/kallisto/kallisto-index.cwl | 33 +++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index d35f520..977cbab 100644 --- a/workflows/kallisto/kallisto-index.cwl 
+++ b/workflows/kallisto/kallisto-index.cwl @@ -2,18 +2,42 @@ cwlVersion: v1.2 class: CommandLineTool +label: Kallisto index +doc: | + Builds a kallisto index + + Usage: kallisto index [arguments] FASTA-files + + Required argument: + -i, --index=STRING Filename for the kallisto index to be constructed + + Optional argument: + -k, --kmer-size=INT k-mer (odd) length (default: 31, max value: 63) + -t, --threads=INT Number of threads to use (default: 1) + -d, --d-list=STRING Path to a FASTA-file containing sequences to mask from quantification + --make-unique Replace repeated target names with unique names + --aa Generate index from a FASTA-file containing amino acid sequences + --distinguish Generate index where sequences are distinguished by the sequence name + -T, --tmp=STRING Temporary directory (default: tmp) + -m, --min-size=INT Length of minimizers (default: automatically chosen) + -e, --ec-max-size=INT Maximum number of targets in an equivalence class (default: no maximum) + hints: DockerRequirement: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 SoftwareRequirement: packages: kallisto: - version: [ "0.50.0" ] - specs: [ https://identifiers.org/biotools/kallisto ] + version: [ "0.51.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_016582 + - https://identifiers.org/biotools/kallisto requirements: - class: InlineJavascriptRequirement +baseCommand: [kallisto, index] + inputs: InputFiles: type: File[] @@ -41,8 +65,6 @@ inputs: inputBinding: prefix: "--make-unique" -baseCommand: [kallisto, index] - outputs: index: type: File @@ -51,5 +73,8 @@ outputs: $namespaces: edam: https://edamontology.org/ + s: https://schema.org/ $schemas: - https://edamontology.org/EDAM_1.18.owl + +s:license: https://spdx.org/licenses/BSD-2-Clause -- GitLab From be876f99ab2b1de9129a70f94ce022c2a8a59cdd Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 20 Mar 2025 17:23:10 +0100 Subject: [PATCH 10/12] add more cwl metadata --- 
workflows/kallisto/gather-dirs.cwl | 9 ++++++ workflows/kallisto/kallisto-index.cwl | 6 +++- workflows/kallisto/kallisto-quant.cwl | 41 ++++++++++++++++++++++++++- workflows/kallisto/kallisto-test.cwl | 18 ------------ 4 files changed, 54 insertions(+), 20 deletions(-) delete mode 100644 workflows/kallisto/kallisto-test.cwl diff --git a/workflows/kallisto/gather-dirs.cwl b/workflows/kallisto/gather-dirs.cwl index c7a13f5..a11ef77 100644 --- a/workflows/kallisto/gather-dirs.cwl +++ b/workflows/kallisto/gather-dirs.cwl @@ -1,12 +1,20 @@ cwlVersion: v1.2 class: ExpressionTool +label: Gather directories doc: | + Helper tool to organize + Takes an array of directories (e.g. from a workflow step) and yields them in a destination directory. + + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl + requirements: - class: InlineJavascriptRequirement + inputs: inDirs: Directory[] destinationDir: string + expression: | ${ return {"outDir": { @@ -15,5 +23,6 @@ expression: | "listing": inputs.inDirs } }; } + outputs: outDir: Directory \ No newline at end of file diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index 977cbab..e374a1f 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -4,6 +4,10 @@ class: CommandLineTool label: Kallisto index doc: | + + Docs: https://pachterlab.github.io/kallisto/ + + Builds a kallisto index Usage: kallisto index [arguments] FASTA-files @@ -34,7 +38,7 @@ hints: - https://identifiers.org/biotools/kallisto requirements: - - class: InlineJavascriptRequirement + InlineJavascriptRequirement: {} baseCommand: [kallisto, index] diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 692be58..8da7542 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -2,13 +2,48 @@ cwlVersion: v1.2 class: CommandLineTool 
+label: Kallisto quant +doc: | + + Docs: https://pachterlab.github.io/kallisto/ + + Computes equivalence classes for reads and quantifies abundances + + Usage: kallisto quant [arguments] FASTQ-files + + Required arguments: + -i, --index=STRING Filename for the kallisto index to be used for + quantification + -o, --output-dir=STRING Directory to write output to + + Optional arguments: + -b, --bootstrap-samples=INT Number of bootstrap samples (default: 0) + --seed=INT Seed for the bootstrap sampling (default: 42) + --plaintext Output plaintext instead of HDF5 + --single Quantify single-end reads + --single-overhang Include reads where unobserved rest of fragment is + predicted to lie outside a transcript + --fr-stranded Strand specific reads, first read forward + --rf-stranded Strand specific reads, first read reverse + -l, --fragment-length=DOUBLE Estimated average fragment length + -s, --sd=DOUBLE Estimated standard deviation of fragment length + (default: -l, -s values are estimated from paired + end data, but are required when using --single) + -p, --priors Priors for the EM algorithm, either as raw counts or as + probabilities. Pseudocounts are added to raw reads to + prevent zero valued priors. 
Supplied in the same order + as the transcripts in the transcriptome + -t, --threads=INT Number of threads to use (default: 1) + --verbose Print out progress information every 1M processed reads + + hints: DockerRequirement: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 SoftwareRequirement: packages: kallisto: - version: [ "0.50.0" ] + version: [ "0.51.1" ] specs: [ https://identifiers.org/biotools/kallisto ] inputs: @@ -131,7 +166,11 @@ outputs: outputBinding: glob: $(runtime.outdir)/$(inputs.QuantOutfolder) + $namespaces: edam: https://edamontology.org/ + s: https://schema.org/ $schemas: - https://edamontology.org/EDAM_1.18.owl + +s:license: https://spdx.org/licenses/BSD-2-Clause diff --git a/workflows/kallisto/kallisto-test.cwl b/workflows/kallisto/kallisto-test.cwl deleted file mode 100644 index 3a05871..0000000 --- a/workflows/kallisto/kallisto-test.cwl +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env cwl-runner -cwlVersion: v1.2 -class: CommandLineTool - -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/kallisto:0.50.1--h6de1650_2 - SoftwareRequirement: - packages: - kallisto: - version: [ "0.50.0" ] - specs: [ https://identifiers.org/biotools/kallisto ] - -inputs: [] - -baseCommand: [kallisto] - -outputs: [] \ No newline at end of file -- GitLab From 02d24942652b004daffad87b36e32ba3d13dc1fd Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 20 Mar 2025 17:39:35 +0100 Subject: [PATCH 11/12] add cwl metadata --- runs/fastqc/run.cwl | 6 +++--- workflows/fastqc/collectFilesInDir.cwl | 20 -------------------- workflows/fastqc/fastqc.cwl | 24 +++++++++++++++++++----- workflows/fastqc/gather-files.cwl | 24 ++++++++++++++++++++++++ workflows/fastqc/workflow.cwl | 12 +++++++----- workflows/kallisto/gather-dirs.cwl | 4 ++-- workflows/kallisto/kallisto-index.cwl | 2 ++ workflows/kallisto/kallisto-quant.cwl | 3 +++ 8 files changed, 60 insertions(+), 35 deletions(-) delete mode 100644 
workflows/fastqc/collectFilesInDir.cwl create mode 100644 workflows/fastqc/gather-files.cwl diff --git a/runs/fastqc/run.cwl b/runs/fastqc/run.cwl index 974e992..b049a5f 100644 --- a/runs/fastqc/run.cwl +++ b/runs/fastqc/run.cwl @@ -16,9 +16,9 @@ steps: in: fastq: fastq finaloutdir: finaloutdir - out: [outdir] + out: [fastqc_outdir] outputs: - outdir: + fastqc_outdir: type: Directory - outputSource: fastqc/outdir \ No newline at end of file + outputSource: fastqc/fastqc_outdir \ No newline at end of file diff --git a/workflows/fastqc/collectFilesInDir.cwl b/workflows/fastqc/collectFilesInDir.cwl deleted file mode 100644 index 39f9ff8..0000000 --- a/workflows/fastqc/collectFilesInDir.cwl +++ /dev/null @@ -1,20 +0,0 @@ -cwlVersion: v1.2 -class: ExpressionTool -label: Collect files in a directory -doc: | - Takes Files (e.g. from a workflow step) and yields them in a desired directory. -requirements: - - class: InlineJavascriptRequirement -inputs: - files: File[] - destination: string -expression: | - ${ - return {"outDir": { - "class": "Directory", - "basename": inputs.destination, - "listing": inputs.files - } }; - } -outputs: - outDir: Directory \ No newline at end of file diff --git a/workflows/fastqc/fastqc.cwl b/workflows/fastqc/fastqc.cwl index a2e408d..f0b906a 100644 --- a/workflows/fastqc/fastqc.cwl +++ b/workflows/fastqc/fastqc.cwl @@ -2,12 +2,14 @@ cwlVersion: v1.2 class: CommandLineTool -label: Run fastqc on raw reads in FASTQ format (single or paired end) or aligned reads in BAM. +label: FastQC - A high throughput sequence QC analysis tool doc: | - simplified from: https://github.com/common-workflow-library/bio-cwl-tools/blob/66f620da5b0a11e934a6da83272205a2516bcd91/fastqc/fastqc_1.cwl - + Original docs: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + Run fastqc on raw reads in FASTQ format (single or paired end) or aligned reads in BAM. 
+ + This CWL was adapted from: https://github.com/common-workflow-library/bio-cwl-tools/blob/66f620da5b0a11e934a6da83272205a2516bcd91/fastqc/fastqc_1.cwl hints: ResourceRequirement: @@ -18,11 +20,13 @@ hints: SoftwareRequirement: packages: fastqc: - specs: [ https://identifiers.org/biotools/fastqc ] + specs: + - https://identifiers.org/biotools/fastqc + - https://identifiers.org/rrid/RRID:SCR_014583 version: [ "0.11.9" ] - baseCommand: "fastqc" + arguments: - valueFrom: $(runtime.outdir) prefix: "-o" @@ -46,3 +50,13 @@ outputs: outputBinding: glob: "*_fastqc.html" + +$namespaces: + edam: https://edamontology.org/ + s: https://schema.org/ +$schemas: + - https://edamontology.org/EDAM_1.18.owl + +s:license: https://spdx.org/licenses/GPL-3.0-or-later + + diff --git a/workflows/fastqc/gather-files.cwl b/workflows/fastqc/gather-files.cwl new file mode 100644 index 0000000..67d23ec --- /dev/null +++ b/workflows/fastqc/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. 
+ + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/fastqc/workflow.cwl b/workflows/fastqc/workflow.cwl index d80a7ea..4592319 100644 --- a/workflows/fastqc/workflow.cwl +++ b/workflows/fastqc/workflow.cwl @@ -18,15 +18,17 @@ steps: fastq: fastq out: [fastqc_zip, fastqc_html] collectFiles: - run: ./collectFilesInDir.cwl + run: ./gather-files.cwl in: - destination: finaloutdir - files: - source: [fastqc/fastqc_html, fastqc/fastqc_zip] + inFiles: + source: + - fastqc/fastqc_html + - fastqc/fastqc_zip linkMerge: merge_flattened + destination: finaloutdir out: [outDir] outputs: - outdir: + fastqc_outdir: type: Directory outputSource: collectFiles/outDir \ No newline at end of file diff --git a/workflows/kallisto/gather-dirs.cwl b/workflows/kallisto/gather-dirs.cwl index a11ef77..8586823 100644 --- a/workflows/kallisto/gather-dirs.cwl +++ b/workflows/kallisto/gather-dirs.cwl @@ -2,8 +2,8 @@ cwlVersion: v1.2 class: ExpressionTool label: Gather directories doc: | - Helper tool to organize - + Helper tool to organize workflow outputs + Takes an array of directories (e.g. from a workflow step) and yields them in a destination directory. 
Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index e374a1f..b43cba7 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -26,6 +26,8 @@ doc: | -m, --min-size=INT Length of minimizers (default: automatically chosen) -e, --ec-max-size=INT Maximum number of targets in an equivalence class (default: no maximum) + This CWL was adapted from: https://github.com/common-workflow-library/bio-cwl-tools/commit/91c42fb809ce18eafe16155cca0abf362270c0fe + hints: DockerRequirement: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 8da7542..998c371 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -37,6 +37,9 @@ doc: | --verbose Print out progress information every 1M proccessed reads + This CWL was adapted from: https://github.com/common-workflow-library/bio-cwl-tools/commit/91c42fb809ce18eafe16155cca0abf362270c0fe + + hints: DockerRequirement: dockerPull: quay.io/biocontainers/kallisto:0.51.1--ha4fb952_1 -- GitLab From 737822631dca957ed6bf6993a0ac87ea86f7350b Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 20 Mar 2025 17:46:47 +0100 Subject: [PATCH 12/12] add overview figure --- .cwl/README.md | 2 +- .cwl/arc-cwl.svg | 245 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 39 ++++++-- 3 files changed, 275 insertions(+), 11 deletions(-) create mode 100644 .cwl/arc-cwl.svg diff --git a/.cwl/README.md b/.cwl/README.md index 41a62d0..0340cec 100644 --- a/.cwl/README.md +++ b/.cwl/README.md @@ -77,5 +77,5 @@ cwltool --parallel run.cwl run.yml > $(date +"%Y-%m-%d_%H-%M")-run.log 2>&1 & ### Print workflow to file ```bash -cwltool --print-dot run.cwl | dot -Tsvg > run.svg 
+cwltool --print-dot ../arc.cwl | dot -Tsvg > arc-cwl.svg ``` diff --git a/.cwl/arc-cwl.svg b/.cwl/arc-cwl.svg new file mode 100644 index 0000000..8269aa8 --- /dev/null +++ b/.cwl/arc-cwl.svg @@ -0,0 +1,245 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Generated by graphviz version 9.0.0 (0) + --> +<!-- Title: G Pages: 1 --> +<svg width="2781pt" height="325pt" + viewBox="0.00 0.00 2781.00 325.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 321)"> +<title>G</title> +<polygon fill="#eeeeee" stroke="none" points="-4,4 -4,-321 2777,-321 2777,4 -4,4"/> +<g id="clust1" class="cluster"> +<title>cluster_inputs</title> +<polygon fill="#eeeeee" stroke="black" stroke-dasharray="5,2" points="8,-231.5 8,-309 2765,-309 2765,-231.5 8,-231.5"/> +<text text-anchor="middle" x="2710.5" y="-238.7" font-family="Times,serif" font-size="14.00">Workflow Inputs</text> +</g> +<g id="clust2" class="cluster"> +<title>cluster_outputs</title> +<polygon fill="#eeeeee" stroke="black" stroke-dasharray="5,2" points="2217,-8 2217,-85.5 2335,-85.5 2335,-8 2217,-8"/> +<text text-anchor="middle" x="2276" y="-15.2" font-family="Times,serif" font-size="14.00">Workflow Outputs</text> +</g> +<!-- isaSampleToRawDataSeq --> +<g id="node1" class="node"> +<title>isaSampleToRawDataSeq</title> +<polygon fill="#f3cea1" stroke="black" points="563.12,-187 563.12,-223 720.88,-223 720.88,-187 563.12,-187"/> +<text text-anchor="middle" x="642" y="-199.95" font-family="Times,serif" font-size="14.00">isaSampleToRawDataSeq</text> +</g> +<!-- sleuth --> +<g id="node2" class="node"> +<title>sleuth</title> +<polygon fill="#f3cea1" stroke="black" points="2249,-114 2249,-150 2303,-150 2303,-114 2249,-114"/> +<text text-anchor="middle" x="2275.62" y="-126.95" font-family="Times,serif" 
font-size="14.00">sleuth</text> +</g> +<!-- isaSampleToRawDataSeq->sleuth --> +<g id="edge1" class="edge"> +<title>isaSampleToRawDataSeq->sleuth</title> +<path fill="none" stroke="black" d="M721.36,-200.55C1013.99,-187.84 2026.5,-143.84 2237.35,-134.68"/> +<polygon fill="black" stroke="black" points="2237.36,-138.18 2247.2,-134.25 2237.05,-131.19 2237.36,-138.18"/> +</g> +<!-- outdir --> +<g id="node19" class="node"> +<title>outdir</title> +<polygon fill="#94ddf4" stroke="black" points="2249,-41 2249,-77 2303,-77 2303,-41 2249,-41"/> +<text text-anchor="middle" x="2275.62" y="-53.95" font-family="Times,serif" font-size="14.00">outdir</text> +</g> +<!-- sleuth->outdir --> +<g id="edge18" class="edge"> +<title>sleuth->outdir</title> +<path fill="none" stroke="black" d="M2276,-113.81C2276,-106.23 2276,-97.1 2276,-88.54"/> +<polygon fill="black" stroke="black" points="2279.5,-88.54 2276,-78.54 2272.5,-88.54 2279.5,-88.54"/> +</g> +<!-- kallisto --> +<g id="node3" class="node"> +<title>kallisto</title> +<polygon fill="#f3cea1" stroke="black" points="1606.12,-187 1606.12,-223 1661.88,-223 1661.88,-187 1606.12,-187"/> +<text text-anchor="middle" x="1634" y="-199.95" font-family="Times,serif" font-size="14.00">kallisto</text> +</g> +<!-- kallisto->sleuth --> +<g id="edge2" class="edge"> +<title>kallisto->sleuth</title> +<path fill="none" stroke="black" d="M1662,-200.9C1764.43,-189.58 2118.96,-150.37 2237.24,-137.29"/> +<polygon fill="black" stroke="black" points="2237.52,-140.78 2247.07,-136.2 2236.75,-133.82 2237.52,-140.78"/> +</g> +<!-- isaSampleToRawDataSeq_arcPath --> +<g id="node4" class="node"> +<title>isaSampleToRawDataSeq_arcPath</title> +<polygon fill="#94ddf4" stroke="black" points="773.5,-264.5 773.5,-300.5 978.5,-300.5 978.5,-264.5 773.5,-264.5"/> +<text text-anchor="middle" x="876" y="-277.45" font-family="Times,serif" font-size="14.00">isaSampleToRawDataSeq_arcPath</text> +</g> +<!-- isaSampleToRawDataSeq_arcPath->isaSampleToRawDataSeq --> +<g id="edge3" 
class="edge"> +<title>isaSampleToRawDataSeq_arcPath->isaSampleToRawDataSeq</title> +<path fill="none" stroke="black" d="M841.94,-264C820.12,-253.37 790.95,-240.22 764,-231.5 753.73,-228.18 742.83,-225.18 731.94,-222.51"/> +<polygon fill="black" stroke="black" points="733.08,-219.18 722.55,-220.3 731.48,-226 733.08,-219.18"/> +</g> +<!-- isaSampleToRawDataSeq_assayName --> +<g id="node5" class="node"> +<title>isaSampleToRawDataSeq_assayName</title> +<polygon fill="#94ddf4" stroke="black" points="528.62,-264.5 528.62,-300.5 755.38,-300.5 755.38,-264.5 528.62,-264.5"/> +<text text-anchor="middle" x="642" y="-277.45" font-family="Times,serif" font-size="14.00">isaSampleToRawDataSeq_assayName</text> +</g> +<!-- isaSampleToRawDataSeq_assayName->isaSampleToRawDataSeq --> +<g id="edge4" class="edge"> +<title>isaSampleToRawDataSeq_assayName->isaSampleToRawDataSeq</title> +<path fill="none" stroke="black" d="M642,-264.37C642,-255.55 642,-244.57 642,-234.51"/> +<polygon fill="black" stroke="black" points="645.5,-234.84 642,-224.84 638.5,-234.84 645.5,-234.84"/> +</g> +<!-- isaSampleToRawDataSeq_outName --> +<g id="node6" class="node"> +<title>isaSampleToRawDataSeq_outName</title> +<polygon fill="#94ddf4" stroke="black" points="295.62,-264.5 295.62,-300.5 510.38,-300.5 510.38,-264.5 295.62,-264.5"/> +<text text-anchor="middle" x="403" y="-277.45" font-family="Times,serif" font-size="14.00">isaSampleToRawDataSeq_outName</text> +</g> +<!-- isaSampleToRawDataSeq_outName->isaSampleToRawDataSeq --> +<g id="edge5" class="edge"> +<title>isaSampleToRawDataSeq_outName->isaSampleToRawDataSeq</title> +<path fill="none" stroke="black" d="M438.67,-264.07C461.49,-253.47 491.98,-240.32 520,-231.5 530.11,-228.32 540.82,-225.42 551.53,-222.81"/> +<polygon fill="black" stroke="black" points="552.25,-226.24 561.18,-220.55 550.65,-219.42 552.25,-226.24"/> +</g> +<!-- isaSampleToRawDataSeq_startingNodeNum --> +<g id="node7" class="node"> +<title>isaSampleToRawDataSeq_startingNodeNum</title> 
+<polygon fill="#94ddf4" stroke="black" points="16,-264.5 16,-300.5 278,-300.5 278,-264.5 16,-264.5"/> +<text text-anchor="middle" x="147" y="-277.45" font-family="Times,serif" font-size="14.00">isaSampleToRawDataSeq_startingNodeNum</text> +</g> +<!-- isaSampleToRawDataSeq_startingNodeNum->isaSampleToRawDataSeq --> +<g id="edge6" class="edge"> +<title>isaSampleToRawDataSeq_startingNodeNum->isaSampleToRawDataSeq</title> +<path fill="none" stroke="black" d="M186.6,-264.1C214.19,-252.79 252.15,-238.83 287,-231.5 375.68,-212.84 479.26,-207.19 551.56,-205.77"/> +<polygon fill="black" stroke="black" points="551.44,-209.27 561.38,-205.6 551.32,-202.27 551.44,-209.27"/> +</g> +<!-- kallisto_BootstrapSamples --> +<g id="node8" class="node"> +<title>kallisto_BootstrapSamples</title> +<polygon fill="#94ddf4" stroke="black" points="1307.25,-264.5 1307.25,-300.5 1468.75,-300.5 1468.75,-264.5 1307.25,-264.5"/> +<text text-anchor="middle" x="1388" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_BootstrapSamples</text> +</g> +<!-- kallisto_BootstrapSamples->kallisto --> +<g id="edge7" class="edge"> +<title>kallisto_BootstrapSamples->kallisto</title> +<path fill="none" stroke="black" d="M1413.65,-264.1C1431.07,-253.09 1455.04,-239.48 1478,-231.5 1516.32,-218.18 1562.41,-211.74 1594.51,-208.68"/> +<polygon fill="black" stroke="black" points="1594.51,-212.19 1604.16,-207.83 1593.89,-205.22 1594.51,-212.19"/> +</g> +<!-- kallisto_FragmentLength --> +<g id="node9" class="node"> +<title>kallisto_FragmentLength</title> +<polygon fill="#94ddf4" stroke="black" points="1136.38,-264.5 1136.38,-300.5 1289.62,-300.5 1289.62,-264.5 1136.38,-264.5"/> +<text text-anchor="middle" x="1213" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_FragmentLength</text> +</g> +<!-- kallisto_FragmentLength->kallisto --> +<g id="edge8" class="edge"> +<title>kallisto_FragmentLength->kallisto</title> +<path fill="none" stroke="black" d="M1235.81,-264.17C1252.18,-252.75 
1275.29,-238.62 1298,-231.5 1352.25,-214.5 1517.79,-208.61 1594.37,-206.77"/> +<polygon fill="black" stroke="black" points="1594.43,-210.27 1604.35,-206.54 1594.27,-203.27 1594.43,-210.27"/> +</g> +<!-- kallisto_IndexInput --> +<g id="node10" class="node"> +<title>kallisto_IndexInput</title> +<polygon fill="#94ddf4" stroke="black" points="996.12,-264.5 996.12,-300.5 1117.88,-300.5 1117.88,-264.5 996.12,-264.5"/> +<text text-anchor="middle" x="1057" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_IndexInput</text> +</g> +<!-- kallisto_IndexInput->kallisto --> +<g id="edge9" class="edge"> +<title>kallisto_IndexInput->kallisto</title> +<path fill="none" stroke="black" d="M1074.93,-264.24C1088.17,-252.69 1107.26,-238.38 1127,-231.5 1170.61,-216.31 1482.71,-208.9 1594.39,-206.71"/> +<polygon fill="black" stroke="black" points="1594.33,-210.21 1604.26,-206.52 1594.2,-203.21 1594.33,-210.21"/> +</g> +<!-- kallisto_StandardDeviation --> +<g id="node11" class="node"> +<title>kallisto_StandardDeviation</title> +<polygon fill="#94ddf4" stroke="black" points="1921.12,-264.5 1921.12,-300.5 2084.88,-300.5 2084.88,-264.5 1921.12,-264.5"/> +<text text-anchor="middle" x="2003" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_StandardDeviation</text> +</g> +<!-- kallisto_StandardDeviation->kallisto --> +<g id="edge10" class="edge"> +<title>kallisto_StandardDeviation->kallisto</title> +<path fill="none" stroke="black" d="M1977.98,-264.06C1960.37,-252.74 1935.79,-238.78 1912,-231.5 1868.5,-218.19 1739.3,-210.69 1673.49,-207.63"/> +<polygon fill="black" stroke="black" points="1674.01,-204.15 1663.86,-207.2 1673.69,-211.15 1674.01,-204.15"/> +</g> +<!-- kallisto_isSingle --> +<g id="node12" class="node"> +<title>kallisto_isSingle</title> +<polygon fill="#94ddf4" stroke="black" points="1797,-264.5 1797,-300.5 1903,-300.5 1903,-264.5 1797,-264.5"/> +<text text-anchor="middle" x="1850" y="-277.45" font-family="Times,serif" 
font-size="14.00">kallisto_isSingle</text> +</g> +<!-- kallisto_isSingle->kallisto --> +<g id="edge11" class="edge"> +<title>kallisto_isSingle->kallisto</title> +<path fill="none" stroke="black" d="M1833.47,-264.16C1821.83,-253.02 1805.29,-239.25 1788,-231.5 1751.33,-215.06 1705.47,-209.11 1673.37,-207.01"/> +<polygon fill="black" stroke="black" points="1673.9,-203.54 1663.72,-206.49 1673.51,-210.53 1673.9,-203.54"/> +</g> +<!-- kallisto_resultsFolder --> +<g id="node13" class="node"> +<title>kallisto_resultsFolder</title> +<polygon fill="#94ddf4" stroke="black" points="1645.5,-264.5 1645.5,-300.5 1778.5,-300.5 1778.5,-264.5 1645.5,-264.5"/> +<text text-anchor="middle" x="1712" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_resultsFolder</text> +</g> +<!-- kallisto_resultsFolder->kallisto --> +<g id="edge12" class="edge"> +<title>kallisto_resultsFolder->kallisto</title> +<path fill="none" stroke="black" d="M1694.3,-264.37C1684.2,-254.59 1671.35,-242.15 1660.11,-231.27"/> +<polygon fill="black" stroke="black" points="1662.61,-228.82 1652.99,-224.38 1657.74,-233.85 1662.61,-228.82"/> +</g> +<!-- kallisto_sampleRecord --> +<g id="node14" class="node"> +<title>kallisto_sampleRecord</title> +<polygon fill="#94ddf4" stroke="black" points="1486.75,-264.5 1486.75,-300.5 1627.25,-300.5 1627.25,-264.5 1486.75,-264.5"/> +<text text-anchor="middle" x="1557" y="-277.45" font-family="Times,serif" font-size="14.00">kallisto_sampleRecord</text> +</g> +<!-- kallisto_sampleRecord->kallisto --> +<g id="edge13" class="edge"> +<title>kallisto_sampleRecord->kallisto</title> +<path fill="none" stroke="black" d="M1574.47,-264.37C1584.44,-254.59 1597.13,-242.15 1608.23,-231.27"/> +<polygon fill="black" stroke="black" points="1610.56,-233.89 1615.25,-224.39 1605.66,-228.89 1610.56,-233.89"/> +</g> +<!-- sleuth_inMetadataDataCol --> +<g id="node15" class="node"> +<title>sleuth_inMetadataDataCol</title> +<polygon fill="#94ddf4" stroke="black" points="2103.25,-264.5 
2103.25,-300.5 2264.75,-300.5 2264.75,-264.5 2103.25,-264.5"/> +<text text-anchor="middle" x="2184" y="-277.45" font-family="Times,serif" font-size="14.00">sleuth_inMetadataDataCol</text> +</g> +<!-- sleuth_inMetadataDataCol->sleuth --> +<g id="edge14" class="edge"> +<title>sleuth_inMetadataDataCol->sleuth</title> +<path fill="none" stroke="black" d="M2194.73,-264.17C2210.66,-238.47 2240.68,-190 2259.44,-159.73"/> +<polygon fill="black" stroke="black" points="2262.25,-161.85 2264.54,-151.5 2256.3,-158.16 2262.25,-161.85"/> +</g> +<!-- sleuth_inMetadataFactorList --> +<g id="node16" class="node"> +<title>sleuth_inMetadataFactorList</title> +<polygon fill="#94ddf4" stroke="black" points="2283,-264.5 2283,-300.5 2455,-300.5 2455,-264.5 2283,-264.5"/> +<text text-anchor="middle" x="2369" y="-277.45" font-family="Times,serif" font-size="14.00">sleuth_inMetadataFactorList</text> +</g> +<!-- sleuth_inMetadataFactorList->sleuth --> +<g id="edge15" class="edge"> +<title>sleuth_inMetadataFactorList->sleuth</title> +<path fill="none" stroke="black" d="M2358.15,-264.17C2342.05,-238.47 2311.7,-190 2292.74,-159.73"/> +<polygon fill="black" stroke="black" points="2295.86,-158.12 2287.58,-151.5 2289.93,-161.83 2295.86,-158.12"/> +</g> +<!-- sleuth_inMetadataSample --> +<g id="node17" class="node"> +<title>sleuth_inMetadataSample</title> +<polygon fill="#94ddf4" stroke="black" points="2473.12,-264.5 2473.12,-300.5 2630.88,-300.5 2630.88,-264.5 2473.12,-264.5"/> +<text text-anchor="middle" x="2552" y="-277.45" font-family="Times,serif" font-size="14.00">sleuth_inMetadataSample</text> +</g> +<!-- sleuth_inMetadataSample->sleuth --> +<g id="edge16" class="edge"> +<title>sleuth_inMetadataSample->sleuth</title> +<path fill="none" stroke="black" d="M2522,-264.12C2505.06,-254.39 2483.43,-242.11 2464,-231.5 2412.12,-203.18 2351.44,-171.67 2313.35,-152.1"/> +<polygon fill="black" stroke="black" points="2315.08,-149.05 2304.59,-147.59 2311.88,-155.28 2315.08,-149.05"/> +</g> +<!-- 
sleuth_outFolder --> +<g id="node18" class="node"> +<title>sleuth_outFolder</title> +<polygon fill="#94ddf4" stroke="black" points="2649.25,-264.5 2649.25,-300.5 2756.75,-300.5 2756.75,-264.5 2649.25,-264.5"/> +<text text-anchor="middle" x="2703" y="-277.45" font-family="Times,serif" font-size="14.00">sleuth_outFolder</text> +</g> +<!-- sleuth_outFolder->sleuth --> +<g id="edge17" class="edge"> +<title>sleuth_outFolder->sleuth</title> +<path fill="none" stroke="black" d="M2685.09,-264C2673.18,-253.24 2656.7,-239.93 2640,-231.5 2529.23,-175.57 2382.96,-148.47 2314.47,-138.16"/> +<polygon fill="black" stroke="black" points="2315.28,-134.74 2304.88,-136.76 2314.27,-141.67 2315.28,-134.74"/> +</g> +</g> +</svg> diff --git a/README.md b/README.md index a0b4760..ffc8756 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,29 @@ The [Talinum Genome Draft](./studies/TalinumGenomeDraft) originates from <https: ## Table of Contents -1. [Description](#description) -2. [Publication](#publication) -3. Studies - - [TalinumGenomeDraft](#study--talinumgenomedraft) - - [TalinumSamples-STRI](#study--TalinumSamples-STRI) -4. 
Assays - - [MassHunter_targets](#assay--masshunter_targets) - - [GCqTOF_targets](#assay--gcqtof_targets) - - [RNASeq](#assay--RNASeq) +- [Table of Contents](#table-of-contents) + - [Description](#description) +- [Relationships between Assays and Studies](#relationships-between-assays-and-studies) +- [Workflow Overview](#workflow-overview) +- [Additional details](#additional-details) +- [Publication](#publication) +- [Study : *TalinumGenomeDraft*](#study--talinumgenomedraft) + - [Additional details](#additional-details-1) + - [Annotation headers](#annotation-headers) +- [Study : *TalinumSamples-STRI*](#study--talinumsamples-stri) + - [Additional details](#additional-details-2) + - [Annotation headers](#annotation-headers-1) +- [Assay : *MassHunter\_targets*](#assay--masshunter_targets) + - [Additional details](#additional-details-3) + - [Annotation headers](#annotation-headers-2) +- [Assay : *RNASeq*](#assay--rnaseq) + - [Additional details](#additional-details-4) + - [Annotation headers](#annotation-headers-3) +- [Assay : *GCqTOF\_targets*](#assay--gcqtof_targets) + - [Additional details](#additional-details-5) + - [Annotation headers](#annotation-headers-4) + + ### Description @@ -67,7 +81,12 @@ class ASSAY_MassHunter_targets,ASSAY_RNASeq,ASSAY_GCqTOF_targets assayStyle; class TalinumGenomeDraft,plant_material,mh-quant-results,mh-quant-report,rna_extraction,illumina,metabolite_extraction,gas_chromatography,mass_spec processStyle; ``` -### Additional details +## Workflow Overview + +![Workflow overview generated from arc.cwl](.cwl/arc-cwl.svg) + + +## Additional details | Meta Data | Description | | --------- | ----------- | -- GitLab