From 539301f6ed69b67a59446baa7afd544065cdd04c Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Fri, 21 Mar 2025 13:59:16 +0100 Subject: [PATCH 1/5] remove obsolete requirement --- workflows/kallisto/kallisto-index.cwl | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index 1a4b3c9..6c72330 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -39,9 +39,6 @@ hints: - https://identifiers.org/rrid/RRID:SCR_016582 - https://identifiers.org/biotools/kallisto -requirements: - InlineJavascriptRequirement: {} - baseCommand: [kallisto, index] inputs: -- GitLab From abfd58be480fe99fda05d2e48d1a0f310be81796 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Mon, 24 Mar 2025 09:04:37 +0100 Subject: [PATCH 2/5] remove false citation metadata (related to tool not CWL) --- workflows/kallisto/kallisto-index.cwl | 4 ---- workflows/kallisto/kallisto-quant.cwl | 4 ---- 2 files changed, 8 deletions(-) diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index 6c72330..c5719d5 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -80,7 +80,3 @@ $namespaces: $schemas: - https://edamontology.org/EDAM_1.25.owl - https://schema.org/version/latest/schemaorg-current-https.rdf - -s:license: https://spdx.org/licenses/BSD-2-Clause -s:citation: https://dx.doi.org/10.1038/nbt.3519 -s:codeRepository: https://github.com/pachterlab/kallisto diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 93ff361..0eea07f 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -176,7 +176,3 @@ $namespaces: $schemas: - https://edamontology.org/EDAM_1.25.owl - https://schema.org/version/latest/schemaorg-current-https.rdf - -s:license: 
https://spdx.org/licenses/BSD-2-Clause -s:citation: https://dx.doi.org/10.1038/nbt.3519 -s:codeRepository: https://github.com/pachterlab/kallisto -- GitLab From 142f85518f4ce2c1be17fdc18c2f42b6ed6ba330 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Mon, 24 Mar 2025 09:21:34 +0100 Subject: [PATCH 3/5] add sleuth metadata --- workflows/sleuth/sleuth.R | 2 +- workflows/sleuth/workflow.cwl | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/workflows/sleuth/sleuth.R b/workflows/sleuth/sleuth.R index 2c87d61..2b2283c 100644 --- a/workflows/sleuth/sleuth.R +++ b/workflows/sleuth/sleuth.R @@ -76,7 +76,7 @@ save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData")) expression_data <- kallisto_table(so) ## write to file -write.csv(expression_data, paste(outFolder, "/kallisto_df.csv", sep = "/"), row.names = F) +write.csv(expression_data, file.path(outFolder, "kallisto_df.csv"), row.names = F) ## as tpm matrix (gene x sample) tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm") diff --git a/workflows/sleuth/workflow.cwl b/workflows/sleuth/workflow.cwl index 4a8add0..497533c 100644 --- a/workflows/sleuth/workflow.cwl +++ b/workflows/sleuth/workflow.cwl @@ -6,6 +6,19 @@ class: CommandLineTool hints: DockerRequirement: dockerPull: quay.io/biocontainers/mulled-v2-fdd016122f200fdc6dc30f6ea2fd0000e8067dff:f9531f6ac1f44332eff70b5912d7d5f3ebe8df38-0 + SoftwareRequirement: + packages: + - package: R + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + version: [ "4.2.3" ] + - package: sleuth + version: [ "0.30.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_016883 + - https://identifiers.org/biotools/sleuth requirements: - class: InitialWorkDirRequirement @@ -13,8 +26,8 @@ requirements: - entryname: sleuth.R entry: $include: sleuth.R - - class: 
NetworkAccess - networkAccess: true + # - class: NetworkAccess + # networkAccess: true baseCommand: [Rscript, sleuth.R] -- GitLab From 211ac2be5acdf160d1e7394d773c5b2875636773 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Mon, 24 Mar 2025 10:17:13 +0100 Subject: [PATCH 4/5] redesign sleuth --- runs/sleuth/run.cwl | 14 ++-- runs/sleuth/run.yml | 2 +- workflows/sleuth/gather-files.cwl | 24 +++++++ workflows/sleuth/sleuth.R | 23 ++---- workflows/sleuth/sleuth.cwl | 87 +++++++++++++++++++++++ workflows/sleuth/workflow.cwl | 113 +++++++++++++++--------------- 6 files changed, 182 insertions(+), 81 deletions(-) create mode 100644 workflows/sleuth/gather-files.cwl create mode 100644 workflows/sleuth/sleuth.cwl diff --git a/runs/sleuth/run.cwl b/runs/sleuth/run.cwl index 7884d8d..789736f 100644 --- a/runs/sleuth/run.cwl +++ b/runs/sleuth/run.cwl @@ -3,13 +3,16 @@ cwlVersion: v1.2 class: Workflow +requirements: + SubworkflowFeatureRequirement: {} + inputs: inKallistoResults: Directory inMetadataFile: File inMetadataSample: string inMetadataFactorList: string[] inMetadataDataCol: string - outFolder: string + resultsoutdir: string steps: sleuth: @@ -20,14 +23,13 @@ steps: inMetadataSample: inMetadataSample inMetadataFactorList: inMetadataFactorList inMetadataDataCol: inMetadataDataCol - outFolder: outFolder - out: [outdir] + finaloutdir: resultsoutdir + out: [ sleuth_outdir ] outputs: outdir: - type: Directory[] - outputSource: sleuth/outdir - + type: Directory + outputSource: sleuth/sleuth_outdir $namespaces: s: https://schema.org/ diff --git a/runs/sleuth/run.yml b/runs/sleuth/run.yml index 1996f50..61c509d 100644 --- a/runs/sleuth/run.yml +++ b/runs/sleuth/run.yml @@ -8,4 +8,4 @@ inMetadataSample: "Input [Source Name]" inMetadataFactorList: - "Factor [Photosynthesis mode]" inMetadataDataCol: "Output [Data]" -outFolder: results +resultsoutdir: results diff --git a/workflows/sleuth/gather-files.cwl b/workflows/sleuth/gather-files.cwl new 
file mode 100644 index 0000000..67d23ec --- /dev/null +++ b/workflows/sleuth/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. + + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/sleuth/sleuth.R b/workflows/sleuth/sleuth.R index 2b2283c..a9e0b52 100644 --- a/workflows/sleuth/sleuth.R +++ b/workflows/sleuth/sleuth.R @@ -18,7 +18,6 @@ inMetadataFile <- args[2] inMetadataSample <- args[3] inMetadataFactorList <- args[4] inMetadataDataCol <- args[5] -outFolder <- args[6] # inKallistoResults <- "../../runs/kallisto/kallisto_results" # inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv" @@ -27,12 +26,6 @@ outFolder <- args[6] # inMetadataDataCol <- "Output [Data]" # outFolder <- "." 
-################################################ -#### If it does not exist, create out dir -################################################ - -dir.create(outFolder, recursive = T, showWarnings = F) - ################################################ #### Read ISA sample metadata ################################################ @@ -43,10 +36,8 @@ samples <- read.csv(file = inMetadataFile, check.names = FALSE) #### Read Kallisto results ################################################ -base_dir <- inKallistoResults - # A list of paths to the kallisto results indexed by the sample IDs is collated with -kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths +kal_dirs <- dir(inKallistoResults, full.names = T) ## Sleuth requires full paths s2c <- samples[order(samples[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList), inMetadataDataCol)] @@ -66,7 +57,7 @@ design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) so <- sleuth_prep(s2c, full_model = design_formula) -save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData")) +save(so, file = "kallisto_sleuthObject.RData") ################################################ #### Extract expression tables @@ -76,13 +67,13 @@ save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData")) expression_data <- kallisto_table(so) ## write to file -write.csv(expression_data, file.path(outFolder, "kallisto_df.csv"), row.names = F) +write.csv(expression_data, "kallisto_df.csv", row.names = F) ## as tpm matrix (gene x sample) tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm") # Write to file -write.csv(tpm_table, file.path(outFolder, "kallisto_tpmMatrix.csv"), row.names = F) +write.csv(tpm_table, "kallisto_tpmMatrix.csv", row.names = F) ################################################ #### Summarize mapping stats @@ -95,11 +86,11 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = T)) { 
mapping_stats <- rbind(mapping_stats, z) } -write.csv(mapping_stats, file.path(outFolder, "kallisto_mappingStats.csv"), row.names = F) +write.csv(mapping_stats, "kallisto_mappingStats.csv", row.names = F) ################################################ -#### Run sleuth to identify DEGs +#### Run sleuth to identify DGE ################################################ so <- sleuth_fit(so) @@ -111,4 +102,4 @@ sleuth_table <- sleuth_results(so, "reduced:full", "lrt", show_all = FALSE) ### write to file -write.csv(sleuth_table, file.path(outFolder, "sleuth_dge.csv"), row.names = F) +write.csv(sleuth_table, "sleuth_dge.csv", row.names = F) diff --git a/workflows/sleuth/sleuth.cwl b/workflows/sleuth/sleuth.cwl new file mode 100644 index 0000000..3ffc997 --- /dev/null +++ b/workflows/sleuth/sleuth.cwl @@ -0,0 +1,87 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.2 +class: CommandLineTool + +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/mulled-v2-fdd016122f200fdc6dc30f6ea2fd0000e8067dff:f9531f6ac1f44332eff70b5912d7d5f3ebe8df38-0 + SoftwareRequirement: + packages: + - package: R + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + version: [ "4.2.3" ] + - package: sleuth + version: [ "0.30.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_016883 + - https://identifiers.org/biotools/sleuth + +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: sleuth.R + entry: + $include: sleuth.R + +baseCommand: [Rscript, sleuth.R] + +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactorList: + type: string[] + inputBinding: + position: 4 + inMetadataDataCol: + type: string + inputBinding: + position: 5 + +outputs: + kallisto_sleuthObject: + type: File + outputBinding: + glob: 
"kallisto_sleuthObject.RData" + kallisto_df: + type: File + outputBinding: + glob: "kallisto_df.csv" + kallisto_tpmMatrix: + type: File + outputBinding: + glob: "kallisto_tpmMatrix.csv" + kallisto_mappingStats: + type: File + outputBinding: + glob: "kallisto_mappingStats.csv" + sleuth_dge: + type: File + outputBinding: + glob: "sleuth_dge.csv" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/workflows/sleuth/workflow.cwl b/workflows/sleuth/workflow.cwl index 497533c..5f08a47 100644 --- a/workflows/sleuth/workflow.cwl +++ b/workflows/sleuth/workflow.cwl @@ -1,66 +1,63 @@ #!/usr/bin/env cwl-runner - cwlVersion: v1.2 -class: CommandLineTool - -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/mulled-v2-fdd016122f200fdc6dc30f6ea2fd0000e8067dff:f9531f6ac1f44332eff70b5912d7d5f3ebe8df38-0 - SoftwareRequirement: - packages: - - package: R - specs: - - https://identifiers.org/rrid/RRID:SCR_001905 - - https://identifiers.org/biotools/r - - https://anaconda.org/bioconda/r - version: [ "4.2.3" ] - - package: sleuth - version: [ "0.30.1" ] - specs: - - https://identifiers.org/rrid/RRID:SCR_016883 - - https://identifiers.org/biotools/sleuth +class: Workflow requirements: - - class: InitialWorkDirRequirement - listing: - - entryname: sleuth.R - entry: - $include: sleuth.R - # - class: NetworkAccess - # networkAccess: true - -baseCommand: [Rscript, sleuth.R] + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} inputs: - inKallistoResults: + inKallistoResults: Directory + inMetadataFile: File + inMetadataSample: string + inMetadataFactorList: string[] + inMetadataDataCol: string + finaloutdir: string + +steps: + sleuth: 
+ run: sleuth.cwl + in: + inKallistoResults: inKallistoResults + inMetadataFile: inMetadataFile + inMetadataSample: inMetadataSample + inMetadataFactorList: inMetadataFactorList + inMetadataDataCol: inMetadataDataCol + out: + - kallisto_sleuthObject + - kallisto_df + - kallisto_tpmMatrix + - kallisto_mappingStats + - sleuth_dge + collectFiles: + run: ./gather-files.cwl + in: + inFiles: + source: + - sleuth/kallisto_sleuthObject + - sleuth/kallisto_df + - sleuth/kallisto_tpmMatrix + - sleuth/kallisto_mappingStats + - sleuth/sleuth_dge + linkMerge: merge_flattened + destination: finaloutdir + out: [outDir] + +outputs: + sleuth_outdir: type: Directory - inputBinding: - position: 1 - inMetadataFile: - type: File - inputBinding: - position: 2 - inMetadataSample: - type: string - inputBinding: - position: 3 - inMetadataFactorList: - type: string[] - inputBinding: - position: 4 - inMetadataDataCol: - type: string - inputBinding: - position: 5 - outFolder: - type: string - inputBinding: - position: 6 + outputSource: collectFiles/outDir -outputs: -- id: outdir - type: - type: array - items: Directory - outputBinding: - glob: $(runtime.outdir)/$(inputs.outFolder) +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 -- GitLab From ef768ffe99bdbfe3ed1b0b1b072825c46211b486 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Mon, 24 Mar 2025 10:37:15 +0100 Subject: [PATCH 5/5] redesign deseq2 --- runs/deseq2/{ => results}/results_ma-plot.svg | 0 .../deseq2/{ => results}/results_pca-plot.svg | 0 runs/deseq2/{ => results}/results_stats.csv | 0 runs/deseq2/run.cwl | 14 ++- runs/deseq2/run.yml | 3 +- workflows/deseq2/deseq2.R | 7 -- workflows/deseq2/deseq2.cwl | 92 +++++++++++++++ 
workflows/deseq2/gather-files.cwl | 24 ++++ workflows/deseq2/workflow.cwl | 107 +++++++++--------- workflows/sleuth/sleuth.cwl | 2 +- 10 files changed, 180 insertions(+), 69 deletions(-) rename runs/deseq2/{ => results}/results_ma-plot.svg (100%) rename runs/deseq2/{ => results}/results_pca-plot.svg (100%) rename runs/deseq2/{ => results}/results_stats.csv (100%) create mode 100644 workflows/deseq2/deseq2.cwl create mode 100644 workflows/deseq2/gather-files.cwl diff --git a/runs/deseq2/results_ma-plot.svg b/runs/deseq2/results/results_ma-plot.svg similarity index 100% rename from runs/deseq2/results_ma-plot.svg rename to runs/deseq2/results/results_ma-plot.svg diff --git a/runs/deseq2/results_pca-plot.svg b/runs/deseq2/results/results_pca-plot.svg similarity index 100% rename from runs/deseq2/results_pca-plot.svg rename to runs/deseq2/results/results_pca-plot.svg diff --git a/runs/deseq2/results_stats.csv b/runs/deseq2/results/results_stats.csv similarity index 100% rename from runs/deseq2/results_stats.csv rename to runs/deseq2/results/results_stats.csv diff --git a/runs/deseq2/run.cwl b/runs/deseq2/run.cwl index e0fdbf4..0aadd01 100644 --- a/runs/deseq2/run.cwl +++ b/runs/deseq2/run.cwl @@ -2,11 +2,15 @@ cwlVersion: v1.2 class: Workflow +requirements: + SubworkflowFeatureRequirement: {} + inputs: inKallistoResults: Directory inMetadataFile: File inMetadataSample: string inMetadataFactorList: string[] + resultsoutdir: string steps: deseq2: @@ -16,12 +20,14 @@ steps: inMetadataFile: inMetadataFile inMetadataSample: inMetadataSample inMetadataFactorList: inMetadataFactorList - out: [output] + finaloutdir: resultsoutdir + + out: [deseq2_outdir] outputs: output: - type: File[] - outputSource: deseq2/output + type: Directory + outputSource: deseq2/deseq2_outdir $namespaces: s: https://schema.org/ @@ -34,4 +40,4 @@ $schemas: s:author: - class: s:Person s:name: Dominik Brilhaus - s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file + 
s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/runs/deseq2/run.yml b/runs/deseq2/run.yml index b8e9909..60e8b8f 100644 --- a/runs/deseq2/run.yml +++ b/runs/deseq2/run.yml @@ -6,4 +6,5 @@ inMetadataFile: path: ../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv inMetadataSample: "Input [Source Name]" inMetadataFactorList: - - "Factor [Photosynthesis mode]" \ No newline at end of file + - "Factor [Photosynthesis mode]" +resultsoutdir: results diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index dcad6b6..b20545e 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -7,13 +7,6 @@ library("tximport") library("rhdf5") library("ggplot2") -# ## Tests - -# inKallistoResults <- "../../runs/kallisto/kallisto_results" -# inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv" -# inMetadataSample <- "Input [Source Name]" -# inMetadataFactorList <- list("Factor [Photosynthesis mode]") - ### Read arguments from CLI args <- commandArgs(trailingOnly = T) diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl new file mode 100644 index 0000000..7087f6f --- /dev/null +++ b/workflows/deseq2/deseq2.cwl @@ -0,0 +1,92 @@ +#!/usr/bin/env cwl-runner + +doc: | + DESeq2 example workflow for **differential gene expression analysis** + + This workflow runs DESeq2 on the output of the kallisto workflow + and the metadata file. + It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps + 1. Read kallisto data + 2. Prep / run deseq2 + 3. 
Plot results + + ## DESeq2 docs: + https://bioconductor.org/packages/release/bioc/html/DESeq2.html + + ## Importing kallisto output with tximport + https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto + + ## Multi-package containers + - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) + - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` + - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: + +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 + SoftwareRequirement: + packages: + - package: R + version: [ "4.1.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + - package: DESeq2 + version: [ "1.34.0" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_015687 + - https://identifiers.org/biotools/deseq2 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: deseq2.R + entry: + $include: deseq2.R +baseCommand: [Rscript, deseq2.R] +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactorList: + type: string[] + inputBinding: + position: 4 + +outputs: + results_stats: + type: File + outputBinding: + glob: "results_stats.csv" + results_ma-plot: + type: File + outputBinding: + glob: "results_ma-plot.svg" + results_pca-plot: + type: File + outputBinding: + glob: "results_pca-plot.svg" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - 
https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.25.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/deseq2/gather-files.cwl b/workflows/deseq2/gather-files.cwl new file mode 100644 index 0000000..67d23ec --- /dev/null +++ b/workflows/deseq2/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. + + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/deseq2/workflow.cwl b/workflows/deseq2/workflow.cwl index 0f1d68d..18af3a1 100644 --- a/workflows/deseq2/workflow.cwl +++ b/workflows/deseq2/workflow.cwl @@ -1,62 +1,57 @@ #!/usr/bin/env cwl-runner - -doc: | - DESeq2 example workflow for **differential gene expression analysis** - - This workflow runs DESeq2 on the output of the kallisto workflow - and the metadata file. - It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps - 1. Read kallsito data - 2. Prep / run deseq2 - 3. 
Plot results - - ## DESeq2 docs: - https://bioconductor.org/packages/release/bioc/html/DESeq2.html - - ## Importing kallisto output with tximport - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - - ## Multi-package containers - - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) - - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` - - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: - cwlVersion: v1.2 -class: CommandLineTool -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 +class: Workflow + requirements: - - class: InitialWorkDirRequirement - listing: - - entryname: deseq2.R - entry: - $include: deseq2.R - - class: NetworkAccess - networkAccess: true -baseCommand: [Rscript, deseq2.R] + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + inputs: - inKallistoResults: + inKallistoResults: Directory + inMetadataFile: File + inMetadataSample: string + inMetadataFactorList: string[] + finaloutdir: string + +steps: + deseq2: + run: deseq2.cwl + in: + inKallistoResults: inKallistoResults + inMetadataFile: inMetadataFile + inMetadataSample: inMetadataSample + inMetadataFactorList: inMetadataFactorList + out: + - results_stats + - results_ma-plot + - results_pca-plot + collectFiles: + run: ./gather-files.cwl + in: + inFiles: + source: + - deseq2/results_stats + - deseq2/results_ma-plot + - deseq2/results_pca-plot + linkMerge: merge_flattened + destination: finaloutdir + out: [outDir] + +outputs: + deseq2_outdir: type: Directory - inputBinding: - position: 1 - inMetadataFile: - type: File - inputBinding: - position: 2 - 
inMetadataSample: - type: string - inputBinding: - position: 3 - inMetadataFactorList: - type: string[] - inputBinding: - position: 4 + outputSource: collectFiles/outDir -outputs: - output: - type: File[] - outputBinding: - glob: - - "*.svg" - - "*.csv" +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/sleuth/sleuth.cwl b/workflows/sleuth/sleuth.cwl index 3ffc997..00a1749 100644 --- a/workflows/sleuth/sleuth.cwl +++ b/workflows/sleuth/sleuth.cwl @@ -79,7 +79,7 @@ $namespaces: $schemas: - https://schema.org/version/latest/schemaorg-current-https.rdf - - http://edamontology.org/EDAM_1.18.owl + - http://edamontology.org/EDAM_1.25.owl s:author: - class: s:Person -- GitLab