diff --git a/runs/deseq2/results_ma-plot.svg b/runs/deseq2/results/results_ma-plot.svg similarity index 100% rename from runs/deseq2/results_ma-plot.svg rename to runs/deseq2/results/results_ma-plot.svg diff --git a/runs/deseq2/results_pca-plot.svg b/runs/deseq2/results/results_pca-plot.svg similarity index 100% rename from runs/deseq2/results_pca-plot.svg rename to runs/deseq2/results/results_pca-plot.svg diff --git a/runs/deseq2/results_stats.csv b/runs/deseq2/results/results_stats.csv similarity index 100% rename from runs/deseq2/results_stats.csv rename to runs/deseq2/results/results_stats.csv diff --git a/runs/deseq2/run.cwl b/runs/deseq2/run.cwl index e0fdbf42ee4d373eae50a4e3ccf8fc91ac7229ec..0aadd018989bc587f5cef0da02db2b655968adbe 100644 --- a/runs/deseq2/run.cwl +++ b/runs/deseq2/run.cwl @@ -2,11 +2,15 @@ cwlVersion: v1.2 class: Workflow +requirements: + SubworkflowFeatureRequirement: {} + inputs: inKallistoResults: Directory inMetadataFile: File inMetadataSample: string inMetadataFactorList: string[] + resultsoutdir: string steps: deseq2: @@ -16,12 +20,14 @@ steps: inMetadataFile: inMetadataFile inMetadataSample: inMetadataSample inMetadataFactorList: inMetadataFactorList - out: [output] + finaloutdir: resultsoutdir + + out: [deseq2_outdir] outputs: output: - type: File[] - outputSource: deseq2/output + type: Directory + outputSource: deseq2/deseq2_outdir $namespaces: s: https://schema.org/ @@ -34,4 +40,4 @@ $schemas: s:author: - class: s:Person s:name: Dominik Brilhaus - s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/runs/deseq2/run.yml b/runs/deseq2/run.yml index b8e9909794f2554784021064e4914996f866bcc2..60e8b8fc310a0aa7b00b44d8fea1bb113eda7cbb 100644 --- a/runs/deseq2/run.yml +++ b/runs/deseq2/run.yml @@ -6,4 +6,5 @@ inMetadataFile: path: ../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv inMetadataSample: "Input [Source Name]" inMetadataFactorList: - - 
"Factor [Photosynthesis mode]" \ No newline at end of file + - "Factor [Photosynthesis mode]" +resultsoutdir: results diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index dcad6b63d978ab9e6b6f144549f9095f59d8eaa8..b20545ef2ea581426104565ac860aedf3e04faf4 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -7,13 +7,6 @@ library("tximport") library("rhdf5") library("ggplot2") -# ## Tests - -# inKallistoResults <- "../../runs/kallisto/kallisto_results" -# inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv" -# inMetadataSample <- "Input [Source Name]" -# inMetadataFactorList <- list("Factor [Photosynthesis mode]") - ### Read arguments from CLI args <- commandArgs(trailingOnly = T) diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl new file mode 100644 index 0000000000000000000000000000000000000000..7087f6fd43d6655b79d79ecfd2a86067759cd3f4 --- /dev/null +++ b/workflows/deseq2/deseq2.cwl @@ -0,0 +1,92 @@ +#!/usr/bin/env cwl-runner + +doc: | + DESeq2 example workflow for **differential gene expression analysis** + + This workflow runs DESeq2 on the output of the kallisto workflow + and the metadata file. + It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps + 1. Read kallisto data + 2. Prep / run deseq2 + 3. 
Plot results + + ## DESeq2 docs: + https://bioconductor.org/packages/release/bioc/html/DESeq2.html + + ## Importing kallisto output with tximport + https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto + + ## Multi-package containers + - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) + - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` + - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: + +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 + SoftwareRequirement: + packages: + - package: R + version: [ "4.1.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + - package: DESeq2 + version: [ "1.34.0" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_015687 + - https://identifiers.org/biotools/deseq2 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: deseq2.R + entry: + $include: deseq2.R +baseCommand: [Rscript, deseq2.R] +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactorList: + type: string[] + inputBinding: + position: 4 + +outputs: + results_stats: + type: File + outputBinding: + glob: "results_stats.csv" + results_ma-plot: + type: File + outputBinding: + glob: "results_ma-plot.svg" + results_pca-plot: + type: File + outputBinding: + glob: "results_pca-plot.svg" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - 
https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.25.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/deseq2/gather-files.cwl b/workflows/deseq2/gather-files.cwl new file mode 100644 index 0000000000000000000000000000000000000000..67d23ec72e90012c4e07b2f2ebaf3b2d102308c8 --- /dev/null +++ b/workflows/deseq2/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. + + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/deseq2/workflow.cwl b/workflows/deseq2/workflow.cwl index 0f1d68dfbb3bf401270f8f0b49a4706f0f571455..18af3a16861ee832d544e2533d06644c4b406ebb 100644 --- a/workflows/deseq2/workflow.cwl +++ b/workflows/deseq2/workflow.cwl @@ -1,62 +1,57 @@ #!/usr/bin/env cwl-runner - -doc: | - DESeq2 example workflow for **differential gene expression analysis** - - This workflow runs DESeq2 on the output of the kallisto workflow - and the metadata file. - It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps - 1. Read kallsito data - 2. Prep / run deseq2 - 3. 
Plot results - - ## DESeq2 docs: - https://bioconductor.org/packages/release/bioc/html/DESeq2.html - - ## Importing kallisto output with tximport - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - - ## Multi-package containers - - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) - - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` - - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: - cwlVersion: v1.2 -class: CommandLineTool -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 +class: Workflow + requirements: - - class: InitialWorkDirRequirement - listing: - - entryname: deseq2.R - entry: - $include: deseq2.R - - class: NetworkAccess - networkAccess: true -baseCommand: [Rscript, deseq2.R] + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + inputs: - inKallistoResults: + inKallistoResults: Directory + inMetadataFile: File + inMetadataSample: string + inMetadataFactorList: string[] + finaloutdir: string + +steps: + deseq2: + run: deseq2.cwl + in: + inKallistoResults: inKallistoResults + inMetadataFile: inMetadataFile + inMetadataSample: inMetadataSample + inMetadataFactorList: inMetadataFactorList + out: + - results_stats + - results_ma-plot + - results_pca-plot + collectFiles: + run: ./gather-files.cwl + in: + inFiles: + source: + - deseq2/results_stats + - deseq2/results_ma-plot + - deseq2/results_pca-plot + linkMerge: merge_flattened + destination: finaloutdir + out: [outDir] + +outputs: + deseq2_outdir: type: Directory - inputBinding: - position: 1 - inMetadataFile: - type: File - inputBinding: - position: 2 - 
inMetadataSample: - type: string - inputBinding: - position: 3 - inMetadataFactorList: - type: string[] - inputBinding: - position: 4 + outputSource: collectFiles/outDir -outputs: - output: - type: File[] - outputBinding: - glob: - - "*.svg" - - "*.csv" +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.25.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/sleuth/sleuth.cwl b/workflows/sleuth/sleuth.cwl index 3ffc997b43ebf3e8525d7828be0197c9cf8d28e0..00a174971c83fd83dd61d7130ccd445e28635e7b 100644 --- a/workflows/sleuth/sleuth.cwl +++ b/workflows/sleuth/sleuth.cwl @@ -79,7 +79,7 @@ $namespaces: $schemas: - https://schema.org/version/latest/schemaorg-current-https.rdf - - http://edamontology.org/EDAM_1.18.owl + - http://edamontology.org/EDAM_1.25.owl s:author: - class: s:Person