diff --git a/runs/deseq2/results_ma-plot.svg b/runs/deseq2/results/results_ma-plot.svg similarity index 100% rename from runs/deseq2/results_ma-plot.svg rename to runs/deseq2/results/results_ma-plot.svg diff --git a/runs/deseq2/results_pca-plot.svg b/runs/deseq2/results/results_pca-plot.svg similarity index 100% rename from runs/deseq2/results_pca-plot.svg rename to runs/deseq2/results/results_pca-plot.svg diff --git a/runs/deseq2/results_stats.csv b/runs/deseq2/results/results_stats.csv similarity index 100% rename from runs/deseq2/results_stats.csv rename to runs/deseq2/results/results_stats.csv diff --git a/runs/deseq2/run.cwl b/runs/deseq2/run.cwl index e0fdbf42ee4d373eae50a4e3ccf8fc91ac7229ec..0aadd018989bc587f5cef0da02db2b655968adbe 100644 --- a/runs/deseq2/run.cwl +++ b/runs/deseq2/run.cwl @@ -2,11 +2,15 @@ cwlVersion: v1.2 class: Workflow +requirements: + SubworkflowFeatureRequirement: {} + inputs: inKallistoResults: Directory inMetadataFile: File inMetadataSample: string inMetadataFactorList: string[] + resultsoutdir: string steps: deseq2: @@ -16,12 +20,14 @@ steps: inMetadataFile: inMetadataFile inMetadataSample: inMetadataSample inMetadataFactorList: inMetadataFactorList - out: [output] + finaloutdir: resultsoutdir + + out: [deseq2_outdir] outputs: output: - type: File[] - outputSource: deseq2/output + type: Directory + outputSource: deseq2/deseq2_outdir $namespaces: s: https://schema.org/ @@ -34,4 +40,4 @@ $schemas: s:author: - class: s:Person s:name: Dominik Brilhaus - s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/runs/deseq2/run.yml b/runs/deseq2/run.yml index b8e9909794f2554784021064e4914996f866bcc2..60e8b8fc310a0aa7b00b44d8fea1bb113eda7cbb 100644 --- a/runs/deseq2/run.yml +++ b/runs/deseq2/run.yml @@ -6,4 +6,5 @@ inMetadataFile: path: ../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv inMetadataSample: "Input [Source Name]" inMetadataFactorList: - - 
"Factor [Photosynthesis mode]" \ No newline at end of file + - "Factor [Photosynthesis mode]" +resultsoutdir: results diff --git a/runs/sleuth/run.cwl b/runs/sleuth/run.cwl index 7884d8dcbd318257bfaf4de6ef0a93b603875cbb..789736f8804a3a3b7783ebc7f2327f2fe9ffdf16 100644 --- a/runs/sleuth/run.cwl +++ b/runs/sleuth/run.cwl @@ -3,13 +3,16 @@ cwlVersion: v1.2 class: Workflow +requirements: + SubworkflowFeatureRequirement: {} + inputs: inKallistoResults: Directory inMetadataFile: File inMetadataSample: string inMetadataFactorList: string[] inMetadataDataCol: string - outFolder: string + resultsoutdir: string steps: sleuth: @@ -20,14 +23,13 @@ steps: inMetadataSample: inMetadataSample inMetadataFactorList: inMetadataFactorList inMetadataDataCol: inMetadataDataCol - outFolder: outFolder - out: [outdir] + finaloutdir: resultsoutdir + out: [ sleuth_outdir ] outputs: outdir: - type: Directory[] - outputSource: sleuth/outdir - + type: Directory + outputSource: sleuth/sleuth_outdir $namespaces: s: https://schema.org/ diff --git a/runs/sleuth/run.yml b/runs/sleuth/run.yml index 1996f505b7c00d7aaaabbcf18e6284e3c3d9384b..61c509d64dcfd32507512692a66c8cfad46fbb7f 100644 --- a/runs/sleuth/run.yml +++ b/runs/sleuth/run.yml @@ -8,4 +8,4 @@ inMetadataSample: "Input [Source Name]" inMetadataFactorList: - "Factor [Photosynthesis mode]" inMetadataDataCol: "Output [Data]" -outFolder: results +resultsoutdir: results diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index dcad6b63d978ab9e6b6f144549f9095f59d8eaa8..b20545ef2ea581426104565ac860aedf3e04faf4 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -7,13 +7,6 @@ library("tximport") library("rhdf5") library("ggplot2") -# ## Tests - -# inKallistoResults <- "../../runs/kallisto/kallisto_results" -# inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv" -# inMetadataSample <- "Input [Source Name]" -# inMetadataFactorList <- list("Factor [Photosynthesis mode]") - ### Read arguments from 
CLI args <- commandArgs(trailingOnly = T) diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl new file mode 100644 index 0000000000000000000000000000000000000000..7087f6fd43d6655b79d79ecfd2a86067759cd3f4 --- /dev/null +++ b/workflows/deseq2/deseq2.cwl @@ -0,0 +1,92 @@ +#!/usr/bin/env cwl-runner + +doc: | + DESeq2 example workflow for **differential gene expression analysis** + + This workflow runs DESeq2 on the output of the kallisto workflow + and the metadata file. + It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps + 1. Read kallisto data + 2. Prep / run deseq2 + 3. Plot results + + ## DESeq2 docs: + https://bioconductor.org/packages/release/bioc/html/DESeq2.html + + ## Importing kallisto output with tximport + https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto + + ## Multi-package containers + - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) + - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` + - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: + +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 + SoftwareRequirement: + packages: + - package: R + version: [ "4.1.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + - package: DESeq2 + version: [ "1.34.0" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_015687 + - https://identifiers.org/biotools/deseq2 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: deseq2.R + entry: + $include: 
deseq2.R +baseCommand: [Rscript, deseq2.R] +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactorList: + type: string[] + inputBinding: + position: 4 + +outputs: + results_stats: + type: File + outputBinding: + glob: "results_stats.csv" + results_ma-plot: + type: File + outputBinding: + glob: "results_ma-plot.svg" + results_pca-plot: + type: File + outputBinding: + glob: "results_pca-plot.svg" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.25.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/deseq2/gather-files.cwl b/workflows/deseq2/gather-files.cwl new file mode 100644 index 0000000000000000000000000000000000000000..67d23ec72e90012c4e07b2f2ebaf3b2d102308c8 --- /dev/null +++ b/workflows/deseq2/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. 
+ + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/deseq2/workflow.cwl b/workflows/deseq2/workflow.cwl index 0f1d68dfbb3bf401270f8f0b49a4706f0f571455..18af3a16861ee832d544e2533d06644c4b406ebb 100644 --- a/workflows/deseq2/workflow.cwl +++ b/workflows/deseq2/workflow.cwl @@ -1,62 +1,57 @@ #!/usr/bin/env cwl-runner - -doc: | - DESeq2 example workflow for **differential gene expression analysis** - - This workflow runs DESeq2 on the output of the kallisto workflow - and the metadata file. - It runs an R script, deseq2.R, which ideally should be split into three sub scripts and accordingly three workflow steps - 1. Read kallsito data - 2. Prep / run deseq2 - 3. 
Plot results - - ## DESeq2 docs: - https://bioconductor.org/packages/release/bioc/html/DESeq2.html - - ## Importing kallisto output with tximport - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - - ## Multi-package containers - - R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) - - Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` - - and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: - cwlVersion: v1.2 -class: CommandLineTool -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 +class: Workflow + requirements: - - class: InitialWorkDirRequirement - listing: - - entryname: deseq2.R - entry: - $include: deseq2.R - - class: NetworkAccess - networkAccess: true -baseCommand: [Rscript, deseq2.R] + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} + inputs: - inKallistoResults: + inKallistoResults: Directory + inMetadataFile: File + inMetadataSample: string + inMetadataFactorList: string[] + finaloutdir: string + +steps: + deseq2: + run: deseq2.cwl + in: + inKallistoResults: inKallistoResults + inMetadataFile: inMetadataFile + inMetadataSample: inMetadataSample + inMetadataFactorList: inMetadataFactorList + out: + - results_stats + - results_ma-plot + - results_pca-plot + collectFiles: + run: ./gather-files.cwl + in: + inFiles: + source: + - deseq2/results_stats + - deseq2/results_ma-plot + - deseq2/results_pca-plot + linkMerge: merge_flattened + destination: finaloutdir + out: [outDir] + +outputs: + deseq2_outdir: type: Directory - inputBinding: - position: 1 - inMetadataFile: - type: File - inputBinding: - position: 2 - 
inMetadataSample: - type: string - inputBinding: - position: 3 - inMetadataFactorList: - type: string[] - inputBinding: - position: 4 + outputSource: collectFiles/outDir -outputs: - output: - type: File[] - outputBinding: - glob: - - "*.svg" - - "*.csv" +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl index 1a4b3c96e3a9b20f201c687f148ab94513f8792c..c5719d553635dce5d6827c90811c832b8c2aa18e 100644 --- a/workflows/kallisto/kallisto-index.cwl +++ b/workflows/kallisto/kallisto-index.cwl @@ -39,9 +39,6 @@ hints: - https://identifiers.org/rrid/RRID:SCR_016582 - https://identifiers.org/biotools/kallisto -requirements: - InlineJavascriptRequirement: {} - baseCommand: [kallisto, index] inputs: @@ -83,7 +80,3 @@ $namespaces: $schemas: - https://edamontology.org/EDAM_1.25.owl - https://schema.org/version/latest/schemaorg-current-https.rdf - -s:license: https://spdx.org/licenses/BSD-2-Clause -s:citation: https://dx.doi.org/10.1038/nbt.3519 -s:codeRepository: https://github.com/pachterlab/kallisto diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl index 93ff361284fa411c945fcbb5ddf4c498770160f8..0eea07f31127398cfe50d108232709e8a14bfa36 100755 --- a/workflows/kallisto/kallisto-quant.cwl +++ b/workflows/kallisto/kallisto-quant.cwl @@ -176,7 +176,3 @@ $namespaces: $schemas: - https://edamontology.org/EDAM_1.25.owl - https://schema.org/version/latest/schemaorg-current-https.rdf - -s:license: https://spdx.org/licenses/BSD-2-Clause -s:citation: https://dx.doi.org/10.1038/nbt.3519 -s:codeRepository: https://github.com/pachterlab/kallisto diff --git a/workflows/sleuth/gather-files.cwl 
b/workflows/sleuth/gather-files.cwl new file mode 100644 index 0000000000000000000000000000000000000000..67d23ec72e90012c4e07b2f2ebaf3b2d102308c8 --- /dev/null +++ b/workflows/sleuth/gather-files.cwl @@ -0,0 +1,24 @@ +cwlVersion: v1.2 +class: ExpressionTool +label: Gather files +doc: | + Helper tool to organize workflow outputs + + Takes an array of files (e.g. from a workflow step) and yields them in a destination directory. + + Adapted from: https://github.com/common-workflow-language/cwl-v1.1/blob/a22b7580c6b50e77c0a181ca59d3828dd5c69143/tests/dir7.cwl +requirements: + - class: InlineJavascriptRequirement +inputs: + inFiles: File[] + destination: string +expression: | + ${ + return {"outDir": { + "class": "Directory", + "basename": inputs.destination, + "listing": inputs.inFiles + } }; + } +outputs: + outDir: Directory \ No newline at end of file diff --git a/workflows/sleuth/sleuth.R b/workflows/sleuth/sleuth.R index 2c87d6161cb691e17e222abd177150e261645fec..a9e0b52386c384970001c8c7edb45b84c3f5fc02 100644 --- a/workflows/sleuth/sleuth.R +++ b/workflows/sleuth/sleuth.R @@ -18,7 +18,6 @@ inMetadataFile <- args[2] inMetadataSample <- args[3] inMetadataFactorList <- args[4] inMetadataDataCol <- args[5] -outFolder <- args[6] # inKallistoResults <- "../../runs/kallisto/kallisto_results" # inMetadataFile <- "../../runs/isaSampleToRawDataSeq/rnaseq-samples.csv" @@ -27,12 +26,6 @@ outFolder <- args[6] # inMetadataDataCol <- "Output [Data]" # outFolder <- "." 
-################################################ -#### If it does not exist, create out dir -################################################ - -dir.create(outFolder, recursive = T, showWarnings = F) - ################################################ #### Read ISA sample metadata ################################################ @@ -43,10 +36,8 @@ samples <- read.csv(file = inMetadataFile, check.names = FALSE) #### Read Kallisto results ################################################ -base_dir <- inKallistoResults - # A list of paths to the kallisto results indexed by the sample IDs is collated with -kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths +kal_dirs <- dir(inKallistoResults, full.names = T) ## Sleuth requires full paths s2c <- samples[order(samples[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList), inMetadataDataCol)] @@ -66,7 +57,7 @@ design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) so <- sleuth_prep(s2c, full_model = design_formula) -save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData")) +save(so, file = "kallisto_sleuthObject.RData") ################################################ #### Extract expression tables @@ -76,13 +67,13 @@ save(so, file = file.path(outFolder, "kallisto_sleuthObject.RData")) expression_data <- kallisto_table(so) ## write to file -write.csv(expression_data, paste(outFolder, "/kallisto_df.csv", sep = "/"), row.names = F) +write.csv(expression_data, "kallisto_df.csv", row.names = F) ## as tpm matrix (gene x sample) tpm_table <- reshape(expression_data, idvar = "target_id", timevar = "sample", direction = "wide", v.names = "tpm") # Write to file -write.csv(tpm_table, file.path(outFolder, "kallisto_tpmMatrix.csv"), row.names = F) +write.csv(tpm_table, "kallisto_tpmMatrix.csv", row.names = F) ################################################ #### Summarize mapping stats @@ -95,11 +86,11 @@ for (i in dir(kal_dirs, pattern = ".json", full.names = 
T)) { mapping_stats <- rbind(mapping_stats, z) } -write.csv(mapping_stats, file.path(outFolder, "kallisto_mappingStats.csv"), row.names = F) +write.csv(mapping_stats, "kallisto_mappingStats.csv", row.names = F) ################################################ -#### Run sleuth to identify DEGs +#### Run sleuth to identify DGE ################################################ so <- sleuth_fit(so) @@ -111,4 +102,4 @@ sleuth_table <- sleuth_results(so, "reduced:full", "lrt", show_all = FALSE) ### write to file -write.csv(sleuth_table, file.path(outFolder, "sleuth_dge.csv"), row.names = F) +write.csv(sleuth_table, "sleuth_dge.csv", row.names = F) diff --git a/workflows/sleuth/sleuth.cwl b/workflows/sleuth/sleuth.cwl new file mode 100644 index 0000000000000000000000000000000000000000..00a174971c83fd83dd61d7130ccd445e28635e7b --- /dev/null +++ b/workflows/sleuth/sleuth.cwl @@ -0,0 +1,87 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.2 +class: CommandLineTool + +hints: + DockerRequirement: + dockerPull: quay.io/biocontainers/mulled-v2-fdd016122f200fdc6dc30f6ea2fd0000e8067dff:f9531f6ac1f44332eff70b5912d7d5f3ebe8df38-0 + SoftwareRequirement: + packages: + - package: R + specs: + - https://identifiers.org/rrid/RRID:SCR_001905 + - https://identifiers.org/biotools/r + - https://anaconda.org/bioconda/r + version: [ "4.2.3" ] + - package: sleuth + version: [ "0.30.1" ] + specs: + - https://identifiers.org/rrid/RRID:SCR_016883 + - https://identifiers.org/biotools/sleuth + +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: sleuth.R + entry: + $include: sleuth.R + +baseCommand: [Rscript, sleuth.R] + +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactorList: + type: string[] + inputBinding: + position: 4 + inMetadataDataCol: + type: string + inputBinding: + position: 5 + +outputs: + 
kallisto_sleuthObject: + type: File + outputBinding: + glob: "kallisto_sleuthObject.RData" + kallisto_df: + type: File + outputBinding: + glob: "kallisto_df.csv" + kallisto_tpmMatrix: + type: File + outputBinding: + glob: "kallisto_tpmMatrix.csv" + kallisto_mappingStats: + type: File + outputBinding: + glob: "kallisto_mappingStats.csv" + sleuth_dge: + type: File + outputBinding: + glob: "sleuth_dge.csv" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.25.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197 \ No newline at end of file diff --git a/workflows/sleuth/workflow.cwl b/workflows/sleuth/workflow.cwl index 4a8add08556780341d42af5b5488691ea8608107..5f08a472b1116f4523e732dc3195f9dcb6974275 100644 --- a/workflows/sleuth/workflow.cwl +++ b/workflows/sleuth/workflow.cwl @@ -1,53 +1,63 @@ #!/usr/bin/env cwl-runner - cwlVersion: v1.2 -class: CommandLineTool - -hints: - DockerRequirement: - dockerPull: quay.io/biocontainers/mulled-v2-fdd016122f200fdc6dc30f6ea2fd0000e8067dff:f9531f6ac1f44332eff70b5912d7d5f3ebe8df38-0 +class: Workflow requirements: - - class: InitialWorkDirRequirement - listing: - - entryname: sleuth.R - entry: - $include: sleuth.R - - class: NetworkAccess - networkAccess: true - -baseCommand: [Rscript, sleuth.R] + ScatterFeatureRequirement: {} + SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} inputs: - inKallistoResults: + inKallistoResults: Directory + inMetadataFile: File + inMetadataSample: string + inMetadataFactorList: string[] + inMetadataDataCol: string + finaloutdir: string + +steps: + sleuth: + run: sleuth.cwl + in: + inKallistoResults: inKallistoResults + inMetadataFile: inMetadataFile + inMetadataSample: inMetadataSample + inMetadataFactorList: inMetadataFactorList + inMetadataDataCol: inMetadataDataCol + out: + - 
kallisto_sleuthObject + - kallisto_df + - kallisto_tpmMatrix + - kallisto_mappingStats + - sleuth_dge + collectFiles: + run: ./gather-files.cwl + in: + inFiles: + source: + - sleuth/kallisto_sleuthObject + - sleuth/kallisto_df + - sleuth/kallisto_tpmMatrix + - sleuth/kallisto_mappingStats + - sleuth/sleuth_dge + linkMerge: merge_flattened + destination: finaloutdir + out: [outDir] + +outputs: + sleuth_outdir: type: Directory - inputBinding: - position: 1 - inMetadataFile: - type: File - inputBinding: - position: 2 - inMetadataSample: - type: string - inputBinding: - position: 3 - inMetadataFactorList: - type: string[] - inputBinding: - position: 4 - inMetadataDataCol: - type: string - inputBinding: - position: 5 - outFolder: - type: string - inputBinding: - position: 6 + outputSource: collectFiles/outDir -outputs: -- id: outdir - type: - type: array - items: Directory - outputBinding: - glob: $(runtime.outdir)/$(inputs.outFolder) +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ + +$schemas: + - https://schema.org/version/latest/schemaorg-current-https.rdf + - http://edamontology.org/EDAM_1.18.owl + +s:author: + - class: s:Person + s:name: Dominik Brilhaus + s:identifier: https://orcid.org/0000-0001-9021-3197