From 68c1747104810cc8c58f73d9088990b425ca17d0 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 2 Aug 2024 10:15:05 +0200
Subject: [PATCH] restructure kallisto pipeline

---
Review notes (commentary after the "---" marker; ignored when the patch is applied):
 * kallisto-quant.cwl: edam:format_1930 is FASTQ, so the trailing "FASTA" comment was corrected.
 * Both tools: the SoftwareRequirement version now matches the pinned 0.46.2 container.
 * runs/kallisto/run.yml now ends with a newline.
 * kallisto-workflow.cwl references a `listFiles` step that this patch does not define (see FIXME).
 * Blob ids on the "index" lines are those of the original commit and predate these edits.

 .../Talinum_RNASeq_minimal/dataset/.gitkeep |   0
 runs/{kallisto_quant => kallisto}/run.yml   |   6 +
 runs/kallisto_index/run.yml                 |   8 -
 workflows/kallisto/README.md                |   7 +
 workflows/kallisto/kallisto-index.cwl       |  63 ++++++++
 workflows/kallisto/kallisto-quant.cwl       | 137 ++++++++++++++++++
 workflows/kallisto/kallisto-workflow.cwl    |  44 ++++++
 7 files changed, 257 insertions(+), 8 deletions(-)
 delete mode 100644 assays/Talinum_RNASeq_minimal/dataset/.gitkeep
 rename runs/{kallisto_quant => kallisto}/run.yml (78%)
 delete mode 100644 runs/kallisto_index/run.yml
 create mode 100644 workflows/kallisto/README.md
 create mode 100644 workflows/kallisto/kallisto-index.cwl
 create mode 100755 workflows/kallisto/kallisto-quant.cwl
 create mode 100644 workflows/kallisto/kallisto-workflow.cwl

diff --git a/assays/Talinum_RNASeq_minimal/dataset/.gitkeep b/assays/Talinum_RNASeq_minimal/dataset/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/runs/kallisto_quant/run.yml b/runs/kallisto/run.yml
similarity index 78%
rename from runs/kallisto_quant/run.yml
rename to runs/kallisto/run.yml
index 09afef4..269c202 100644
--- a/runs/kallisto_quant/run.yml
+++ b/runs/kallisto/run.yml
@@ -13,3 +13,9 @@ kallisto_bootstrap: 100
 kallisto_threads: 4
 kallisto_fragmentLength: 200
 kallisto_stdDev: 20
+
+
+
+in_genome_ref:
+  class: File
+  path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
diff --git a/runs/kallisto_index/run.yml b/runs/kallisto_index/run.yml
deleted file mode 100644
index df0c070..0000000
--- a/runs/kallisto_index/run.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-cores: 1
-sh_script:
-  class: File
-  path: ../../workflows/kallisto_index/kallisto_index.sh
-in_genome_ref:
-  class: File
-  path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
-out_folder: ./out
diff --git a/workflows/kallisto/README.md b/workflows/kallisto/README.md
new file mode 100644
index 0000000..52f46ea
--- /dev/null
+++ b/workflows/kallisto/README.md
@@ -0,0 +1,7 @@
+
+# Kallisto
+
+
+
+CWL adapted from: https://github.com/common-workflow-library/bio-cwl-tools/commit/91c42fb809ce18eafe16155cca0abf362270c0fe
+
diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl
new file mode 100644
index 0000000..4e5b609
--- /dev/null
+++ b/workflows/kallisto/kallisto-index.cwl
@@ -0,0 +1,63 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1
+  SoftwareRequirement:
+    packages:
+      kallisto:
+        version: [ "0.46.2" ]  # kept in step with the pinned container tag above
+        specs: [ https://identifiers.org/biotools/kallisto ]
+
+requirements:
+  - class: InlineJavascriptRequirement
+  - class: InitialWorkDirRequirement
+    listing:
+      - entry: "$({class: 'Directory', listing: []})"  # stages an empty directory
+        entryname: $(inputs.IndexOutfolder)
+        writable: true
+
+inputs:
+  InputFiles:
+    type: File[]
+    format: edam:format_1929  # FASTA
+    inputBinding:
+      position: 200  # sorts the FASTA files after the options (default position)
+
+  IndexOutfolder:
+    type: string  # NOTE(review): never passed to kallisto; presumably IndexName must point into it - confirm
+
+  IndexName:
+    type: string
+    inputBinding:
+      prefix: "--index="
+      separate: false
+
+# Optional arguments
+
+  kmerSize:
+    type: int?
+    inputBinding:
+      prefix: "--kmer-size="
+      separate: false
+
+  makeUnique:
+    type: boolean?
+    inputBinding:
+      prefix: "--make-unique"
+
+baseCommand: [kallisto, index]
+
+outputs:
+
+  index:
+    type: Directory
+    outputBinding:
+      glob: $(runtime.outdir)/$(inputs.IndexOutfolder)  # the directory staged above
+
+$namespaces:
+  edam: http://edamontology.org/
+$schemas:
+  - https://edamontology.org/EDAM_1.18.owl
diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl
new file mode 100755
index 0000000..a5c3b3e
--- /dev/null
+++ b/workflows/kallisto/kallisto-quant.cwl
@@ -0,0 +1,137 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1
+  SoftwareRequirement:
+    packages:
+      kallisto:
+        version: [ "0.46.2" ]  # kept in step with the pinned container tag above
+        specs: [ https://identifiers.org/biotools/kallisto ]
+
+inputs:
+  InputReads:
+    type: File[]
+    format: edam:format_1930  # FASTQ
+    inputBinding:
+      position: 200  # sorts the read files after every option
+
+  QuantOutfolder:
+    type: string  # used for --output-dir (see arguments) and for the output glob
+
+  Index:
+    type: File
+    inputBinding:
+      position: 1
+      prefix: "--index"
+
+  isSingle:
+    type: boolean
+    inputBinding:
+      position: 2
+      prefix: "--single"
+
+  # Optional inputs
+
+  isBias:
+    type: boolean?
+    inputBinding:
+      prefix: "--bias"
+
+  isFusion:
+    type: boolean?
+    inputBinding:
+      prefix: "--fusion"
+
+  isSingleOverhang:
+    type: boolean?
+    inputBinding:
+      prefix: "--single-overhang"
+
+  FragmentLength:
+    type: double?
+    inputBinding:
+      separate: false
+      prefix: "--fragment-length="
+
+  StandardDeviation:
+    type: double?
+    inputBinding:
+      prefix: "--sd"
+
+  BootstrapSamples:
+    type: int?
+    inputBinding:
+      separate: false
+      prefix: "--bootstrap-samples="
+
+  Seed:
+    type: int?
+    inputBinding:
+      prefix: "--seed"
+
+# Record inputs make the two strand flags mutually exclusive
+  Strand:
+    type:
+      - "null"
+      - type: record
+        name: forward
+        fields:
+          forward:
+            type: boolean
+            inputBinding:
+              prefix: "--fr-stranded"
+
+      - type: record
+        name: reverse
+        fields:
+          reverse:
+            type: boolean
+            inputBinding:
+              prefix: "--rf-stranded"
+
+  PseudoBam:
+    type: boolean?
+    inputBinding:
+      prefix: "--pseudobam"
+
+# A single record groups inputs that must be supplied together
+
+  GenomeBam:
+    type:
+      - "null"
+      - type: record
+        name: genome_bam
+        fields:
+          genomebam:
+            type: boolean
+            inputBinding:
+              prefix: "--genomebam"
+
+          gtf:
+            type: File
+            inputBinding:
+              prefix: "--gtf"
+
+          chromosomes:
+            type: File
+            inputBinding:
+              prefix: "--chromosomes"
+
+baseCommand: [ kallisto, quant ]
+
+arguments: [ "--output-dir", $(inputs.QuantOutfolder) ]
+
+outputs:
+
+  outFolder:
+    type: Directory
+    outputBinding:
+      glob: $(runtime.outdir)/$(inputs.QuantOutfolder)  # same folder as --output-dir
+
+$namespaces:
+  edam: http://edamontology.org/
+$schemas:
+  - https://edamontology.org/EDAM_1.18.owl
diff --git a/workflows/kallisto/kallisto-workflow.cwl b/workflows/kallisto/kallisto-workflow.cwl
new file mode 100644
index 0000000..5f65dfc
--- /dev/null
+++ b/workflows/kallisto/kallisto-workflow.cwl
@@ -0,0 +1,44 @@
+cwlVersion: v1.2
+class: Workflow
+
+requirements:
+  ScatterFeatureRequirement: {}
+
+inputs:
+  parentDir: Directory  # NOTE(review): not wired into any step in this file
+  dirNamePattern: string  # NOTE(review): not wired into any step in this file
+  collectedOut: string
+  Index: File
+  isSingle: boolean
+  FragmentLength: double?
+  StandardDeviation: double?
+  BootstrapSamples: int?
+
+steps:
+  quant:
+    run: kallisto-quant.cwl
+    scatter:
+      - InputReads
+      - QuantOutfolder
+    scatterMethod: dotproduct  # pairs the i-th read set with the i-th folder name
+    in:
+      InputReads: listFiles/inDirFiles  # FIXME(review): no `listFiles` step is defined in this file
+      QuantOutfolder: listFiles/inDirBasename  # FIXME(review): same missing `listFiles` step
+      Index: Index
+      isSingle: isSingle
+      FragmentLength: FragmentLength
+      StandardDeviation: StandardDeviation
+      BootstrapSamples: BootstrapSamples
+    out: [outFolder]
+  collect:
+    run: ../_aux-tools/yield-dirInDestination.cwl
+    scatter: inDir  # one collect job per quant output folder
+    in:
+      inDir: quant/outFolder
+      destinationDir: collectedOut
+    out: [outDir]
+
+outputs:
+  finalOut:
+    type: Directory[]
+    outputSource: collect/outDir
-- 
GitLab