From 943e5e521f5c8dbc0feba007ea1d7d6ff6781e88 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Tue, 18 Mar 2025 09:57:56 +0100 Subject: [PATCH] switch kallisto input to array of records --- runs/kallisto/run.cwl | 19 ++++--- runs/kallisto/run.yml | 57 ++++++++++--------- workflows/kallisto/workflow.cwl | 99 +++++++++++++++++---------------- 3 files changed, 92 insertions(+), 83 deletions(-) diff --git a/runs/kallisto/run.cwl b/runs/kallisto/run.cwl index 7ab4d83..3ec2a22 100644 --- a/runs/kallisto/run.cwl +++ b/runs/kallisto/run.cwl @@ -4,16 +4,20 @@ class: Workflow requirements: SubworkflowFeatureRequirement: {} + MultipleInputFeatureRequirement: {} inputs: IndexInput: File[] - InputReadsMultipleSamples: - type: + sampleRecord: + type: type: array - items: - type: array - items: File - numUnderscoresFileName: int + items: + type: record + fields: + readsOfOneSample: + type: File[] + sampleName: + type: string? isSingle: boolean FragmentLength: double? StandardDeviation: double? 
@@ -25,8 +29,7 @@ steps: run: ../../workflows/kallisto/workflow.cwl in: IndexInput: IndexInput - InputReadsMultipleSamples: InputReadsMultipleSamples - numUnderscoresFileName: numUnderscoresFileName + sampleRecord: sampleRecord isSingle: isSingle FragmentLength: FragmentLength StandardDeviation: StandardDeviation diff --git a/runs/kallisto/run.yml b/runs/kallisto/run.yml index 1d147f9..541e170 100644 --- a/runs/kallisto/run.yml +++ b/runs/kallisto/run.yml @@ -5,44 +5,47 @@ IndexInput: format: edam:format_1929 # FASTA ## Fastq files to be mapped -# The `InputReadsMultipleSamples` looks more complicated than needed -# It's an array of File arrays to generically allow multiple fastq files per sample +# The `readsOfOneSample` looks more complicated than needed +# It's an array of records (each with one or multiple files and a sample name) to generically allow multiple fastq files per sample -InputReadsMultipleSamples: - - +sampleRecord: + - readsOfOneSample: - class: File path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz - format: edam:format_1930 # FASTQ - - + format: edam:format_1930 + sampleName: 'DB_097' + - readsOfOneSample: + - class: File + path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_163' + - readsOfOneSample: - class: File path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz - format: edam:format_1930 # FASTQ - # - - # - class: File - # path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz - # format: edam:format_1930 # FASTQ - # - - # - class: File - # path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz - # format: edam:format_1930 # FASTQ - # - - # - class: File - # path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz - # format: edam:format_1930 # FASTQ - # - - # - class: 
File - # path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz - # format: edam:format_1930 # FASTQ - -numUnderscoresFileName: 2 + format: edam:format_1930 + sampleName: 'DB_099' + - readsOfOneSample: + - class: File + path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_103' + - readsOfOneSample: + - class: File + path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_161' + - readsOfOneSample: + - class: File + path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz + format: edam:format_1930 + sampleName: 'DB_165' ### Kallisto quant Parameters isSingle: true FragmentLength: 200 StandardDeviation: 20 BootstrapSamples: 30 - -resultsFolder: test +resultsFolder: kallisto_results $namespaces: edam: https://edamontology.org/ \ No newline at end of file diff --git a/workflows/kallisto/workflow.cwl b/workflows/kallisto/workflow.cwl index ea6b79d..4ce5617 100644 --- a/workflows/kallisto/workflow.cwl +++ b/workflows/kallisto/workflow.cwl @@ -1,3 +1,4 @@ +#!/usr/bin/env cwl-runner cwlVersion: v1.2 class: Workflow @@ -7,56 +8,58 @@ requirements: InlineJavascriptRequirement: {} inputs: - IndexInput: File[] - sampleRecord: - type: - type: array - items: - type: record - fields: - readsOfOneSample: - type: File[] - sampleName: - type: string? - isSingle: boolean - FragmentLength: double? - StandardDeviation: double? - BootstrapSamples: int? - resultsFolder: string + IndexInput: File[] + sampleRecord: + type: + type: array + items: + type: record + fields: + readsOfOneSample: + type: File[] + sampleName: + type: string? + isSingle: boolean + FragmentLength: double? + StandardDeviation: double? + BootstrapSamples: int? 
+ resultsFolder: string steps: - index: - run: kallisto-index.cwl - in: - InputFiles: IndexInput - IndexName: - source: IndexInput - valueFrom: $(self[0].nameroot) - out: [index] - quant: - run: kallisto-quant.cwl - scatter: [InputReads, QuantOutfolder] - scatterMethod: dotproduct - in: - InputReads: - source: sampleRecord - valueFrom: $(self.readsOfOneSample) - QuantOutfolder: - source: sampleRecord - valueFrom: $(self.sampleName) - Index: index/index - isSingle: isSingle - FragmentLength: FragmentLength - StandardDeviation: StandardDeviation - BootstrapSamples: BootstrapSamples - out: [outFolder] - collectResults: - run: ./yield-dirInDestination.cwl - scatter: inDir - in: - inDir: quant/outFolder - destinationDir: resultsFolder - out: [outDir] + index: + run: kallisto-index.cwl + in: + InputFiles: IndexInput + IndexName: + source: IndexInput + valueFrom: $(self[0].nameroot) + out: [index] + + quant: + run: kallisto-quant.cwl + scatter: [InputReads, QuantOutfolder] + scatterMethod: dotproduct + in: + InputReads: + source: sampleRecord + valueFrom: $(self.readsOfOneSample) + QuantOutfolder: + source: sampleRecord + valueFrom: $(self.sampleName) + Index: index/index + isSingle: isSingle + FragmentLength: FragmentLength + StandardDeviation: StandardDeviation + BootstrapSamples: BootstrapSamples + out: [outFolder] + + collectResults: + run: ./yield-dirInDestination.cwl + scatter: inDir + in: + inDir: quant/outFolder + destinationDir: resultsFolder + out: [outDir] outputs: finalOut: -- GitLab