From 943e5e521f5c8dbc0feba007ea1d7d6ff6781e88 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Tue, 18 Mar 2025 09:57:56 +0100
Subject: [PATCH] switch kallisto input to array of records

---
 runs/kallisto/run.cwl           | 19 ++++---
 runs/kallisto/run.yml           | 57 ++++++++++---------
 workflows/kallisto/workflow.cwl | 99 +++++++++++++++++----------------
 3 files changed, 92 insertions(+), 83 deletions(-)

diff --git a/runs/kallisto/run.cwl b/runs/kallisto/run.cwl
index 7ab4d83..3ec2a22 100644
--- a/runs/kallisto/run.cwl
+++ b/runs/kallisto/run.cwl
@@ -4,16 +4,20 @@ class: Workflow
 
 requirements:
   SubworkflowFeatureRequirement: {}
+  MultipleInputFeatureRequirement: {}
 
 inputs:
   IndexInput: File[]
-  InputReadsMultipleSamples:
-    type: 
+  sampleRecord:
+    type:
       type: array
-      items: 
-        type: array
-        items: File
-  numUnderscoresFileName: int
+      items:
+        type: record
+        fields:
+          readsOfOneSample:
+            type: File[]
+          sampleName:
+            type: string?
   isSingle: boolean
   FragmentLength: double?  
   StandardDeviation: double?
@@ -25,8 +29,7 @@ steps:
     run:  ../../workflows/kallisto/workflow.cwl
     in:
       IndexInput: IndexInput
-      InputReadsMultipleSamples: InputReadsMultipleSamples
-      numUnderscoresFileName: numUnderscoresFileName
+      sampleRecord: sampleRecord
       isSingle: isSingle
       FragmentLength: FragmentLength
       StandardDeviation: StandardDeviation
diff --git a/runs/kallisto/run.yml b/runs/kallisto/run.yml
index 1d147f9..541e170 100644
--- a/runs/kallisto/run.yml
+++ b/runs/kallisto/run.yml
@@ -5,44 +5,47 @@ IndexInput:
     format: edam:format_1929 # FASTA
 
 ## Fastq files to be mapped
-# The `InputReadsMultipleSamples` looks more complicated than needed
-# It's an array of File arrays to generically allow multiple fastq files per sample
+# The `sampleRecord` input looks more complicated than needed.
+# It's an array of records (each with one or multiple files and a sample name) to generically allow multiple fastq files per sample
 
-InputReadsMultipleSamples:
-  - 
+sampleRecord:
+  - readsOfOneSample:
     - class: File
       path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz
-      format: edam:format_1930 # FASTQ
-  - 
+      format: edam:format_1930
+    sampleName: 'DB_097'
+  - readsOfOneSample:
+    - class: File
+      path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz
+      format: edam:format_1930  # FASTQ
+    sampleName: 'DB_163'
+  - readsOfOneSample:
     - class: File
       path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz
-      format: edam:format_1930 # FASTQ
-  # - 
-  #   - class: File
-  #     path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz
-  #     format: edam:format_1930 # FASTQ
-  # - 
-  #   - class: File
-  #     path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz
-  #     format: edam:format_1930 # FASTQ
-  # - 
-  #   - class: File
-  #     path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz
-  #     format: edam:format_1930 # FASTQ
-  # - 
-  #   - class: File
-  #     path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz
-  #     format: edam:format_1930 # FASTQ
-
-numUnderscoresFileName: 2
+      format: edam:format_1930
+    sampleName: 'DB_099'
+  - readsOfOneSample:
+    - class: File
+      path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz
+      format: edam:format_1930
+    sampleName: 'DB_103'
+  - readsOfOneSample:
+    - class: File
+      path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz
+      format: edam:format_1930
+    sampleName: 'DB_161'
+  - readsOfOneSample:
+    - class: File
+      path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz
+      format: edam:format_1930
+    sampleName: 'DB_165'
 
 ### Kallisto quant Parameters
 isSingle: true
 FragmentLength: 200
 StandardDeviation: 20
 BootstrapSamples: 30
-
-resultsFolder: test
+resultsFolder: kallisto_results
 
 $namespaces:
   edam: https://edamontology.org/
\ No newline at end of file
diff --git a/workflows/kallisto/workflow.cwl b/workflows/kallisto/workflow.cwl
index ea6b79d..4ce5617 100644
--- a/workflows/kallisto/workflow.cwl
+++ b/workflows/kallisto/workflow.cwl
@@ -1,3 +1,4 @@
+#!/usr/bin/env cwl-runner
 cwlVersion: v1.2
 class: Workflow
 
@@ -7,56 +8,58 @@ requirements:
   InlineJavascriptRequirement: {}
 
 inputs:
-  IndexInput: File[]
-  sampleRecord:
-    type:
-      type: array
-      items:
-        type: record
-        fields:
-          readsOfOneSample:
-            type: File[]
-          sampleName:
-            type: string?
-  isSingle: boolean
-  FragmentLength: double?
-  StandardDeviation: double?
-  BootstrapSamples: int?
-  resultsFolder: string
+    IndexInput: File[]
+    sampleRecord:
+        type:
+          type: array
+          items:
+            type: record
+            fields:
+              readsOfOneSample:
+                type: File[]
+              sampleName:
+                type: string?
+    isSingle: boolean
+    FragmentLength: double?
+    StandardDeviation: double?
+    BootstrapSamples: int?
+    resultsFolder: string
 
 steps:
-  index:
-    run: kallisto-index.cwl
-    in:
-      InputFiles: IndexInput
-      IndexName:
-        source: IndexInput
-        valueFrom: $(self[0].nameroot)
-    out: [index]
-  quant:
-    run: kallisto-quant.cwl
-    scatter: [InputReads, QuantOutfolder]
-    scatterMethod: dotproduct
-    in:
-      InputReads:
-        source: sampleRecord
-        valueFrom: $(self.readsOfOneSample)
-      QuantOutfolder:
-        source: sampleRecord
-        valueFrom: $(self.sampleName)
-      Index: index/index
-      isSingle: isSingle
-      FragmentLength: FragmentLength
-      StandardDeviation: StandardDeviation
-      BootstrapSamples: BootstrapSamples
-    out: [outFolder]
-  collectResults:
-    run: ./yield-dirInDestination.cwl
-    scatter: inDir
-    in:
-      inDir: quant/outFolder
-      destinationDir: resultsFolder
-    out: [outDir]
+    index:
+        run: kallisto-index.cwl
+        in:
+            InputFiles: IndexInput
+            IndexName:
+                source: IndexInput
+                valueFrom: $(self[0].nameroot)
+        out: [index]
+
+    quant:
+        run: kallisto-quant.cwl
+        scatter: [InputReads, QuantOutfolder]
+        scatterMethod: dotproduct
+        in:
+            InputReads:
+                source: sampleRecord
+                valueFrom: $(self.readsOfOneSample)
+            QuantOutfolder:
+                source: sampleRecord
+                valueFrom: $(self.sampleName)
+            Index: index/index
+            isSingle: isSingle
+            FragmentLength: FragmentLength
+            StandardDeviation: StandardDeviation
+            BootstrapSamples: BootstrapSamples
+        out: [outFolder]
+
+    collectResults:
+        run: ./yield-dirInDestination.cwl
+        scatter: inDir
+        in:
+            inDir: quant/outFolder
+            destinationDir: resultsFolder
+        out: [outDir]
 
 outputs:
   finalOut:
-- 
GitLab