From 8bc5ebe548f1252ad58867ec0c61b7aa4a7212d8 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 2 Aug 2024 13:59:32 +0200
Subject: [PATCH] somewhat generic kallisto with multiple samples worked

---
 .gitignore                               |  3 +++
 runs/kallisto/README.md                  |  1 -
 runs/kallisto/workflow.yml               | 21 +++++++++++++++------
 workflows/kallisto/kallisto-quant.cwl    |  2 +-
 workflows/kallisto/kallisto-workflow.cwl | 13 ++++++++-----
 5 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 11819f2..bce78f5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -386,3 +386,6 @@ Temporary Items
 # R temporaries
 .Rhistory
 .RData
+
+# CWL caches
+**/cache*/
\ No newline at end of file
diff --git a/runs/kallisto/README.md b/runs/kallisto/README.md
index 65d21cd..2f01d07 100644
--- a/runs/kallisto/README.md
+++ b/runs/kallisto/README.md
@@ -6,4 +6,3 @@
 cd runs/kallisto/
 cwltool --cachedir cache2 ../../workflows/kallisto/kallisto-workflow.cwl workflow.yml > $(date +"%Y-%m-%d_%H-%M").log 2>&1 &
 ```
-
diff --git a/runs/kallisto/workflow.yml b/runs/kallisto/workflow.yml
index e1d32d5..db33ad6 100644
--- a/runs/kallisto/workflow.yml
+++ b/runs/kallisto/workflow.yml
@@ -1,4 +1,14 @@
-InputReadsMultipleSamples: 
+## Reference FASTA file(s) used to build the kallisto index
+IndexInput:
+  - class: File
+    path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
+    format: edam:format_1929 # FASTA
+
+## FASTQ files to be mapped
+# `InputReadsMultipleSamples` may look more complicated than needed:
+# it is an array of File arrays (one inner array per sample), so that each sample can generically have multiple FASTQ files
+
+InputReadsMultipleSamples:
   - 
     - class: File
       path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz
@@ -7,14 +17,13 @@ InputReadsMultipleSamples:
     - class: File
       path: ../../assays/Talinum_RNASeq_minimal/dataset/DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz
       format: edam:format_1930 # FASTQ
-IndexInput:
-  - class: File
-    path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
-    format: edam:format_1929 # FASTA
+
+numCharsFastqFileName: 6
+### Kallisto quant Parameters
 isSingle: true
 FragmentLength: 200
 StandardDeviation: 20
-BootstrapSamples: 30
+BootstrapSamples: 1
 
 $namespaces:
   edam: https://edamontology.org/
\ No newline at end of file
diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl
index 8267499..9d6df5f 100755
--- a/workflows/kallisto/kallisto-quant.cwl
+++ b/workflows/kallisto/kallisto-quant.cwl
@@ -132,6 +132,6 @@ outputs:
       glob: $(runtime.outdir)/$(inputs.QuantOutfolder)
 
 $namespaces:
-  edam: http://edamontology.org/
+  edam: https://edamontology.org/
 $schemas:
   - https://edamontology.org/EDAM_1.18.owl
diff --git a/workflows/kallisto/kallisto-workflow.cwl b/workflows/kallisto/kallisto-workflow.cwl
index 37fb7df..c0e074e 100644
--- a/workflows/kallisto/kallisto-workflow.cwl
+++ b/workflows/kallisto/kallisto-workflow.cwl
@@ -7,13 +7,14 @@ requirements:
   InlineJavascriptRequirement: {}
 
 inputs:
+  IndexInput: File[]
   InputReadsMultipleSamples:
     type: 
       type: array
       items: 
         type: array
         items: File
-  IndexInput: File[]
+  numCharsFastqFileName: int?
   isSingle: boolean
   FragmentLength: double?  
   StandardDeviation: double?
@@ -30,13 +31,15 @@ steps:
     out: [index]
   quant:
     run: kallisto-quant.cwl
-    scatter: InputReads
+    scatter: 
+      - InputReads
+      - QuantOutfolder
+    scatterMethod: dotproduct
     in:
       InputReads: InputReadsMultipleSamples
       QuantOutfolder:
-        # source: InputReadsMultipleSamples
-        # valueFrom: $(self[0].nameroot)
-        valueFrom: testSample
+        source: InputReadsMultipleSamples
+        valueFrom: $(self[0].nameroot)
       Index: index/index
       isSingle: isSingle
       FragmentLength: FragmentLength 
-- 
GitLab