From 68c1747104810cc8c58f73d9088990b425ca17d0 Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Fri, 2 Aug 2024 10:15:05 +0200
Subject: [PATCH] restructure kallisto pipeline

---
 .../Talinum_RNASeq_minimal/dataset/.gitkeep   |   0
 runs/{kallisto_quant => kallisto}/run.yml     |   6 +
 runs/kallisto_index/run.yml                   |   8 -
 workflows/kallisto/README.md                  |   7 +
 workflows/kallisto/kallisto-index.cwl         |  63 ++++++++
 workflows/kallisto/kallisto-quant.cwl         | 137 ++++++++++++++++++
 workflows/kallisto/kallisto-workflow.cwl      |  44 ++++++
 7 files changed, 257 insertions(+), 8 deletions(-)
 delete mode 100644 assays/Talinum_RNASeq_minimal/dataset/.gitkeep
 rename runs/{kallisto_quant => kallisto}/run.yml (78%)
 delete mode 100644 runs/kallisto_index/run.yml
 create mode 100644 workflows/kallisto/README.md
 create mode 100644 workflows/kallisto/kallisto-index.cwl
 create mode 100755 workflows/kallisto/kallisto-quant.cwl
 create mode 100644 workflows/kallisto/kallisto-workflow.cwl

diff --git a/assays/Talinum_RNASeq_minimal/dataset/.gitkeep b/assays/Talinum_RNASeq_minimal/dataset/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/runs/kallisto_quant/run.yml b/runs/kallisto/run.yml
similarity index 78%
rename from runs/kallisto_quant/run.yml
rename to runs/kallisto/run.yml
index 09afef4..269c202 100644
--- a/runs/kallisto_quant/run.yml
+++ b/runs/kallisto/run.yml
@@ -13,3 +13,9 @@ kallisto_bootstrap: 100
 kallisto_threads: 4
 kallisto_fragmentLength: 200
 kallisto_stdDev: 20
+
+
+
+in_genome_ref:  # NOTE(review): kallisto-workflow.cwl in this patch declares no input with this name; confirm which workflow reads it
+  class: File
+  path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
\ No newline at end of file
diff --git a/runs/kallisto_index/run.yml b/runs/kallisto_index/run.yml
deleted file mode 100644
index df0c070..0000000
--- a/runs/kallisto_index/run.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-cores: 1
-sh_script:
-  class: File
-  path: ../../workflows/kallisto_index/kallisto_index.sh
-in_genome_ref:
-  class: File
-  path: ../../studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
-out_folder: ./out
diff --git a/workflows/kallisto/README.md b/workflows/kallisto/README.md
new file mode 100644
index 0000000..52f46ea
--- /dev/null
+++ b/workflows/kallisto/README.md
@@ -0,0 +1,7 @@
+
+# Kallisto
+
+
+
+CWL adapted from: https://github.com/common-workflow-library/bio-cwl-tools/commit/91c42fb809ce18eafe16155cca0abf362270c0fe
+
diff --git a/workflows/kallisto/kallisto-index.cwl b/workflows/kallisto/kallisto-index.cwl
new file mode 100644
index 0000000..4e5b609
--- /dev/null
+++ b/workflows/kallisto/kallisto-index.cwl
@@ -0,0 +1,63 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1  # pinned image: kallisto 0.46.2
+  SoftwareRequirement:
+    packages:
+      kallisto:
+        version: [ "0.46.2" ]  # kept in step with the dockerPull tag so container and non-container runs use the same release
+        specs: [ https://identifiers.org/biotools/kallisto ]
+
+requirements:  # mandatory features, unlike the optional hints section
+  - class: InlineJavascriptRequirement  # needed for the $({...}) object expression in the listing
+  - class: InitialWorkDirRequirement
+    listing:
+      - entry: "$({class: 'Directory', listing: []})"  # stages an empty directory in the working dir
+        entryname: $(inputs.IndexOutfolder)  # the staged directory is named after the IndexOutfolder input
+        writable: true
+
+inputs:
+  InputFiles:
+    type: File[]
+    format: edam:format_1929 # FASTA
+    inputBinding:
+      position: 200  # sorts after the unpositioned option flags, so the FASTA files end the command line
+  
+  IndexOutfolder:
+    type: string  # no inputBinding: only names the staged work directory and the output glob
+
+  IndexName:
+    type: string  # NOTE(review): the index only lands in the globbed directory if this is a path inside IndexOutfolder; confirm callers pass it that way
+    inputBinding:
+      prefix: "--index="
+      separate: false  # prefix and value are joined into one argument: --index=<IndexName>
+
+# Optional arguments
+
+  kmerSize:
+    type: int?
+    inputBinding:
+      prefix: "--kmer-size="
+      separate: false  # joined form: --kmer-size=<kmerSize>
+
+  makeUnique:
+    type: boolean?
+    inputBinding:
+      prefix: "--make-unique"  # bare flag, emitted only when the value is true
+
+baseCommand: [kallisto, index]
+
+outputs:
+
+  index:
+    type: Directory  # the directory staged under the IndexOutfolder name, collected after the run
+    outputBinding:
+      glob: $(inputs.IndexOutfolder)  # relative pattern: glob is resolved against the output directory, so no runtime.outdir prefix
+
+$namespaces:
+  edam: http://edamontology.org/
+$schemas:
+  - https://edamontology.org/EDAM_1.18.owl
diff --git a/workflows/kallisto/kallisto-quant.cwl b/workflows/kallisto/kallisto-quant.cwl
new file mode 100755
index 0000000..a5c3b3e
--- /dev/null
+++ b/workflows/kallisto/kallisto-quant.cwl
@@ -0,0 +1,137 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.46.2--h4f7b962_1  # pinned image: kallisto 0.46.2
+  SoftwareRequirement:
+    packages:
+      kallisto:
+        version: [ "0.46.2" ]  # kept in step with the dockerPull tag so container and non-container runs use the same release
+        specs: [ https://identifiers.org/biotools/kallisto ]
+
+inputs:
+  InputReads:
+    type: File[]
+    format: edam:format_1930  # FASTQ
+    inputBinding:
+      position: 200  # highest position in this tool, so the read files end the command line
+
+  QuantOutfolder: 
+    type: string  # no inputBinding: consumed by `arguments` (--output-dir) and by the output glob
+
+  Index:
+    type: File
+    inputBinding:
+      position: 1
+      prefix: "--index"  # separate form: --index <path>
+
+  isSingle:
+    type: boolean  # not optional, so paired-end callers must pass false explicitly
+    inputBinding:
+      position: 2
+      prefix: "--single"  # kallisto's single-end mode also needs --fragment-length and --sd, which are optional inputs in this tool
+
+  # Optional inputs: each one is left off the command line when not supplied
+
+  isBias:
+    type: boolean?
+    inputBinding:
+      prefix: "--bias"
+
+  isFusion:
+    type: boolean?
+    inputBinding:
+      prefix: "--fusion"
+
+  isSingleOverhang:
+    type: boolean?
+    inputBinding:
+      prefix: "--single-overhang"
+  
+  FragmentLength:
+    type: double?
+    inputBinding:
+      separate: false
+      prefix: "--fragment-length="  # joined form: --fragment-length=<value>
+  
+  StandardDeviation:
+    type: double?
+    inputBinding:
+      prefix: "--sd"  # separate form: --sd <value>
+  
+  BootstrapSamples:
+    type: int?
+    inputBinding:
+      separate: false
+      prefix: "--bootstrap-samples="  # joined form: --bootstrap-samples=<value>
+  
+  Seed:
+    type: int?
+    inputBinding:
+      prefix: "--seed"  # separate form: --seed <value>
+
+  # Strand is a union of two single-field records, so at most one strandedness flag can be supplied
+  Strand:
+    type:
+      - "null"
+      - type: record
+        name: forward
+        fields:
+          forward:
+            type: boolean
+            inputBinding:
+              prefix: "--fr-stranded"
+
+      - type: record
+        name: reverse
+        fields:
+          reverse:
+            type: boolean
+            inputBinding:
+              prefix: "--rf-stranded"
+
+  PseudoBam:
+    type: boolean?
+    inputBinding:
+      prefix: "--pseudobam"
+
+# Record input: all three fields are non-optional, so --genomebam, --gtf and --chromosomes must be supplied together
+  
+  GenomeBam:
+    type:
+      - "null"
+      - type: record
+        name: genome_bam
+        fields:
+          genomebam:
+            type: boolean
+            inputBinding:
+              prefix: "--genomebam"
+
+          gtf:
+            type: File
+            inputBinding:
+              prefix: "--gtf"
+
+          chromosomes:
+            type: File
+            inputBinding:
+              prefix: "--chromosomes"
+
+baseCommand: [ kallisto, quant ]
+
+arguments: [ "--output-dir", $(inputs.QuantOutfolder) ]  # output goes to a folder named by QuantOutfolder, the same name the output glob uses
+
+outputs:
+
+  outFolder:
+    type: Directory  # the folder passed to --output-dir
+    outputBinding:
+      glob: $(inputs.QuantOutfolder)  # relative pattern: glob is resolved against the output directory, so no runtime.outdir prefix
+
+$namespaces:
+  edam: http://edamontology.org/
+$schemas:
+  - https://edamontology.org/EDAM_1.18.owl
diff --git a/workflows/kallisto/kallisto-workflow.cwl b/workflows/kallisto/kallisto-workflow.cwl
new file mode 100644
index 0000000..5f65dfc
--- /dev/null
+++ b/workflows/kallisto/kallisto-workflow.cwl
@@ -0,0 +1,44 @@
+cwlVersion: v1.2
+class: Workflow
+
+requirements:
+  ScatterFeatureRequirement: {}
+
+inputs:
+  parentDir: Directory  # NOTE(review): no step in this file reads it; presumably meant for the `listFiles` step that quant references but that is not defined here. Confirm
+  dirNamePattern: string  # NOTE(review): likewise not read by any step in this file; confirm
+  collectedOut: string  # passed to the collect step as destinationDir
+  Index: File
+  isSingle: boolean
+  FragmentLength: double?  # optional; forwarded unchanged to the quant step
+  StandardDeviation: double?
+  BootstrapSamples: int?  # optional; forwarded unchanged to the quant step
+
+steps:
+  quant:
+    run: kallisto-quant.cwl
+    scatter:
+      - InputReads
+      - QuantOutfolder
+    scatterMethod: dotproduct  # pairs the i-th reads list with the i-th output-folder name
+    in:
+      InputReads: listFiles/inDirFiles  # NOTE(review): no `listFiles` step is defined in this workflow, so this source cannot resolve as written; confirm the intended step
+      QuantOutfolder: listFiles/inDirBasename  # NOTE(review): same undefined `listFiles` step
+      Index: Index
+      isSingle: isSingle
+      FragmentLength: FragmentLength 
+      StandardDeviation: StandardDeviation
+      BootstrapSamples: BootstrapSamples
+    out: [outFolder]
+  collect:
+    run: ../_aux-tools/yield-dirInDestination.cwl  # this tool file is not created by this patch
+    scatter: inDir  # one job per directory produced by the quant step
+    in:
+      inDir: quant/outFolder
+      destinationDir: collectedOut
+    out: [outDir]
+
+outputs:
+  finalOut:
+    type: Directory[]  # an array because the collect step is scattered
+    outputSource: collect/outDir
-- 
GitLab