From 12ee942947a19c543dfe169d4a808624b90064eb Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Thu, 25 Jan 2024 14:08:33 +0100
Subject: [PATCH] add kallisto pe workflow

---
 runs/kallisto_quant-pe/run.cwl                | 45 ++++++++++++++++++
 runs/kallisto_quant-pe/run.yml                | 13 ++++++
 .../kallisto_quant-pe/kallisto_quant-pe.sh    | 46 +++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100644 runs/kallisto_quant-pe/run.cwl
 create mode 100644 runs/kallisto_quant-pe/run.yml
 create mode 100644 workflows/kallisto_quant-pe/kallisto_quant-pe.sh

diff --git a/runs/kallisto_quant-pe/run.cwl b/runs/kallisto_quant-pe/run.cwl
new file mode 100644
index 0000000..af888a5
--- /dev/null
+++ b/runs/kallisto_quant-pe/run.cwl
@@ -0,0 +1,45 @@
+#!/usr/bin/env cwl-runner
+# Runs a shell script through bash; the kallisto biocontainer is requested as
+# a Docker hint. Arguments follow `bash` in position order: script, output
+# folder, kallisto index, fastq directory, bootstrap count, thread count.
+
+cwlVersion: v1.2
+class: CommandLineTool
+baseCommand: bash
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.43.0--hdf51.8.17_2
+
+inputs:
+  # Script file that bash executes.
+  sh_script:
+    type: File
+    inputBinding: {position: 0}
+  # Folder name given to the script; the output glob looks for the same name.
+  out_folder:
+    type: string
+    inputBinding: {position: 1}
+  # kallisto index file.
+  in_kallisto_index:
+    type: File
+    inputBinding: {position: 2}
+  # Directory containing the fastq files.
+  in_fastq_dir:
+    type: Directory
+    inputBinding: {position: 3}
+  # Integer forwarded as the bootstrap count.
+  kallisto_bootstrap:
+    type: int
+    inputBinding: {position: 4}
+  # Integer forwarded as the thread count.
+  kallisto_threads:
+    type: int
+    inputBinding: {position: 5}
+
+outputs:
+  # The out_folder directory found under the tool's runtime output directory.
+  outdir:
+    type: Directory[]
+    outputBinding:
+      glob: $(runtime.outdir)/$(inputs.out_folder)
diff --git a/runs/kallisto_quant-pe/run.yml b/runs/kallisto_quant-pe/run.yml
new file mode 100644
index 0000000..2aec75e
--- /dev/null
+++ b/runs/kallisto_quant-pe/run.yml
@@ -0,0 +1,13 @@
+# Job inputs for run.cwl (kallisto paired-end quantification).
+# NOTE(review): run.cwl declares no `cores` input — confirm this key is used.
+cores: 4
+# Shell script executed by the tool.
+sh_script: {class: File, path: ../../workflows/kallisto_quant-pe/kallisto_quant-pe.sh}
+# Name of the output folder handed to the script.
+out_folder: ./out
+# kallisto index file and the directory holding the fastq files.
+in_kallisto_index: {class: File, path: ../kallisto_index/out/kallisto_index}
+in_fastq_dir: {class: Directory, path: ../../assays/rna-seq/dataset}
+# Bootstrap count and thread count forwarded to the script.
+kallisto_bootstrap: 30
+kallisto_threads: 4
diff --git a/workflows/kallisto_quant-pe/kallisto_quant-pe.sh b/workflows/kallisto_quant-pe/kallisto_quant-pe.sh
new file mode 100644
index 0000000..da07576
--- /dev/null
+++ b/workflows/kallisto_quant-pe/kallisto_quant-pe.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+### Map RNASeq reads via kallisto
+### Note, this is written for paired-end mode only
+### And this assumes that there's exactly 2 fastq files for each sample with
+ ## forward = ${sample}_R1_001.fastq.gz
+ ## reverse = ${sample}_R2_001.fastq.gz
+
+################################################
+#### Read arguments from CLI
+################################################
+
+out_folder=$1
+in_kallisto_index=$2
+in_fastq_folder=$3
+kallisto_bootstrap=$4
+kallisto_threads=$5
+
+################################################
+#### If it does not exist, create out dir
+################################################
+
+mkdir -p "$out_folder"
+
+################################################
+#### Store fastq files in variable
+################################################
+
+fastq_files=$(ls "${in_fastq_folder}"/50*fastq* | sed 's/_R[1-2]_001.fastq.gz//g' | uniq)
+
+
+################################################
+#### Loop over fastq files and quantify reads
+################################################
+
+for j in $fastq_files; do
+	
+	sampleName=$(basename $j)
+	
+	echo $sampleName
+	
+	kallisto quant -b $kallisto_bootstrap -t $kallisto_threads -i "$in_kallisto_index" -o "$out_folder/$sampleName" "$j"_R1_001.fastq.gz "$j"_R2_001.fastq.gz
+
+	echo 'Kallisto done'
+
+done
-- 
GitLab