From 12ee942947a19c543dfe169d4a808624b90064eb Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Thu, 25 Jan 2024 14:08:33 +0100
Subject: [PATCH] add kallisto pe workflow

---
Note: hand-edited after export. The blob ids on the "index" lines of
run.cwl and kallisto_quant-pe.sh were not regenerated.

 runs/kallisto_quant-pe/run.cwl                | 50 ++++++++++++
 runs/kallisto_quant-pe/run.yml                | 13 +++
 .../kallisto_quant-pe/kallisto_quant-pe.sh    | 79 +++++++++++++++++++
 3 files changed, 142 insertions(+)
 create mode 100644 runs/kallisto_quant-pe/run.cwl
 create mode 100644 runs/kallisto_quant-pe/run.yml
 create mode 100644 workflows/kallisto_quant-pe/kallisto_quant-pe.sh

diff --git a/runs/kallisto_quant-pe/run.cwl b/runs/kallisto_quant-pe/run.cwl
new file mode 100644
index 0000000..af888a5
--- /dev/null
+++ b/runs/kallisto_quant-pe/run.cwl
@@ -0,0 +1,50 @@
+#!/usr/bin/env cwl-runner
+
+# Paired-end kallisto quantification.
+# The tool runs
+#   bash <sh_script> <out_folder> <in_kallisto_index> <in_fastq_dir> <kallisto_bootstrap> <kallisto_threads>
+# and the inputBinding positions below fix the order of these arguments.
+
+cwlVersion: v1.2
+class: CommandLineTool
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/biocontainers/kallisto:0.43.0--hdf51.8.17_2
+
+inputs:
+- id: sh_script
+  type: File
+  inputBinding:
+    position: 0
+- id: out_folder
+  type: string
+  inputBinding:
+    position: 1
+- id: in_kallisto_index
+  type: File
+  inputBinding:
+    position: 2
+- id: in_fastq_dir
+  type: Directory
+  inputBinding:
+    position: 3
+- id: kallisto_bootstrap
+  type: int
+  inputBinding:
+    position: 4
+- id: kallisto_threads
+  type: int
+  inputBinding:
+    position: 5
+
+outputs:
+- id: outdir
+  type:
+    type: array
+    items: Directory
+  outputBinding:
+    glob: $(runtime.outdir)/$(inputs.out_folder)
+
+baseCommand:
+- bash
diff --git a/runs/kallisto_quant-pe/run.yml b/runs/kallisto_quant-pe/run.yml
new file mode 100644
index 0000000..2aec75e
--- /dev/null
+++ b/runs/kallisto_quant-pe/run.yml
@@ -0,0 +1,13 @@
+cores: 4
+sh_script:
+  class: File
+  path: ../../workflows/kallisto_quant-pe/kallisto_quant-pe.sh
+out_folder: ./out
+in_kallisto_index:
+  class: File
+  path: ../kallisto_index/out/kallisto_index
+in_fastq_dir:
+  class: Directory
+  path: ../../assays/rna-seq/dataset
+kallisto_bootstrap: 30
+kallisto_threads: 4
diff --git a/workflows/kallisto_quant-pe/kallisto_quant-pe.sh b/workflows/kallisto_quant-pe/kallisto_quant-pe.sh
new file mode 100644
index 0000000..da07576
--- /dev/null
+++ b/workflows/kallisto_quant-pe/kallisto_quant-pe.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+
+### Map RNASeq reads via kallisto
+### Note, this is written for paired-end mode only
+### And this assumes that there are exactly 2 fastq files for each sample with
+    ## forward = ${sample}_R1_001.fastq.gz
+    ## reverse = ${sample}_R2_001.fastq.gz
+###
+### Usage:
+###   kallisto_quant-pe.sh <out_folder> <kallisto_index> <fastq_folder> \
+###       <bootstrap> <threads> [sample_prefix]
+
+# Stop at the first failing command, unset variable or failing pipeline
+# stage, so that a failed kallisto run makes the whole script fail instead
+# of being followed by 'Kallisto done' and exit status 0.
+set -euo pipefail
+
+################################################
+#### Read arguments from CLI
+################################################
+
+if [ "$#" -lt 5 ]; then
+    echo "Usage: $0 <out_folder> <kallisto_index> <fastq_folder> <bootstrap> <threads> [sample_prefix]" >&2
+    exit 1
+fi
+
+out_folder=$1
+in_kallisto_index=$2
+in_fastq_folder=$3
+kallisto_bootstrap=$4
+kallisto_threads=$5
+# Optional: only fastq files whose name starts with this prefix are used.
+# Defaults to '50', the prefix that used to be hard-coded in the file glob.
+sample_prefix=${6:-50}
+
+################################################
+#### If it does not exist, create out dir
+################################################
+
+mkdir -p "$out_folder"
+
+################################################
+#### Store forward-read fastq files in array
+################################################
+
+# Glob into an array instead of parsing `ls`, so paths containing spaces
+# survive; nullglob turns "no match" into an empty array.
+shopt -s nullglob
+forward_reads=("${in_fastq_folder}/${sample_prefix}"*_R1_001.fastq.gz)
+shopt -u nullglob
+
+if [ "${#forward_reads[@]}" -eq 0 ]; then
+    echo "No ${sample_prefix}*_R1_001.fastq.gz files found in ${in_fastq_folder}" >&2
+    exit 1
+fi
+
+################################################
+#### Loop over samples and quantify reads
+################################################
+
+for forward in "${forward_reads[@]}"; do
+
+    # Path without the read suffix, e.g. <fastq_folder>/<sample>
+    sample_path=${forward%_R1_001.fastq.gz}
+    reverse=${sample_path}_R2_001.fastq.gz
+    sampleName=$(basename "$sample_path")
+
+    if [ ! -f "$reverse" ]; then
+        echo "Missing reverse reads for ${sampleName}: ${reverse}" >&2
+        exit 1
+    fi
+
+    echo "$sampleName"
+
+    kallisto quant -b "$kallisto_bootstrap" -t "$kallisto_threads" -i "$in_kallisto_index" -o "$out_folder/$sampleName" "$forward" "$reverse"
+
+    echo 'Kallisto done'
+
+done
-- 
GitLab