From cde2abc048201aadf3c9dc9c501182bfed9ac43e Mon Sep 17 00:00:00 2001
From: Adrian Zimmer <z.adrian1995@gmail.com>
Date: Sat, 13 Aug 2022 15:24:05 +0200
Subject: [PATCH] Dockerize Kallisto_sleuth

---
 runs/kallisto_quant/README.md                 | 10 +----
 runs/kallisto_quant/kallisto_quant.yml        |  2 +-
 runs/kallisto_quant/run.cwl                   |  2 +-
 runs/kallisto_sleuth/README.md                | 10 +----
 runs/kallisto_sleuth/kallisto_sleuth.yml      |  8 ++--
 runs/kallisto_sleuth/{ => out}/sleuth_dge.csv |  0
 runs/kallisto_sleuth/run.cwl                  | 21 ++++++++++
 workflows/kallisto_sleuth/kallisto_sleuth.R   | 14 +++----
 workflows/kallisto_sleuth/workflow.cwl        | 39 ++++++++++---------
 9 files changed, 53 insertions(+), 53 deletions(-)
 rename runs/kallisto_sleuth/{ => out}/sleuth_dge.csv (100%)
 create mode 100644 runs/kallisto_sleuth/run.cwl

diff --git a/runs/kallisto_quant/README.md b/runs/kallisto_quant/README.md
index 7e03991..34ee542 100644
--- a/runs/kallisto_quant/README.md
+++ b/runs/kallisto_quant/README.md
@@ -8,14 +8,6 @@ cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_quant
 ## Let it flow
 
 ```bash
-### store arc root (two levels up from here) as variable
-arc_root=$(echo ${PWD%/*/*})
-
-### replace arc root line in yml (specific to the machine from where this is run)
-### not sure, if this works on linux... 
-sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_quant.yml
-
 ### run with cwltool
-cwltool ../../workflows/kallisto_quant.cwl kallisto_quant.yml
-
+cwltool --enable-dev run.cwl kallisto_quant.yml
 ```
diff --git a/runs/kallisto_quant/kallisto_quant.yml b/runs/kallisto_quant/kallisto_quant.yml
index 44f6bfe..e5c7505 100644
--- a/runs/kallisto_quant/kallisto_quant.yml
+++ b/runs/kallisto_quant/kallisto_quant.yml
@@ -9,4 +9,4 @@ in_fastq_dir:
 kallisto_bootstrap: 100
 kallisto_threads: 4
 kallisto_fragmentLength: 200
-kallisto_stdDev: 20
\ No newline at end of file
+kallisto_stdDev: 20
diff --git a/runs/kallisto_quant/run.cwl b/runs/kallisto_quant/run.cwl
index a5e9b88..87cc30f 100644
--- a/runs/kallisto_quant/run.cwl
+++ b/runs/kallisto_quant/run.cwl
@@ -33,4 +33,4 @@ steps:
       kallisto_threads: kallisto_threads
       kallisto_fragmentLength: kallisto_fragmentLength
       kallisto_stdDev: kallisto_stdDev
-    out: [outdir]
\ No newline at end of file
+    out: [outdir]
diff --git a/runs/kallisto_sleuth/README.md b/runs/kallisto_sleuth/README.md
index 05639e7..9df2ee4 100644
--- a/runs/kallisto_sleuth/README.md
+++ b/runs/kallisto_sleuth/README.md
@@ -8,14 +8,6 @@ cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_sleuth
 ## Let it flow
 
 ```bash
-### store arc root (two levels up from here) as variable
-arc_root=$(echo ${PWD%/*/*})
-
-### replace arc root line in yml (specific to the machine from where this is run)
-### not sure, if this works on linux... 
-sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" kallisto_sleuth.yml
-
 ### run with cwltool
-cwltool ../../workflows/kallisto_sleuth.cwl kallisto_sleuth.yml
-
+cwltool --enable-dev run.cwl kallisto_sleuth.yml
 ```
diff --git a/runs/kallisto_sleuth/kallisto_sleuth.yml b/runs/kallisto_sleuth/kallisto_sleuth.yml
index 34364fd..49c1123 100644
--- a/runs/kallisto_sleuth/kallisto_sleuth.yml
+++ b/runs/kallisto_sleuth/kallisto_sleuth.yml
@@ -1,7 +1,5 @@
 cores: 1
-r_script:
+in_sleuth:
   class: File
-  path: ../../workflows/kallisto_sleuth.R
-in_sleuth: runs/kallisto_collect/kallisto_sleuthObject.RData
-out_folder: runs/kallisto_sleuth
-arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
\ No newline at end of file
+  path: ../kallisto_collect/kallisto_sleuthObject.RData  # sleuth object that kallisto_sleuth.R load()s
+out_folder: out  # folder the R script creates and writes sleuth_dge.csv into
diff --git a/runs/kallisto_sleuth/sleuth_dge.csv b/runs/kallisto_sleuth/out/sleuth_dge.csv
similarity index 100%
rename from runs/kallisto_sleuth/sleuth_dge.csv
rename to runs/kallisto_sleuth/out/sleuth_dge.csv
diff --git a/runs/kallisto_sleuth/run.cwl b/runs/kallisto_sleuth/run.cwl
new file mode 100644
index 0000000..9565052
--- /dev/null
+++ b/runs/kallisto_sleuth/run.cwl
@@ -0,0 +1,21 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.2.0-dev1  # dev release: run cwltool with --enable-dev
+class: Workflow
+inputs:
+  in_sleuth:  # sleuth object file, forwarded unchanged to the sleuth step
+    type: File
+  out_folder:  # name of the folder the sleuth step writes its results into
+    type: string
+outputs:
+  out_dir:
+    type:  # NOTE(review): must agree with the type of workflow.cwl's outdir - confirm
+      type: array
+      items: Directory
+    outputSource: kallisto_sleuth/outdir
+steps:
+  kallisto_sleuth:  # step id names the tool it actually runs (was kallisto_quant)
+    run: ../../workflows/kallisto_sleuth/workflow.cwl
+    in:
+      in_sleuth: in_sleuth
+      out_folder: out_folder
+    out: [outdir]
diff --git a/workflows/kallisto_sleuth/kallisto_sleuth.R b/workflows/kallisto_sleuth/kallisto_sleuth.R
index ec3a84a..e4c895e 100644
--- a/workflows/kallisto_sleuth/kallisto_sleuth.R
+++ b/workflows/kallisto_sleuth/kallisto_sleuth.R
@@ -1,5 +1,4 @@
 #!/usr/bin/env Rscript
-
 ################################################
 ### Diff. gene expression with sleuth ##########
 ################################################
@@ -15,30 +14,27 @@
 ################################################
 #### Load required library
 ################################################
-
 library(sleuth)
-
 ################################################
 #### Read arguments from CLI
 ################################################
 
 args <- commandArgs(trailingOnly = T)
 
-arc_root <- args[1]
-in_sleuth <- args[2]
-out_folder <- args[3]
+in_sleuth <- args[1]
+out_folder <- args[2]
 
 ################################################
 #### If it does not exist, create out dir
 ################################################
 
-dir.create(paste(arc_root, out_folder, sep = "/"), recursive = T, showWarnings = F)
+dir.create(out_folder, recursive = TRUE, showWarnings = FALSE)  # silent if the folder already exists
 
 ################################################
 #### Load sleuth object
 ################################################
 
-load(file = paste(arc_root, in_sleuth, sep = "/"))
+load(file = in_sleuth)
 
 ################################################
 #### Run sleuth fit
@@ -55,4 +51,4 @@ sleuth_table <- sleuth_results(so, "reduced:full", "lrt", show_all = FALSE)
 #### write to file
 ################################################
 
-write.csv(sleuth_table, paste(arc_root, out_folder, "sleuth_dge.csv", sep = "/"), row.names = F)
+write.csv(sleuth_table, file.path(out_folder, "sleuth_dge.csv"), row.names = FALSE)  # result table goes inside out_folder
diff --git a/workflows/kallisto_sleuth/workflow.cwl b/workflows/kallisto_sleuth/workflow.cwl
index 6f42726..009ea53 100644
--- a/workflows/kallisto_sleuth/workflow.cwl
+++ b/workflows/kallisto_sleuth/workflow.cwl
@@ -1,26 +1,27 @@
 #!/usr/bin/env cwl-runner
 
-cwlVersion: v1.2
+cwlVersion: v1.2.0-dev1
 class: CommandLineTool
-
+hints:
+  DockerRequirement:
+    dockerPull: zimmera95/rnaseq:latest  # NOTE(review): floating tag; pin a version or digest for reproducible runs
+requirements:
+- class: InitialWorkDirRequirement  # stages the R script into the job's working directory
+  listing:
+  - class: File
+    location: kallisto_sleuth.R  # resolved relative to this CWL document
+arguments:
+- position: 0  # placed ahead of in_sleuth (1) and out_folder (2)
+  valueFrom: kallisto_sleuth.R
 inputs:
-- id: r_script
-  type: File
-  inputBinding:
-    position: 0
-- id: arc_root
-  type: string
-  inputBinding:
-    position: 1
-- id: in_sleuth
-  type: string
-  inputBinding:
-    position: 2
-- id: out_folder
-  type: string
-  inputBinding:
-    position: 3
-
+- id: in_sleuth  # sleuth object file; read by the R script as args[1]
+  type: File
+  inputBinding:
+    position: 1
+- id: out_folder  # output folder name; read by the R script as args[2]
+  type: string
+  inputBinding:
+    position: 2
 outputs:
 - id: outdir
   type:
-- 
GitLab