From dff74494ce14fd9e3a476e38e25daee12ff98fb1 Mon Sep 17 00:00:00 2001
From: Dominik <dominik.brilhaus@hhu.de>
Date: Wed, 10 Aug 2022 18:02:27 +0200
Subject: [PATCH] test runs from different machine

---
 runs/kallisto_collect/README.md            |  2 +-
 runs/kallisto_collect/kallisto_collect.yml |  2 +-
 runs/kallisto_index/README.md              |  2 +-
 runs/kallisto_index/kallisto_index.yml     |  2 +-
 runs/kallisto_quant/README.md              |  2 +-
 runs/kallisto_quant/kallisto_quant.yml     |  4 ++--
 runs/shiny_prep/README.md                  | 21 +++++++++++++++++++++
 runs/shiny_prep/shiny_prep.yml             |  2 +-
 workflows/kallisto_collect.R               | 12 +++++++++---
 workflows/kallisto_index.sh                | 12 +++++++++++-
 workflows/kallisto_quant.sh                | 16 +++++++++++++---
 workflows/kallisto_sleuth.R                |  2 +-
 workflows/merge_isa_metadata.R             |  2 +-
 workflows/shiny_prep.R                     |  2 +-
 14 files changed, 65 insertions(+), 18 deletions(-)
 create mode 100644 runs/shiny_prep/README.md

diff --git a/runs/kallisto_collect/README.md b/runs/kallisto_collect/README.md
index b7d553f..574080e 100644
--- a/runs/kallisto_collect/README.md
+++ b/runs/kallisto_collect/README.md
@@ -3,7 +3,7 @@
 
 ## First cd into the (runs) folder, with the .yml file
 
-cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_collect
+cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_collect
 
 ## Let it flow
 
diff --git a/runs/kallisto_collect/kallisto_collect.yml b/runs/kallisto_collect/kallisto_collect.yml
index 4fd3e9d..1b30051 100644
--- a/runs/kallisto_collect/kallisto_collect.yml
+++ b/runs/kallisto_collect/kallisto_collect.yml
@@ -7,4 +7,4 @@ in_metadata_file: "runs/merged_isa_metadata/merged_isa.tsv"
 in_metadata_sample: "Sample.Name.2"
 in_metadata_factor: "Factor..Photosynthesis.mode."
 out_folder: runs/kallisto_collect
-arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
\ No newline at end of file
+arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
\ No newline at end of file
diff --git a/runs/kallisto_index/README.md b/runs/kallisto_index/README.md
index 6143bf5..16475f8 100644
--- a/runs/kallisto_index/README.md
+++ b/runs/kallisto_index/README.md
@@ -3,7 +3,7 @@
 
 ## First cd into the (runs) folder, with the .yml file
 
-cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_index
+cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_index
 
 ## Let it flow
 
diff --git a/runs/kallisto_index/kallisto_index.yml b/runs/kallisto_index/kallisto_index.yml
index 1190016..60f3a27 100644
--- a/runs/kallisto_index/kallisto_index.yml
+++ b/runs/kallisto_index/kallisto_index.yml
@@ -4,4 +4,4 @@ sh_script:
   path: ../../workflows/kallisto_index.sh
 in_genome_ref: studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
 out_folder: runs/kallisto_index
-arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
+arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
diff --git a/runs/kallisto_quant/README.md b/runs/kallisto_quant/README.md
index 6551177..15edf3b 100644
--- a/runs/kallisto_quant/README.md
+++ b/runs/kallisto_quant/README.md
@@ -3,7 +3,7 @@
 
 ## First cd into the (runs) folder, with the .yml file
 
-cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_quant
+cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_quant
 
 ## Let it flow
 
diff --git a/runs/kallisto_quant/kallisto_quant.yml b/runs/kallisto_quant/kallisto_quant.yml
index 19031ec..3a700dd 100644
--- a/runs/kallisto_quant/kallisto_quant.yml
+++ b/runs/kallisto_quant/kallisto_quant.yml
@@ -2,8 +2,8 @@ cores: 4
 sh_script:
   class: File
   path: ../../workflows/kallisto_quant.sh
-arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
-out_folder: runs/kallisto_quant
+arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
+out_folder: runs/kallisto_quant/kallisto_results
 in_kallisto_index: runs/kallisto_index/kallisto_index
 in_fastq_folder: assays/Talinum_RNASeq_minimal/dataset
 kallisto_bootstrap: 100
diff --git a/runs/shiny_prep/README.md b/runs/shiny_prep/README.md
new file mode 100644
index 0000000..05639e7
--- /dev/null
+++ b/runs/shiny_prep/README.md
@@ -0,0 +1,21 @@
+
+# cwltool README
+
+## First cd into the (runs) folder, with the .yml file
+
+cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/shiny_prep
+
+## Let it flow
+
+```bash
+### store arc root (two levels up from here) as variable
+arc_root=$(echo ${PWD%/*/*})
+
+### replace arc root line in yml (specific to the machine from where this is run)
+### note: `sed -i ''` is the BSD/macOS form; on Linux (GNU sed) drop the empty '' argument
+sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" shiny_prep.yml
+
+### run with cwltool
+cwltool ../../workflows/shiny_prep.cwl shiny_prep.yml
+
+```
diff --git a/runs/shiny_prep/shiny_prep.yml b/runs/shiny_prep/shiny_prep.yml
index cdaed0e..a8fec1d 100644
--- a/runs/shiny_prep/shiny_prep.yml
+++ b/runs/shiny_prep/shiny_prep.yml
@@ -4,4 +4,4 @@ r_script:
   path: ../../workflows/shiny_prep.R
 in_kallisto_df: runs/kallisto_collect/kallisto_df.csv
 out_folder: runs/shiny_prep
-arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq
+arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
diff --git a/workflows/kallisto_collect.R b/workflows/kallisto_collect.R
index 57f2ed8..49f27db 100644
--- a/workflows/kallisto_collect.R
+++ b/workflows/kallisto_collect.R
@@ -1,11 +1,12 @@
-#!/usr/bin/env Rscript
+#!/usr/bin/env Rscript
 
 ################################################
 #### Test area (Within R)
 ################################################
 
-# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/"
-# in_kallisto_results <- "runs/kallisto_sleuth/run1/01_kallisto_results"
+# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/"
+# # in_kallisto_results <- "runs/kallisto_sleuth/run1/01_kallisto_results"
+# in_kallisto_results <- "runs/kallisto_quant/kallisto_results"
 # in_metadata_file <- "runs/merged_isa_metadata/merged_isa.tsv"
 # in_metadata_sample <- "Sample.Name.2"
 # in_metadata_factor <- "Factor..Photosynthesis.mode."
@@ -59,6 +60,11 @@ s2c <- samples[order(samples[[in_metadata_sample]]), c(in_metadata_sample, in_me
 
 colnames(s2c) <- c("sample", "condition")
 
+
+##### MAJOR TODO: properly connect metadata with samples via fastq filename
+##### MAJOR TODO: properly connect metadata with samples via fastq filename
+##### MAJOR TODO: properly connect metadata with samples via fastq filename
+
 s2c$path <- kal_dirs
 s2c <- s2c[order(s2c$sample), ]
 
diff --git a/workflows/kallisto_index.sh b/workflows/kallisto_index.sh
index 19e4ba7..5d17eb3 100755
--- a/workflows/kallisto_index.sh
+++ b/workflows/kallisto_index.sh
@@ -1,6 +1,16 @@
 #!/usr/bin/env bash
 
-### Build kallisto index 
+### Build kallisto index
+
+
+################################################
+#### CWL-free tests
+################################################
+
+# in_genome_ref=studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa
+# out_folder=runs/kallisto_index
+# arc_root=/Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
+
 
 ################################################
 #### Read arguments from CLI
diff --git a/workflows/kallisto_quant.sh b/workflows/kallisto_quant.sh
index 0f81cd1..f819f27 100755
--- a/workflows/kallisto_quant.sh
+++ b/workflows/kallisto_quant.sh
@@ -3,6 +3,19 @@
 ### Map RNASeq reads via kallisto
 ### Note, this is written for single-end mode only
 
+################################################
+#### CWL-free tests
+################################################
+
+# arc_root=/Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq
+# out_folder=runs/kallisto_quant/kallisto_results_v48_anaconda
+# in_kallisto_index=runs/kallisto_index/kallisto_index
+# in_fastq_folder=assays/Talinum_RNASeq_minimal/dataset
+# kallisto_bootstrap=100
+# kallisto_threads=4
+# kallisto_fragmentLength=200
+# kallisto_stdDev=20
+
 ################################################
 #### Read arguments from CLI
 ################################################
@@ -44,6 +57,3 @@ for j in $fastq_files; do
 	echo 'Kallisto done'
 
 done
-
-
-
diff --git a/workflows/kallisto_sleuth.R b/workflows/kallisto_sleuth.R
index 0d81bb9..164ba8e 100644
--- a/workflows/kallisto_sleuth.R
+++ b/workflows/kallisto_sleuth.R
@@ -8,7 +8,7 @@
 #### Test area (Within R)
 ################################################
 
-# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/"
+# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/"
 # in_sleuth <- "runs/kallisto_collect/kallisto_sleuthObject.RData"
 # out_folder <- "runs/kallisto_sleuth"
 
diff --git a/workflows/merge_isa_metadata.R b/workflows/merge_isa_metadata.R
index 50746cd..2c0aa77 100644
--- a/workflows/merge_isa_metadata.R
+++ b/workflows/merge_isa_metadata.R
@@ -4,7 +4,7 @@
 #### Test area (Within R)
 ################################################
 
-# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/"
+# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/"
 # in_isa_study <- "studies/TalinumFacultativeCAM/isa.study.xlsx:plant_growth"
 # in_isa_assay <- "assays/Talinum_RNASeq_minimal/isa.assay.xlsx:2EXT01_RNA:3ASY01_RNASeq"
 # out_folder <- "runs/merged_isa_metadata"
diff --git a/workflows/shiny_prep.R b/workflows/shiny_prep.R
index b883bb1..bd0967a 100644
--- a/workflows/shiny_prep.R
+++ b/workflows/shiny_prep.R
@@ -4,7 +4,7 @@
 #### Test area (Within R)
 ################################################
 
-# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/"
+# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/"
 # out_folder <- "runs/shiny_prep"
 # in_kallisto_df <- "runs/kallisto_collect/kallisto_df.csv"
 
-- 
GitLab