From dff74494ce14fd9e3a476e38e25daee12ff98fb1 Mon Sep 17 00:00:00 2001 From: Dominik <dominik.brilhaus@hhu.de> Date: Wed, 10 Aug 2022 18:02:27 +0200 Subject: [PATCH] test runs from different machine --- runs/kallisto_collect/README.md | 2 +- runs/kallisto_collect/kallisto_collect.yml | 2 +- runs/kallisto_index/README.md | 2 +- runs/kallisto_index/kallisto_index.yml | 2 +- runs/kallisto_quant/README.md | 2 +- runs/kallisto_quant/kallisto_quant.yml | 4 ++-- runs/shiny_prep/README.md | 21 +++++++++++++++++++++ runs/shiny_prep/shiny_prep.yml | 2 +- workflows/kallisto_collect.R | 12 +++++++++--- workflows/kallisto_index.sh | 12 +++++++++++- workflows/kallisto_quant.sh | 16 +++++++++++++--- workflows/kallisto_sleuth.R | 2 +- workflows/merge_isa_metadata.R | 2 +- workflows/shiny_prep.R | 2 +- 14 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 runs/shiny_prep/README.md diff --git a/runs/kallisto_collect/README.md b/runs/kallisto_collect/README.md index b7d553f..574080e 100644 --- a/runs/kallisto_collect/README.md +++ b/runs/kallisto_collect/README.md @@ -3,7 +3,7 @@ ## First cd into the (runs) folder, with the .yml file -cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_collect +cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_collect ## Let it flow diff --git a/runs/kallisto_collect/kallisto_collect.yml b/runs/kallisto_collect/kallisto_collect.yml index 4fd3e9d..1b30051 100644 --- a/runs/kallisto_collect/kallisto_collect.yml +++ b/runs/kallisto_collect/kallisto_collect.yml @@ -7,4 +7,4 @@ in_metadata_file: "runs/merged_isa_metadata/merged_isa.tsv" in_metadata_sample: "Sample.Name.2" in_metadata_factor: "Factor..Photosynthesis.mode." 
out_folder: runs/kallisto_collect -arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq \ No newline at end of file +arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq \ No newline at end of file diff --git a/runs/kallisto_index/README.md b/runs/kallisto_index/README.md index 6143bf5..16475f8 100644 --- a/runs/kallisto_index/README.md +++ b/runs/kallisto_index/README.md @@ -3,7 +3,7 @@ ## First cd into the (runs) folder, with the .yml file -cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_index +cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_index ## Let it flow diff --git a/runs/kallisto_index/kallisto_index.yml b/runs/kallisto_index/kallisto_index.yml index 1190016..60f3a27 100644 --- a/runs/kallisto_index/kallisto_index.yml +++ b/runs/kallisto_index/kallisto_index.yml @@ -4,4 +4,4 @@ sh_script: path: ../../workflows/kallisto_index.sh in_genome_ref: studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa out_folder: runs/kallisto_index -arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq +arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq diff --git a/runs/kallisto_quant/README.md b/runs/kallisto_quant/README.md index 6551177..15edf3b 100644 --- a/runs/kallisto_quant/README.md +++ b/runs/kallisto_quant/README.md @@ -3,7 +3,7 @@ ## First cd into the (runs) folder, with the .yml file -cd /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq/runs/kallisto_quant +cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/kallisto_quant ## Let it flow diff --git a/runs/kallisto_quant/kallisto_quant.yml b/runs/kallisto_quant/kallisto_quant.yml index 19031ec..3a700dd 100644 --- a/runs/kallisto_quant/kallisto_quant.yml +++ b/runs/kallisto_quant/kallisto_quant.yml @@ -2,8 +2,8 @@ cores: 4 sh_script: class: File path: ../../workflows/kallisto_quant.sh -arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq -out_folder: 
runs/kallisto_quant +arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq +out_folder: runs/kallisto_quant/kallisto_results in_kallisto_index: runs/kallisto_index/kallisto_index in_fastq_folder: assays/Talinum_RNASeq_minimal/dataset kallisto_bootstrap: 100 diff --git a/runs/shiny_prep/README.md b/runs/shiny_prep/README.md new file mode 100644 index 0000000..05639e7 --- /dev/null +++ b/runs/shiny_prep/README.md @@ -0,0 +1,21 @@ + +# cwltool README + +## First cd into the (runs) folder, with the .yml file + +cd /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq/runs/shiny_prep + +## Let it flow + +```bash +### store arc root (two levels up from here) as variable +arc_root=$(echo ${PWD%/*/*}) + +### replace arc root line in yml (specific to the machine from where this is run) +### note: sed -i '' is BSD/macOS syntax; with GNU sed (Linux) use sed -i without the empty '' +sed -i '' "s|^arc_root:.*|arc_root: $arc_root|g" shiny_prep.yml + +### run with cwltool +cwltool ../../workflows/shiny_prep.cwl shiny_prep.yml + +``` diff --git a/runs/shiny_prep/shiny_prep.yml b/runs/shiny_prep/shiny_prep.yml index cdaed0e..a8fec1d 100644 --- a/runs/shiny_prep/shiny_prep.yml +++ b/runs/shiny_prep/shiny_prep.yml @@ -4,4 +4,4 @@ r_script: path: ../../workflows/shiny_prep.R in_kallisto_df: runs/kallisto_collect/kallisto_df.csv out_folder: runs/shiny_prep -arc_root: /Users/dominikbrilhaus/03_DataPLANT_gitlab/samplearc_rnaseq +arc_root: /Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq diff --git a/workflows/kallisto_collect.R b/workflows/kallisto_collect.R index 57f2ed8..49f27db 100644 --- a/workflows/kallisto_collect.R +++ b/workflows/kallisto_collect.R @@ -1,11 +1,12 @@ -#!/usr/bin/env Rscript +#!/usr/bin/env Rscript ################################################ #### Test area (Within R) ################################################ -# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/" +# arc_root <- 
"~/gitlab_dataplant/samplearc_rnaseq/" +# # in_kallisto_results <- "runs/kallisto_sleuth/run1/01_kallisto_results" +# in_kallisto_results <- "runs/kallisto_quant/kallisto_results" # in_metadata_file <- "runs/merged_isa_metadata/merged_isa.tsv" # in_metadata_sample <- "Sample.Name.2" # in_metadata_factor <- "Factor..Photosynthesis.mode." @@ -59,6 +60,11 @@ s2c <- samples[order(samples[[in_metadata_sample]]), c(in_metadata_sample, in_me colnames(s2c) <- c("sample", "condition") + +##### MAJOR TODO: properly connect metadata with samples via fastq filename +##### MAJOR TODO: properly connect metadata with samples via fastq filename +##### MAJOR TODO: properly connect metadata with samples via fastq filename + s2c$path <- kal_dirs s2c <- s2c[order(s2c$sample), ] diff --git a/workflows/kallisto_index.sh b/workflows/kallisto_index.sh index 19e4ba7..5d17eb3 100755 --- a/workflows/kallisto_index.sh +++ b/workflows/kallisto_index.sh @@ -1,6 +1,16 @@ #!/usr/bin/env bash -### Build kallisto index +### Build kallisto index + + +################################################ +#### CWL-free tests +################################################ + +# in_genome_ref=studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa +# out_folder=runs/kallisto_index +# arc_root=/Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq + ################################################ #### Read arguments from CLI diff --git a/workflows/kallisto_quant.sh b/workflows/kallisto_quant.sh index 0f81cd1..f819f27 100755 --- a/workflows/kallisto_quant.sh +++ b/workflows/kallisto_quant.sh @@ -3,6 +3,19 @@ ### Map RNASeq reads via kallisto ### Note, this is written for single-end mode only +################################################ +#### CWL-free tests +################################################ + +# arc_root=/Users/dominikbrilhaus/gitlab_dataplant/samplearc_rnaseq +# out_folder=runs/kallisto_quant/kallisto_results_v48_anaconda +# in_kallisto_index=runs/kallisto_index/kallisto_index +# 
in_fastq_folder=assays/Talinum_RNASeq_minimal/dataset +# kallisto_bootstrap=100 +# kallisto_threads=4 +# kallisto_fragmentLength=200 +# kallisto_stdDev=20 + ################################################ #### Read arguments from CLI ################################################ @@ -44,6 +57,3 @@ for j in $fastq_files; do echo 'Kallisto done' done - - - diff --git a/workflows/kallisto_sleuth.R b/workflows/kallisto_sleuth.R index 0d81bb9..164ba8e 100644 --- a/workflows/kallisto_sleuth.R +++ b/workflows/kallisto_sleuth.R @@ -8,7 +8,7 @@ #### Test area (Within R) ################################################ -# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/" +# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/" # in_sleuth <- "runs/kallisto_collect/kallisto_sleuthObject.RData" # out_folder <- "runs/kallisto_sleuth" diff --git a/workflows/merge_isa_metadata.R b/workflows/merge_isa_metadata.R index 50746cd..2c0aa77 100644 --- a/workflows/merge_isa_metadata.R +++ b/workflows/merge_isa_metadata.R @@ -4,7 +4,7 @@ #### Test area (Within R) ################################################ -# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/" +# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/" # in_isa_study <- "studies/TalinumFacultativeCAM/isa.study.xlsx:plant_growth" # in_isa_assay <- "assays/Talinum_RNASeq_minimal/isa.assay.xlsx:2EXT01_RNA:3ASY01_RNASeq" # out_folder <- "runs/merged_isa_metadata" diff --git a/workflows/shiny_prep.R b/workflows/shiny_prep.R index b883bb1..bd0967a 100644 --- a/workflows/shiny_prep.R +++ b/workflows/shiny_prep.R @@ -4,7 +4,7 @@ #### Test area (Within R) ################################################ -# arc_root <- "~/03_DataPLANT_gitlab/samplearc_rnaseq/" +# arc_root <- "~/gitlab_dataplant/samplearc_rnaseq/" # out_folder <- "runs/shiny_prep" # in_kallisto_df <- "runs/kallisto_collect/kallisto_df.csv" -- GitLab