Skip to content
Snippets Groups Projects
Commit 4c0de1d1 authored by Dominik Brilhaus
Browse files

Merge branch 'update_workflows' into 'main'

restructure workflows for reproducibility

See merge request brilator/samplearc_rnaseq!2
parents 737cffa2 469c47f4
No related branches found
No related tags found
No related merge requests found
Showing with 48 additions and 23 deletions
...@@ -91,4 +91,12 @@ DB_161 \ ...@@ -91,4 +91,12 @@ DB_161 \
DB_163 \ DB_163 \
DB_165 DB_165
cd $arc_root cd $arc_root
``` ```
\ No newline at end of file
## Make workflows a bit more representative and reproducible (24.03.2022)
## Generate a common ARC root pointer file (`workflows/_arc_local_wd`)
```bash
echo "~/03DataPLANT_gitlab/samplearc_rnaseq/" > workflows/_arc_local_wd
```
No preview for this file type
No preview for this file type
ARC_root="~/03_DataPLANT_gitlab/samplearc_rnaseq/"
...@@ -3,11 +3,12 @@ ...@@ -3,11 +3,12 @@
#### To be replaced by CWL routine #### To be replaced by CWL routine
######################## ########################
ARC_root=~/samplearc_rnaseq/ # Execute within <ARC root>/workflows
cd $ARC_root'workflows/'
# chmod a+x 01_KallistoQuant.sh # chmod a+x 01_KallistoQuant.sh
# ./01_KallistoQuant.sh > $ARC_root'runs/01_kallisto.log' 2>&1 & # ./01_KallistoQuant.sh > $ARC_root'runs/01_kallisto.log' 2>&1 &
# pointers to and from `runs` need to be replaced
ARC_root=$(cat ./_arc_local_wd)
######################## ########################
...@@ -21,7 +22,7 @@ kallisto cite ...@@ -21,7 +22,7 @@ kallisto cite
### Build index ### Build index
kall_ref=$ARC_root'externals/Talinum.gm.CDS.nt.fa' kall_ref=$ARC_root'studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa'
kallisto index -i $ARC_root'runs/01_kallisto_index' $kall_ref kallisto index -i $ARC_root'runs/01_kallisto_index' $kall_ref
### Align reads ### Align reads
...@@ -32,11 +33,11 @@ mkdir $ARC_root'/runs/01_kallisto_results/' ...@@ -32,11 +33,11 @@ mkdir $ARC_root'/runs/01_kallisto_results/'
for j in $ILLUMINASAMPLES; do for j in $ILLUMINASAMPLES; do
sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name
echo $sampleName echo $sampleName
kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j
echo 'Kallisto done' echo 'Kallisto done'
done done
\ No newline at end of file
...@@ -4,8 +4,12 @@ ...@@ -4,8 +4,12 @@
#### To be replaced by CWL routine #### To be replaced by CWL routine
######################## ########################
ARC_root="~/Hackathon_ARCexample_rnaseq/" # Execute within <ARC root>/workflows
setwd(paste0(ARC_root, 'workflows/')) # Rscript 03_KallistoCollect.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
######################## ########################
...@@ -29,23 +33,29 @@ library(openxlsx) ...@@ -29,23 +33,29 @@ library(openxlsx)
## read experimental metadata from isa.assay wb ## read experimental metadata from isa.assay wb
isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/assay.isa.xlsx') isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/isa.assay.xlsx')
isa_study <- paste0(ARC_root, 'studies/TalinumFacultativeCAM/isa.study.xlsx')
assay_data <- merge(readWorkbook(isa_assay, "1SPL01_plants", startRow = 2), study_data <- readWorkbook(isa_study, "plant_growth", startRow = 1)
readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 2),
by = "Sample.Name" assay_data <- merge(readWorkbook(isa_assay, "2EXT01_RNA", startRow = 1),
readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 1),
by.x = "Sample.Name",
by.y = "Source.Name"
) )
assay_data <- merge(study_data, assay_data, by.x = "Sample.Name", by.y = "Source.Name")
## remove empty cols ## remove empty cols
assay_data <- assay_data[, !apply(assay_data, 2, function(x){sum(is.na(x)) == nrow(assay_data)})] assay_data <- assay_data[, !apply(assay_data, 2, function(x){sum(is.na(x)) == nrow(assay_data)})]
# Pointer to kallisto results folder # Pointer to kallisto results folder
base_dir <- paste0(ARC_root, '/runs/01_kallisto_results/') base_dir <- paste0(ARC_root, 'runs/01_kallisto_results/')
# A list of paths to the kallisto results indexed by the sample IDs is collated with # A list of paths to the kallisto results indexed by the sample IDs is collated with
kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths
s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Characteristics.[Photosynthesis.mode]")] s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Factor.[Photosynthesis.mode]")]
# For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample' # For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample'
colnames(s2c) <- c("sample", "Photosynthesis.mode") colnames(s2c) <- c("sample", "Photosynthesis.mode")
......
...@@ -4,8 +4,11 @@ ...@@ -4,8 +4,11 @@
#### To be replaced by CWL routine #### To be replaced by CWL routine
######################## ########################
ARC_root="~/Hackathon_ARCexample_rnaseq/" # Execute within <ARC root>/workflows
setwd(paste0(ARC_root, 'workflows/')) # Rscript 04_Sleuth.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
######################## ########################
......
...@@ -4,8 +4,11 @@ ...@@ -4,8 +4,11 @@
#### To be replaced by CWL routine #### To be replaced by CWL routine
######################## ########################
ARC_root="~/Hackathon_ARCexample_rnaseq/" # Execute within <ARC root>/workflows
setwd(paste0(ARC_root, 'workflows/')) # Rscript 05_plot_shinyPrep.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
######################## ########################
......
~/03DataPLANT_gitlab/samplearc_rnaseq/
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment