Skip to content
Snippets Groups Projects
Commit 4c0de1d1 authored by Dominik Brilhaus
Browse files

Merge branch 'update_workflows' into 'main'

restructure workflows for reproducibility

See merge request brilator/samplearc_rnaseq!2
parents 737cffa2 469c47f4
No related branches found
No related tags found
No related merge requests found
Showing with 48 additions and 23 deletions
......@@ -91,4 +91,12 @@ DB_161 \
DB_163 \
DB_165
cd $arc_root
```
\ No newline at end of file
```
## Make workflows a bit more representative and reproducible (24.03.2022)
## generate a common arc_root pointer
```bash
echo "~/03DataPLANT_gitlab/samplearc_rnaseq/" > workflows/_arc_local_wd
```
No preview for this file type
No preview for this file type
ARC_root="~/03_DataPLANT_gitlab/samplearc_rnaseq/"
......@@ -3,11 +3,12 @@
#### To be replaced by CWL routine
########################
ARC_root=~/samplearc_rnaseq/
cd $ARC_root'workflows/'
# Execute within <ARC root>/workflows
# chmod a+x 01_KallistoQuant.sh
# ./01_KallistoQuant.sh > $ARC_root'runs/01_kallisto.log' 2>&1 &
# pointers to and from `runs` need to be replaced
ARC_root=$(cat ./_arc_local_wd)
########################
......@@ -21,7 +22,7 @@ kallisto cite
### Build index
kall_ref=$ARC_root'externals/Talinum.gm.CDS.nt.fa'
kall_ref=$ARC_root'studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa'
kallisto index -i $ARC_root'runs/01_kallisto_index' $kall_ref
### Align reads
......@@ -32,11 +33,11 @@ mkdir $ARC_root'/runs/01_kallisto_results/'
for j in $ILLUMINASAMPLES; do
sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name
echo $sampleName
kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j
sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name
echo $sampleName
kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j
echo 'Kallisto done'
echo 'Kallisto done'
done
\ No newline at end of file
......@@ -4,8 +4,12 @@
#### To be replaced by CWL routine
########################
ARC_root="~/Hackathon_ARCexample_rnaseq/"
setwd(paste0(ARC_root, 'workflows/'))
# Execute within <ARC root>/workflows
# Rscript 03_KallistoCollect.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
########################
......@@ -29,23 +33,29 @@ library(openxlsx)
## read experimental metadata from isa.assay wb
isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/assay.isa.xlsx')
isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/isa.assay.xlsx')
isa_study <- paste0(ARC_root, 'studies/TalinumFacultativeCAM/isa.study.xlsx')
assay_data <- merge(readWorkbook(isa_assay, "1SPL01_plants", startRow = 2),
readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 2),
by = "Sample.Name"
study_data <- readWorkbook(isa_study, "plant_growth", startRow = 1)
assay_data <- merge(readWorkbook(isa_assay, "2EXT01_RNA", startRow = 1),
readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 1),
by.x = "Sample.Name",
by.y = "Source.Name"
)
assay_data <- merge(study_data, assay_data, by.x = "Sample.Name", by.y = "Source.Name")
## remove empty cols
assay_data <- assay_data[, !apply(assay_data, 2, function(x){sum(is.na(x)) == nrow(assay_data)})]
# Pointer to kallisto results folder
base_dir <- paste0(ARC_root, '/runs/01_kallisto_results/')
base_dir <- paste0(ARC_root, 'runs/01_kallisto_results/')
# A list of paths to the kallisto results indexed by the sample IDs is collated with
kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths
s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Characteristics.[Photosynthesis.mode]")]
s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Factor.[Photosynthesis.mode]")]
# For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample'
colnames(s2c) <- c("sample", "Photosynthesis.mode")
......
......@@ -4,8 +4,11 @@
#### To be replaced by CWL routine
########################
ARC_root="~/Hackathon_ARCexample_rnaseq/"
setwd(paste0(ARC_root, 'workflows/'))
# Execute within <ARC root>/workflows
# Rscript 04_Sleuth.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
########################
......
......@@ -4,8 +4,11 @@
#### To be replaced by CWL routine
########################
ARC_root="~/Hackathon_ARCexample_rnaseq/"
setwd(paste0(ARC_root, 'workflows/'))
# Execute within <ARC root>/workflows
# Rscript 05_plot_shinyPrep.R
# pointers to and from `runs` need to be replaced
ARC_root=readLines("./_arc_local_wd")
########################
......
~/03DataPLANT_gitlab/samplearc_rnaseq/
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment