diff --git a/README.md b/README.md index 7a6343fa29335b865c7a5cde87056a9934162cdb..0fc960c4032ab95cfe4dc5efc5aedcdc40cfe494 100644 --- a/README.md +++ b/README.md @@ -91,4 +91,12 @@ DB_161 \ DB_163 \ DB_165 cd $arc_root -``` \ No newline at end of file +``` + +## Make workflows a bit more representative and reproducible (24.03.2022) + +## generate a common arc_root pointer + +```bash +echo "~/03DataPLANT_gitlab/samplearc_rnaseq/" > workflows/_arc_local_wd +``` diff --git a/assays/Talinum_RNASeq_minimal/README.md b/assays/Talinum_RNASeq_minimal/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/assays/Talinum_RNASeq_minimal/isa.assay.xlsx b/assays/Talinum_RNASeq_minimal/isa.assay.xlsx index 3e2dc03464a66d388eeb2dd3b6aafe7f8ca214b0..972482f26819554c1205dea5173d66ca2e5b9af4 100644 Binary files a/assays/Talinum_RNASeq_minimal/isa.assay.xlsx and b/assays/Talinum_RNASeq_minimal/isa.assay.xlsx differ diff --git a/assays/Talinum_RNASeq_minimal/protocols/README.md b/assays/Talinum_RNASeq_minimal/protocols/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/assays/Talinum_RNASeq_minimal/protocols/02_RNAex_libraries.md b/assays/Talinum_RNASeq_minimal/protocols/RNAex_libraries.md similarity index 100% rename from assays/Talinum_RNASeq_minimal/protocols/02_RNAex_libraries.md rename to assays/Talinum_RNASeq_minimal/protocols/RNAex_libraries.md diff --git a/studies/TalinumFacultativeCAM/README.md b/studies/TalinumFacultativeCAM/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/studies/TalinumFacultativeCAM/protocols/01_plant_material.md b/studies/TalinumFacultativeCAM/protocols/plant_material.md similarity index 100% rename from studies/TalinumFacultativeCAM/protocols/01_plant_material.md rename to studies/TalinumFacultativeCAM/protocols/plant_material.md diff --git a/studies/TalinumGenomeDraft/isa.study.xlsx b/studies/TalinumGenomeDraft/isa.study.xlsx index 0019946e56767e3aa5a2bd47735e9b9a5f9df58e..a12118f2e0c76cb2611156271ad11f47f1bd6a25 100644 Binary files a/studies/TalinumGenomeDraft/isa.study.xlsx and b/studies/TalinumGenomeDraft/isa.study.xlsx differ diff --git a/workflows/.Rhistory b/workflows/.Rhistory deleted file mode 100644 index 2682a56b8b7906d6b98e05c70905cc6584d6c11f..0000000000000000000000000000000000000000 --- a/workflows/.Rhistory +++ /dev/null @@ -1 +0,0 @@ -ARC_root="~/03_DataPLANT_gitlab/samplearc_rnaseq/" diff --git a/workflows/01_KallistoQuant.sh b/workflows/01_KallistoQuant.sh index 5cb91b5a2c750299f29c5fac895a7b26b0d6d858..c63f4647df1232f6a847f964b318953ba9f9e3d8 100644 --- a/workflows/01_KallistoQuant.sh +++ b/workflows/01_KallistoQuant.sh @@ -3,11 +3,12 @@ #### To be replaced by CWL routine ######################## -ARC_root=~/samplearc_rnaseq/ -cd $ARC_root'workflows/' - +# Execute within <ARC root>/workflows # chmod a+x 01_KallistoQuant.sh # ./01_KallistoQuant.sh > $ARC_root'runs/01_kallisto.log' 2>&1 & +# pointers to and from `runs` need to be replaced + +ARC_root=$(cat ./_arc_local_wd) ######################## @@ -21,7 +22,7 @@ kallisto cite ### Build index -kall_ref=$ARC_root'externals/Talinum.gm.CDS.nt.fa' +kall_ref=$ARC_root'studies/TalinumGenomeDraft/resources/Talinum.gm.CDS.nt.fa' kallisto index -i $ARC_root'runs/01_kallisto_index' $kall_ref ### Align reads @@ -32,11 +33,11 @@ mkdir $ARC_root'/runs/01_kallisto_results/' for j in $ILLUMINASAMPLES; do - sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name - echo $sampleName - - kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j + sampleName=$(echo $j | sed -e 's|.*/||' | cut -c -6) # cut away path. retain only first six chars of file name + echo $sampleName + + kallisto quant --single -b 100 -t 30 -l 200 -s 20 -i $ARC_root'/runs/01_kallisto_index' -o $ARC_root'/runs/01_kallisto_results/'$sampleName $j - echo 'Kallisto done' + echo 'Kallisto done' done \ No newline at end of file diff --git a/workflows/03_KallistoCollect.R b/workflows/03_KallistoCollect.R index 796a61218717e37e36b59cb01da0e0f5c738e875..a103b38f3e9c422bd1550f867cf259e670be1237 100644 --- a/workflows/03_KallistoCollect.R +++ b/workflows/03_KallistoCollect.R @@ -4,8 +4,12 @@ #### To be replaced by CWL routine ######################## -ARC_root="~/Hackathon_ARCexample_rnaseq/" -setwd(paste0(ARC_root, 'workflows/')) +# Execute within <ARC root>/workflows +# Rscript 03_KallistoCollect.R +# pointers to and from `runs` need to be replaced + +ARC_root=readLines("./_arc_local_wd") + ######################## @@ -29,23 +33,29 @@ library(openxlsx) ## read experimental metadata from isa.assay wb -isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/assay.isa.xlsx') +isa_assay <- paste0(ARC_root, 'assays/Talinum_RNASeq_minimal/isa.assay.xlsx') +isa_study <- paste0(ARC_root, 'studies/TalinumFacultativeCAM/isa.study.xlsx') -assay_data <- merge(readWorkbook(isa_assay, "1SPL01_plants", startRow = 2), - readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 2), - by = "Sample.Name" +study_data <- readWorkbook(isa_study, "plant_growth", startRow = 1) + +assay_data <- merge(readWorkbook(isa_assay, "2EXT01_RNA", startRow = 1), + readWorkbook(isa_assay, "3ASY01_RNASeq", startRow = 1), + by.x = "Sample.Name", + by.y = "Source.Name" ) +assay_data <- merge(study_data, assay_data, by.x = "Sample.Name", by.y = "Source.Name") + ## remove empty cols assay_data <- assay_data[, !apply(assay_data, 2, function(x){sum(is.na(x)) == nrow(assay_data)})] # Pointer to kallisto results folder -base_dir <- paste0(ARC_root, '/runs/01_kallisto_results/') +base_dir <- paste0(ARC_root, 'runs/01_kallisto_results/') # A list of paths to the kallisto results indexed by the sample IDs is collated with kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths -s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Characteristics.[Photosynthesis.mode]")] +s2c <- assay_data[order(assay_data$Sample.Name), c('Sample.Name', "Factor.[Photosynthesis.mode]")] # For kallisto / sleuth: 's2c' (sample_to_covariates) must contain a column named 'sample' colnames(s2c) <- c("sample", "Photosynthesis.mode") diff --git a/workflows/04_Sleuth.R b/workflows/04_Sleuth.R index ab2f5791cc20d8343cee2f934443d87b5e317c8e..97894a987cea97fea991ce67b889f70544a637f0 100644 --- a/workflows/04_Sleuth.R +++ b/workflows/04_Sleuth.R @@ -4,8 +4,11 @@ #### To be replaced by CWL routine ######################## -ARC_root="~/Hackathon_ARCexample_rnaseq/" -setwd(paste0(ARC_root, 'workflows/')) +# Execute within <ARC root>/workflows +# Rscript 04_Sleuth.R +# pointers to and from `runs` need to be replaced + +ARC_root=readLines("./_arc_local_wd") ######################## diff --git a/workflows/05_plot_shinyPrep.R b/workflows/05_plot_shinyPrep.R index e0b2fe554819c1c0f21057a7cf283c021319de3f..075c567040119d7ce18e546a49accca8af4494b5 100644 --- a/workflows/05_plot_shinyPrep.R +++ b/workflows/05_plot_shinyPrep.R @@ -4,8 +4,11 @@ #### To be replaced by CWL routine ######################## -ARC_root="~/Hackathon_ARCexample_rnaseq/" -setwd(paste0(ARC_root, 'workflows/')) +# Execute within <ARC root>/workflows +# Rscript 05_plot_shinyPrep.R +# pointers to and from `runs` need to be replaced + +ARC_root=readLines("./_arc_local_wd") ######################## diff --git a/workflows/_arc_local_wd b/workflows/_arc_local_wd new file mode 100644 index 0000000000000000000000000000000000000000..aca1e009538a1e623f7c13a9f169d4a707c39a6b --- /dev/null +++ b/workflows/_arc_local_wd @@ -0,0 +1 @@ +~/03DataPLANT_gitlab/samplearc_rnaseq/