diff --git a/assays/BSA_RNAseq/adapters/custom_adapters.fa b/assays/BSA_RNAseq/adapters/custom_adapters.fa
new file mode 100644
index 0000000000000000000000000000000000000000..c6da458fd5c4e43e5c5b9758062a9062867a9e98
--- /dev/null
+++ b/assays/BSA_RNAseq/adapters/custom_adapters.fa
@@ -0,0 +1,24 @@
+>read1_1/1
+AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
+>read1_2/1
+AGATCGGAAGAGCACACGTCTGAAC
+>read1_3/1
+TGGAATTCTCGGGTGCCAAGG
+>read1_4/1
+AGATCGGAAGAGCACACGTCT
+>read1_5/1
+CTGTCTCTTATACACATCT
+>read1_6/1
+AGATGTGTATAAGAGACAG
+>read2_1/2
+AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
+>read2_2/2
+AGATCGGAAGAGCGTCGTGTAGGGA
+>read2_3/2
+TGGAATTCTCGGGTGCCAAGG
+>read2_4/2
+AGATCGGAAGAGCACACGTCT
+>read2_5/2
+CTGTCTCTTATACACATCT
+>read2_6/2
+AGATGTGTATAAGAGACAG
diff --git a/assays/BSA_RNAseq/adapters/readme.txt b/assays/BSA_RNAseq/adapters/readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a029aca99c1b4ba0432cb9989de49109ef0d2645
--- /dev/null
+++ b/assays/BSA_RNAseq/adapters/readme.txt
@@ -0,0 +1,3 @@
+## raw_data/BSA_rnaseq/adapters/
+
+Sequencing adapters used for reads trimming
diff --git a/assays/BSA_RNAseq/isa.assay.xlsx b/assays/BSA_RNAseq/isa.assay.xlsx
index 2d21f88fb855a54a1f3982155f0bb99249397390..399f4b8a40fb6354edb648cda747f4dc1d558145 100644
Binary files a/assays/BSA_RNAseq/isa.assay.xlsx and b/assays/BSA_RNAseq/isa.assay.xlsx differ
diff --git a/assays/BSA_WGS/isa.assay.xlsx b/assays/BSA_WGS/isa.assay.xlsx
index 1397489d55497f42e3468d5e7c53118165c9f3de..2e357f7219a77f02119427594574cd6d4d116af0 100644
Binary files a/assays/BSA_WGS/isa.assay.xlsx and b/assays/BSA_WGS/isa.assay.xlsx differ
diff --git a/assays/betalain_LCMS_quantification/isa.assay.xlsx b/assays/betalain_LCMS_quantification/isa.assay.xlsx
index 134a053fcd23d521beea075532f59f2dd93aa81c..d361dd4b2ffae902a2be61a221938294149b6bfa 100644
Binary files a/assays/betalain_LCMS_quantification/isa.assay.xlsx and b/assays/betalain_LCMS_quantification/isa.assay.xlsx differ
diff --git a/assays/betalain_photometric_quantification/isa.assay.xlsx b/assays/betalain_photometric_quantification/isa.assay.xlsx
index e18f639f6add7f87f39ecc7590f4455803e5d5b1..68931c6c470e924b1bd598f466bf6f20196dc4dc 100644
Binary files a/assays/betalain_photometric_quantification/isa.assay.xlsx and b/assays/betalain_photometric_quantification/isa.assay.xlsx differ
diff --git a/assays/isoform_sequencing/isa.assay.xlsx b/assays/isoform_sequencing/isa.assay.xlsx
index 9719eb30c1c6f3ccd645f4189923b8d6215fce9d..ae346d235c02e0ea6c0a10e07149fcc41e5f20f7 100644
Binary files a/assays/isoform_sequencing/isa.assay.xlsx and b/assays/isoform_sequencing/isa.assay.xlsx differ
diff --git a/isa.investigation.xlsx b/isa.investigation.xlsx
index 8c37020a85cca1fe30c338cad3743015e7e4fa3b..586f555874be0956c7b05f5da13c824073eaa385 100644
Binary files a/isa.investigation.xlsx and b/isa.investigation.xlsx differ
diff --git a/studies/BSA_parent_betalain_quant/isa.study.xlsx b/studies/BSA_parent_betalain_quant/isa.study.xlsx
index ec46c0474b3ff8ea33525da7511c111656479c76..486411ac1c8c88a7585c68191536289931207512 100644
Binary files a/studies/BSA_parent_betalain_quant/isa.study.xlsx and b/studies/BSA_parent_betalain_quant/isa.study.xlsx differ
diff --git a/studies/Bulk_segregant_analysis/isa.study.xlsx b/studies/Bulk_segregant_analysis/isa.study.xlsx
index 43a7b33b2beb38dc6c65334db05c480f743bd916..b7b89a26b6cd86616539825c87df590f547b6e4e 100644
Binary files a/studies/Bulk_segregant_analysis/isa.study.xlsx and b/studies/Bulk_segregant_analysis/isa.study.xlsx differ
diff --git a/studies/additional_data/README.md b/studies/additional_data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/studies/additional_data/isa.study.xlsx b/studies/additional_data/isa.study.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..566ca09ccb44bcd0d23eb8c1fe9b6e8b6b8de26d
Binary files /dev/null and b/studies/additional_data/isa.study.xlsx differ
diff --git a/studies/additional_data/protocols/.gitkeep b/studies/additional_data/protocols/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/studies/additional_data/resources/.gitkeep b/studies/additional_data/resources/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/studies/additional_data/resources/color_pathway_genes.bed b/studies/additional_data/resources/color_pathway_genes.bed
new file mode 100644
index 0000000000000000000000000000000000000000..c4ee58173ccc48f2eae2fa0b2e9b3e28eae49bcc
--- /dev/null
+++ b/studies/additional_data/resources/color_pathway_genes.bed
@@ -0,0 +1,36 @@
+chr	start	end	strand	gene_id	transcript_id	Gene	Pathway
+Scaffold_1	7523181	7527088	-	AHp000674	AHp000674.1	AhCYP76AD5	Betalain
+Scaffold_1	28687404	28688868	-	AHp001663	AHp001663.1	AhBetanidin5GT	Betalain
+Scaffold_2	6806355	6807984	+	AHp003152	AHp003152.1	F3-H_3	Flavonoid
+Scaffold_2	6806355	6807984	+	AHp003152	AHp003152.2	F3-H_3	Flavonoid
+Scaffold_2	31518210	31522095	+	AHp004305	AHp004305.1	CHS	Flavonoid
+Scaffold_2	31518210	31522095	+	AHp004305	AHp004305.2	CHS	Flavonoid
+Scaffold_3	14652200	14653652	+	AHp005940	AHp005940.1	AhBetanidin6GT	Betalain
+Scaffold_3	29507485	29509926	+	AHp006454	AHp006454.1	F3H	Flavonoid
+Scaffold_4	10226509	10227946	-	AHp007219	AHp007219.1	AhcDOPA5GT	Betalain
+Scaffold_5	9470955	9474986	-	AHp008991	AHp008991.1	FLS	Flavonoid
+Scaffold_5	9470955	9474986	-	AHp008991	AHp008991.2	FLS	Flavonoid
+Scaffold_5	19537497	19543306	+	AHp009303	AHp009303.1	DFR	Flavonoid
+Scaffold_6	3461614	3469253	-	AHp009962	AHp009962.1	CHI1	Flavonoid
+Scaffold_6	13877397	13882497	+	AHp010386	AHp010386.1	AhDODAÎ±2	Betalain
+Scaffold_8	3308349	3313908	+	AHp012752	AHp012752.1	PAL_1	Flavonoid
+Scaffold_8	7925628	7930892	-	AHp013217	AHp013217.1	C4H_1	Flavonoid
+Scaffold_8	7925628	7930892	-	AHp013217	AHp013217.2	C4H_1	Flavonoid
+Scaffold_9	12115220	12125730	-	AHp014409	AHp014409.1	4CL_1	Flavonoid
+Scaffold_10	21762620	21764924	-	AHp016530	AHp016530.1	AhMYB3	Betalain
+Scaffold_10	21780564	21783050	-	AHp016531	AHp016531.1	AhMYB4	Betalain
+Scaffold_11	16155424	16159543	+	AHp017409	AHp017409.1	LAR	Flavonoid
+Scaffold_11	17204504	17209738	-	AHp017497	AHp017497.1	F3-H_1	Flavonoid
+Scaffold_14	12286837	12295353	+	AHp020962	AHp020962.1	4CL_2	Flavonoid
+Scaffold_15	1098208	1100359	+	AHp021795	AHp021795.1	ANS	Flavonoid
+Scaffold_15	2886482	2891165	-	AHp021980	AHp021980.1	PAL_2	Flavonoid
+Scaffold_15	4193988	4197809	+	AHp022120	AHp022120.1	F3-H_4	Flavonoid
+Scaffold_15	4211064	4214362	+	AHp022122	AHp022122.1	F3-H_2	Flavonoid
+Scaffold_15	4227367	4229108	-	AHp022123	AHp022123.1	F3-H_5	Flavonoid
+Scaffold_15	7959672	7965298	+	AHp022382	AHp022382.1	C4H_3	Flavonoid
+Scaffold_15	7959672	7965298	+	AHp022382	AHp022382.2	C4H_3	Flavonoid
+Scaffold_15	8003999	8008333	+	AHp022384	AHp022384.1	C4H_2	Flavonoid
+Scaffold_16	988733	991447	+	AHp022773	AHp022773.1	AhMYB2	Betalain
+Scaffold_16	988733	991447	+	AHp022773	AHp022773.2	AhMYB2	Betalain
+Scaffold_16	5231548	5239058	-	AHp023147	AHp023147.1	AhDODAÎ±1	Betalain
+Scaffold_16	5301961	5305973	+	AHp023148	AHp023148.1	AhCYP76AD2	Betalain
diff --git a/studies/additional_data/resources/color_pathway_genes.csv b/studies/additional_data/resources/color_pathway_genes.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a427fab2d4361cc316c62eb25f0080f53dcf3c69
--- /dev/null
+++ b/studies/additional_data/resources/color_pathway_genes.csv
@@ -0,0 +1,30 @@
+Gene,Pathway,Gene_id
+PAL_1,Flavonoid,AHp012752
+PAL_2,Flavonoid,AHp021980
+C4H_1,Flavonoid,AHp013217
+C4H_2,Flavonoid,AHp022384
+C4H_3,Flavonoid,AHp022382
+4CL_1,Flavonoid,AHp014409
+4CL_2,Flavonoid,AHp020962
+CHS,Flavonoid,AHp004305
+CHI1,Flavonoid,AHp009962
+F3-H_1,Flavonoid,AHp017497
+F3-H_2,Flavonoid,AHp022122
+F3-H_3,Flavonoid,AHp003152
+F3-H_4,Flavonoid,AHp022120
+F3-H_5,Flavonoid,AHp022123
+DFR,Flavonoid,AHp009303
+F3H,Flavonoid,AHp006454
+FLS,Flavonoid,AHp008991
+LAR,Flavonoid,AHp017409
+ANS,Flavonoid,AHp021795
+AhCYP76AD2,Betalain,AHp023148
+AhCYP76AD5,Betalain,AHp000674
+AhDODAÎ±2,Betalain,AHp010386
+AhDODAÎ±1,Betalain,AHp023147
+AhcDOPA5GT,Betalain,AHp007219
+AhBetanidin5GT,Betalain,AHp001663
+AhBetanidin6GT,Betalain,AHp005940
+AhMYB2,Betalain,AHp022773
+AhMYB3,Betalain,AHp016530
+AhMYB4,Betalain,AHp016531
diff --git a/studies/isoform_sequencing/isa.study.xlsx b/studies/isoform_sequencing/isa.study.xlsx
index 60867b92cbe45252333f78b49c485bd39a3e02b2..48af51fa63c14650219e5e214e849d894b1120dd 100644
Binary files a/studies/isoform_sequencing/isa.study.xlsx and b/studies/isoform_sequencing/isa.study.xlsx differ
diff --git a/workflows/BSA/RNAseq/QC.sh b/workflows/BSA/RNAseq/QC.sh
index 64db67753b630dbf68ec8969414684f3d63565bf..947a4ff755356b5808130b9f984c477c0759599a 100644
--- a/workflows/BSA/RNAseq/QC.sh
+++ b/workflows/BSA/RNAseq/QC.sh
@@ -22,8 +22,8 @@ GTFIN=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_correc
 
 # Quality control
 # Initialize the output directory
-QCIN=raw_data/BSA_rnaseq/
-QCOUT=raw_data/BSA_rnaseq/fastqc
+QCIN=assays/BSA_RNAseq/dataset
+QCOUT=assays/BSA_RNAseq/fastqc
 
 mkdir -p $QCOUT
 # run quality control fastqc
@@ -32,14 +32,14 @@ fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz
 
 # Quality control
 # Initialize the output directory
-QCIN=raw_data/BSA_rnaseq/
-QCOUT=raw_data/BSA_rnaseq/fastqc
+QCIN=assays/BSA_RNAseq/
+QCOUT=assays/BSA_RNAseq/fastqc
 
 mkdir -p $QCOUT
 # run quality control fastqc
 fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz
 
-cp -r $QCOUT data/BSA/RNAseq/STAR_flower_mappings/QC/
+cp -r $QCOUT runs/BSA/RNAseq/STAR_flower_mappings/QC/
 
 module load samtools/1.13
 
@@ -53,11 +53,11 @@ GTFQM=/scratch/twinkle1/temp.gtf
 sed 's/CDS/exon/' $GTFIN > $GTFQM
 
 # define input files
-QMIN=data/BSA/RNAseq/STAR_flower_mappings/
+QMIN=runs/BSA/RNAseq/STAR_flower_mappings/
 
 
 # define output directory
-QMOUT=data/BSA/RNAseq/STAR_flower_mappings/QC/qualimap
+QMOUT=runs/BSA/RNAseq/STAR_flower_mappings/QC/qualimap
 
 # create main output directory
 mkdir -p $QMOUT
@@ -85,8 +85,8 @@ done
 rm $GTFQM
 
 # run multiqc to combine the results from fastqc and qualimap into a single report
-MULTIQCOUT=data/BSA/RNAseq/STAR_flower_mappings/multiqc
-MULTIQCIN=data/BSA/RNAseq/STAR_flower_mappings/QC/
+MULTIQCOUT=runs/BSA/RNAseq/STAR_flower_mappings/multiqc
+MULTIQCIN=runs/BSA/RNAseq/STAR_flower_mappings/QC/
 
 mkdir -p $MULTIQCOUT
 
diff --git a/workflows/BSA/RNAseq/adapter_trimming.sh b/workflows/BSA/RNAseq/adapter_trimming.sh
index 15d5ab7bef2f182a0afe15ae0caadef289200a67..2821fec969a558c3c2ec65cc964789464e4e58d4 100644
--- a/workflows/BSA/RNAseq/adapter_trimming.sh
+++ b/workflows/BSA/RNAseq/adapter_trimming.sh
@@ -20,7 +20,7 @@ module load trimmomatic/0.39
 
 # run this part as array job
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/BSA_rnaseq/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/
 FILES=("$SOURCE_DIR"/*R1.fastq.gz)
 
 # run trimmomatic, use 6 threads, taking advantage of the baseout function to name output files
@@ -32,4 +32,4 @@ java -jar $TRIMMOMATIC/trimmomatic.jar PE \
 	"${FILES["${SLURM_ARRAY_TASK_ID}"]}" \
 	"${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/R2.fastq.gz}" \
 	-baseout "${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/trimmed.fq.gz}" \
-	ILLUMINACLIP:raw_data/BSA_rnaseq/adapters/custom_adapters.fa:2:30:10
+	ILLUMINACLIP:assays/BSA_RNAseq/adapters/custom_adapters.fa:2:30:10
diff --git a/workflows/BSA/RNAseq/index_STAR.sh b/workflows/BSA/RNAseq/index_STAR.sh
index cb8606a419e341318008238f456a836ae017649b..6d12c7f12c3562cabf82f0d9e93ac0eb977eab97 100644
--- a/workflows/BSA/RNAseq/index_STAR.sh
+++ b/workflows/BSA/RNAseq/index_STAR.sh
@@ -18,10 +18,10 @@ module load star/2.7.8a
 # more specific settings: use the polished, softmasked reference assembly
 # sjdbOverhang dependend on input read length
 # as SJDB file, use the newly generated braker2 protein gtf file
-REFGENOME=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
-SJDBFILE=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
+REFGENOME=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+SJDBFILE=runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
 
-mkdir -p data/BSA/RNAseq/STAR_flower_index
+mkdir -p runs/BSA/RNAseq/STAR_flower_index
 
 STAR --runThreadN 8 \
 	--runMode genomeGenerate \
diff --git a/workflows/BSA/RNAseq/run_STAR.sh b/workflows/BSA/RNAseq/run_STAR.sh
index 1064306650a392d552fdf51ac9ccde1a1fc2c725..099d4e6f420777b3c9f843d55f9bc292f558e0c4 100644
--- a/workflows/BSA/RNAseq/run_STAR.sh
+++ b/workflows/BSA/RNAseq/run_STAR.sh
@@ -12,9 +12,9 @@
 module load star/2.7.8a
 
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/BSA_rnaseq/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/
 FILES=("$SOURCE_DIR"/*_1P.fq.gz)
-OUTPUTDIR=data/BSA/RNAseq/STAR_flower_mappings
+OUTPUTDIR=runs/BSA/RNAseq/STAR_flower_mappings
 
 # change directory of outprefix
 OUTPREFIX1=("${FILES["${SLURM_ARRAY_TASK_ID}"]/$SOURCE_DIR/$OUTPUTDIR}")
diff --git a/workflows/BSA/RNAseq/run_kallisto.sh b/workflows/BSA/RNAseq/run_kallisto.sh
index ae124fb57e4ca3d234851d6c12be5104ed6cde41..88243f01138aef81632ddb4975627402e2a94826 100644
--- a/workflows/BSA/RNAseq/run_kallisto.sh
+++ b/workflows/BSA/RNAseq/run_kallisto.sh
@@ -23,9 +23,9 @@
 
 ##### Perform quantification
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/flower_color_mapping/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/flower_color_mapping/
 FILES=("$SOURCE_DIR"AM*_1P.fq.gz)
-OUTDIR=data/flower_color_mapping/kallisto_quant/
+OUTDIR=runs/flower_color_mapping/kallisto_quant/
 TISSUE_NAMES=("AM_00331_gf" "AM_00331_rf" "AM_00332_gf" "AM_00332_rf")
 
 mkdir -p $OUTDIR
diff --git a/workflows/BSA/WGS/combined_filter.sh b/workflows/BSA/WGS/combined_filter.sh
index 707a9bd4b68212b56b80b324d637e9a69f9c326b..78e6a1718cb1f58098fd970d689df38466db7bc7 100644
--- a/workflows/BSA/WGS/combined_filter.sh
+++ b/workflows/BSA/WGS/combined_filter.sh
@@ -12,13 +12,13 @@
 module load samtools/1.13
 
 
-REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 
 PROVIDER=CCG
-OUTDIR=data/BSA/wgs/vcf
+OUTDIR=runs/BSA/wgs/vcf
 mkdir -p $OUTDIR
 
-ALLSAMP=$(for i in data/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done)
+ALLSAMP=$(for i in runs/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done)
 
 $MYUTIL/tools/gatk-4.1.7.0/gatk --java-options "-Xmx48G" \
 	CombineGVCFs \
diff --git a/workflows/BSA/WGS/map_reads.sh b/workflows/BSA/WGS/map_reads.sh
index ad2bcf23142c774d731e1471b5639fca1422593d..3a1d2a877b9082e1f8b2906e8527196ac9349aed 100644
--- a/workflows/BSA/WGS/map_reads.sh
+++ b/workflows/BSA/WGS/map_reads.sh
@@ -17,15 +17,15 @@ module load bwamem2/2.0_gnu
 module load samtools/1.13
 
 
-REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 #bwa-mem2 index $REFERENCE
 
 
 PROVIDER=CCG
 
 
-INPUTPATH=raw_data/BSA_wgs/ #
-OUTPUTPATH=data/BSA/wgs/bam_files/ # Change this for different generations
+INPUTPATH=assays/BSA_WGS/dataset #
+OUTPUTPATH=runs/BSA/wgs/bam_files/ # Change this for different generations
 
 mkdir -p $OUTPUTPATH
 mkdir -p ${OUTPUTPATH}/metrics/
diff --git a/workflows/BSA/betalain_quantification.Rmd b/workflows/BSA/betalain_quantification.Rmd
index 8ab62650b42d6d117839495a3a60751e804a7235..31a9b69c32f1ca1ec6726be542ae9c0e1d5c1223 100644
--- a/workflows/BSA/betalain_quantification.Rmd
+++ b/workflows/BSA/betalain_quantification.Rmd
@@ -20,7 +20,7 @@ library(ggbeeswarm)
 Create output directory for plots
 
 ```{bash}
-mkdir ../../plots/betalain_quantification
+mkdir ../../runs/betalain_quantification
 ```
 
 
@@ -28,7 +28,7 @@ Read in measured data
 
 ```{r}
 # read in data
-betalain_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+betalain_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "Photometric_quantification")
 
 # normalize to the same input weight
@@ -219,11 +219,11 @@ p_leaf <- ggplot(data = melted_quant %>% filter(tissue == "leaf")) +
                    yend = mean_content),
                linewidth = 0.9,
                color = "black") +
-  geom_text(data = leaf_bc_groups, 
+  geom_text(data = leaf_bc_groups,
           aes(x = as.numeric(factors) - 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "red3") +  
   scale_color_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) +
@@ -246,8 +246,8 @@ p_leaf
 
 # plot betalain quantification from flower
 p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) +
-  # geom_boxplot(aes(x = accession, 
-  #                y = content, 
+  # geom_boxplot(aes(x = accession,
+  #                y = content,
   #                fill = metabolite),
   #            color = "black",
   #            #color = "red4",
@@ -282,18 +282,18 @@ p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) +
                    yend = mean_content),
                linewidth = 0.9,
                color = "black") +
-  geom_text(data = flower_bc_groups, 
+  geom_text(data = flower_bc_groups,
           aes(x = as.numeric(factors) - 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "red3") +  
-  geom_text(data = flower_bx_groups, 
+  geom_text(data = flower_bx_groups,
           aes(x = as.numeric(factors) + 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "#F0B327") +    
   scale_fill_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) +
@@ -322,7 +322,7 @@ patchplot_betalains <- p_flower / p_leaf +
 
 patchplot_betalains
 
-ggsave(filename = "../../plots/betalain_quantification/betalain_quant_photometric_content.png",
+ggsave(filename = "../../runs/betalain_quantification/betalain_quant_photometric_content.png",
        plot = patchplot_betalains,
        bg = "white",
        dpi = 450,
@@ -335,7 +335,7 @@ HPLC quantification
 
 ```{r}
 # hplc quantification
-hplc_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+hplc_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "LC-MS_quantification")
 
 # get blank measurements
@@ -447,14 +447,14 @@ Plot results amaranthin
 ```{r}
 # plot
 pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-  # geom_point(aes(x = accession, 
-  #                y = Amaranthin_ratio, 
+  # geom_point(aes(x = accession,
+  #                y = Amaranthin_ratio,
   #                group = uniq_ind),
   #            color = "red3",
   #            position = position_dodge(width = 0.75),
   #            size = 2.8) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Amaranthin_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Amaranthin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -489,14 +489,14 @@ pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pa_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-  # geom_point(aes(x = accession, 
-  #                y = Amaranthin_ratio, 
+  # geom_point(aes(x = accession,
+  #                y = Amaranthin_ratio,
   #                group = uniq_ind),
   #            color = "red3",
   #            position = position_dodge(width = 0.75),
   #            size = 2.8) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Amaranthin_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Amaranthin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -536,8 +536,8 @@ Plot results betanin
 ```{r}
 # plot
 pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betanin_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betanin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -572,8 +572,8 @@ pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pb_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betanin_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betanin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -612,8 +612,8 @@ Plot results betalamic acid
 ```{r}
 # plot
 pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betalamic_acid_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betalamic_acid_ratio,
                  group = uniq_ind),
              color = "grey40",
              cex = 1.5,
@@ -648,8 +648,8 @@ pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pba_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betalamic_acid_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betalamic_acid_ratio,
                  group = uniq_ind),
              color = "grey40",
              cex = 1.5,
@@ -688,8 +688,8 @@ Plot results vulgaxanthin
 ```{r}
 # plot
 pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Vulgaxanthin_IV_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Vulgaxanthin_IV_ratio,
                  group = uniq_ind),
              color = "#F0B327",
              method = "swarm",
@@ -718,8 +718,8 @@ pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pv_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Vulgaxanthin_IV_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Vulgaxanthin_IV_ratio,
                  group = uniq_ind),
              color = "#F0B327",
              method = "swarm",
@@ -762,7 +762,7 @@ overview_patchplot <- p_leaf + p_flower + pa_leaf + pa_flower + pb_leaf + pb_flo
         legend.direction = "vertical",
         plot.tag = element_text(vjust = 2, size = 17, face = "bold"))
 
-ggsave(filename = "../../plots/betalain_quantification/overview_betalain_quantification.png",
+ggsave(filename = "../../runs/betalain_quantification/overview_betalain_quantification.png",
        plot = overview_patchplot,
        bg = "white",
        dpi = 450,
@@ -785,7 +785,7 @@ photo_hplc_patchplot <- plot_spacer() / (p_leaf + p_flower) / (pa_leaf + pa_flow
 
 
 
-ggsave(filename = "../../plots/betalain_quantification/quantification_without_picture.png",
+ggsave(filename = "../../runs/betalain_quantification/quantification_without_picture.png",
        plot = photo_hplc_patchplot,
        bg = "white",
        dpi = 450,
@@ -798,7 +798,7 @@ Analyse HPLC results from amaranth roots:
 
 ```{r}
 # hplc quantification
-root_quant <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+root_quant <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "transgenic_roots_LC-MS_quantification")
 root_quant <- root_quant %>%
   mutate(sample_id = paste0("plate_", plate, "_", root_type)) %>%
@@ -834,7 +834,7 @@ root_quant_plot <- ggplot(data = root_quant_norm) +
         legend.position = "none")
 root_quant_plot
 
-ggsave(filename = "../../plots/betalain_quantification/root_quantification.png",
+ggsave(filename = "../../runs/betalain_quantification/root_quantification.png",
        plot = root_quant_plot,
        bg = "white",
        dpi = 450,
@@ -842,5 +842,3 @@ ggsave(filename = "../../plots/betalain_quantification/root_quantification.png",
        height = 5)
 
 ```
-
-
diff --git a/workflows/BSA/bsa_and_plotting.R b/workflows/BSA/bsa_and_plotting.R
index 4f9f2667b3ec478e80153811738a96efea580f7c..ed0b831766c4cf6e4baa13a866298f91a75f3d4e 100644
--- a/workflows/BSA/bsa_and_plotting.R
+++ b/workflows/BSA/bsa_and_plotting.R
@@ -1,5 +1,5 @@
 
-setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 
 library(QTLseqr)
 library(tidyverse)
@@ -16,7 +16,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
                        highBulk = HighBulk,
                        lowBulk = LowBulk,
                        chromList = Chroms)
-  
+
   df <- df %>% select(CHROM,
                       POS,
                       REF,
@@ -38,7 +38,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
     mutate(CHROM=as.factor(as.numeric(gsub("Scaffold_","",CHROM))),
            POS=as.numeric(POS)) %>%
     filter(REF!='*',ALT!='*')
-  
+
   df_filt <-filterSNPs(SNPset = df,
                        refAlleleFreq = 0.2,
                        minTotalDepth = 50,
@@ -46,7 +46,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
                        minSampleDepth = 20,
                        minGQ = 99,
                        verbose = TRUE)
-  
+
   df_filt <- runGprimeAnalysis(
     SNPset = df_filt,
     windowSize = 2e6,
@@ -64,14 +64,14 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
   return(df_filt)
 }
 
-AM_00332_leaf_green_red <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table',
+AM_00332_leaf_green_red <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table',
                                         HighBulk = "AM_00332_gl",
                                         LowBulk = "AM_00332_rl",
                                         Chroms = paste0(rep("Scaffold_",
                                                             16),1:16),
                                         nhigh=80,
                                         nlow=80)
-AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table',
+AM_00331_flower_red_green <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table',
                                           HighBulk = "AM_00331_rf",
                                           LowBulk = "AM_00331_gf",
                                           Chroms = paste0(rep("Scaffold_",
@@ -83,15 +83,15 @@ AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps0
 # plot all results
 # leaf
 plotGresults <- function(Gresults,betalain_genes){
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
-  
-  
+
+
   mG <- Gresults %>%
     filter(Gprime==max(Gresults$Gprime))
-  
+
   p1 <- ggplot()+
     geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -99,7 +99,7 @@ plotGresults <- function(Gresults,betalain_genes){
     geom_hline(data=data.frame(yint=qval),
                aes(yintercept =yint,
                    linetype ='dashed',
-                   color=alpha('red',0.6)), 
+                   color=alpha('red',0.6)),
                size=1.7)+
     facet_grid(.~CHROM, space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
@@ -117,23 +117,23 @@ plotGresults <- function(Gresults,betalain_genes){
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
     scale_x_continuous(guide = guide_axis(check.overlap = T))
-    #geom_gene_arrow(data=betalain_genes, 
+    #geom_gene_arrow(data=betalain_genes,
     #                aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type))
-  
+
   #ggsave(outfile,p1,width = 18,height = 7,,bg='white')
   return(p1)
 }
 # flower
 plotGresults1 <- function(Gresults,betalain_genes){
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
-  
-  
+
+
   mG <- Gresults %>%
     filter(Gprime==max(Gresults$Gprime))
-  
+
   p1 <- ggplot()+
     geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -142,7 +142,7 @@ plotGresults1 <- function(Gresults,betalain_genes){
     geom_hline(data=data.frame(yint=qval),
                aes(yintercept =yint,
                    linetype ='dashed',
-                   color=alpha('red',0.6)), 
+                   color=alpha('red',0.6)),
                size=1.7)+
     facet_grid(.~CHROM,space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
@@ -158,9 +158,9 @@ plotGresults1 <- function(Gresults,betalain_genes){
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
     scale_x_continuous(guide = guide_axis(check.overlap = T))
-    #geom_gene_arrow(data=betalain_genes, 
+    #geom_gene_arrow(data=betalain_genes,
     #                aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type))
-  
+
   #ggsave(outfile,p1,width = 18,height = 7,,bg='white')
   return(p1)
 }
@@ -168,23 +168,23 @@ plotGresults1 <- function(Gresults,betalain_genes){
 # plot individual chromosomes
 # flower
 plotGqtl <- function(Gresults,chr,genes){
-  
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
   my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F)
-  
+
   ggplot()+
     geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
     scale_x_continuous(breaks = c(0,10,20,30))+
     geom_hline(data=data.frame(yint=qval),
-               aes(yintercept = yint, 
-                   linetype ='dashed', 
+               aes(yintercept = yint,
+                   linetype ='dashed',
                    color=alpha('red',0.6)),
                size = 2) +
-    facet_grid(.~CHROM,space = 'free_x',scales='free_x') + 
+    facet_grid(.~CHROM,space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
     ylim(0,10) +
     theme( strip.background = element_rect(fill = alpha('lightblue',0.2)),
@@ -199,19 +199,19 @@ plotGqtl <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
-    geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(), 
+    geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(),
                     aes(xmin = start/1e6, xmax = end/1e6, y = 9.5, color = attributes), size=6) +
     scale_color_manual(values = c(alpha('red',0.6), "black", "black","grey","grey"))
 }
 # leaf
 plotGqtl1 <- function(Gresults,chr,genes){
-  
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
   my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F)
-  
+
   p1 <- ggplot() +
     geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -239,22 +239,22 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm"))
-  
-  
+
+
   p2 <- ggplot() +
-    geom_gene_arrow(data=filter(genes, 
-                                CHROM==chr, 
+    geom_gene_arrow(data=filter(genes,
+                                CHROM==chr,
                                 type == "transcript",
                                 attributes == "ID=AHp023147.1;geneID=AHp023147" | attributes == "ID=AHp023148.1;geneID=AHp023148") %>% droplevels(),
-                    aes(xmin = start, 
-                        xmax = end, 
-                        y = "chr16", 
-                        fill = attributes, 
+                    aes(xmin = start,
+                        xmax = end,
+                        y = "chr16",
+                        fill = attributes,
                         forward = c(F,T)),
                     size = 1.5,
                     color = "black",
-                    arrowhead_height = unit(12, "mm"), 
-                    arrowhead_width = unit(6, "mm"), 
+                    arrowhead_height = unit(12, "mm"),
+                    arrowhead_width = unit(6, "mm"),
                     arrow_body_height = grid::unit(6, "mm")) +
     geom_text(aes(x = c(5246000,5290000),
                   y = "chr16",
@@ -268,7 +268,7 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.text.x = element_text(size=20),
-          panel.grid.major.y = ggplot2::element_line(colour = "grey", 
+          panel.grid.major.y = ggplot2::element_line(colour = "grey",
                                                      linewidth = 1),
           #axis.title.x = element_text(size=40),
           axis.ticks.length = unit(.25, "cm"),
@@ -278,12 +278,12 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.ticks.y = element_blank(),
           axis.text.y = element_blank()) +
     scale_fill_manual(values = c("chocolate2","cyan3",'red'))
-  
+
   # combine plots:
   out <- plot_grid(p2,p1,
                    nrow = 2,
                    rel_heights = c(0.3,0.7))
-  
+
   out
 }
 
@@ -348,15 +348,9 @@ MYB_plot <- plot_grid(cowplot_flower, pathway_plot, alignment_plot,
                       labels = c("", "C", "D"),
                       label_size = 34)
 
-ggsave(filename = "plots/paper_myb_combined_alignment.png",
+ggsave(filename = "runs/plots/paper_myb_combined_alignment.png",
        plot = MYB_plot,
        dpi = 400,
        width = 25,
        height = 25,
        bg = "white")
-
-
-
-
-
-
diff --git a/workflows/BSA/read_count_analysis.Rmd b/workflows/BSA/read_count_analysis.Rmd
index 4154c86c51ede7b0b6f006ccf5b84574c203946f..f7c97bb2a060eac383eb12231e19872e8ecd811d 100644
--- a/workflows/BSA/read_count_analysis.Rmd
+++ b/workflows/BSA/read_count_analysis.Rmd
@@ -11,14 +11,14 @@ library(tidyverse)
 library(DESeq2)
 library(factoextra)
 library(patchwork)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 
 ```{r}
 ########################## Create a function to generate plots for all betalain pathway genes
 # load object with names of all betalain and flavonoid genes
-pathway_genes <- read.csv(file = "data/manual_sheets/color_pathway_genes.csv", header=T)
+pathway_genes <- read.csv(file = "studies/additional_data/resources/color_pathway_genes.csv", header=T)
 colnames(pathway_genes) <- c("pathway_gene", "pathway", "gene_id")
 betalain.genes <- pathway_genes %>%
   filter(pathway == "Betalain")
@@ -32,13 +32,13 @@ Transcript level gene expression quantification from kallisto:
 
 ```{r}
 # vector of input directories
-sample_names <- dir(path = "data/flower_color_mapping/kallisto_quant/")
+sample_names <- dir(path = "runs/flower_color_mapping/kallisto_quant/")
 
 # read in tables
 kallisto_quant <- c()
 
 for (i in 1:length(sample_names)){
-  x <- read_table(file = paste0("data/flower_color_mapping/kallisto_quant/",
+  x <- read_table(file = paste0("runs/flower_color_mapping/kallisto_quant/",
                                 sample_names[i],
                                 "/abundance.tsv"))
   # set column names and keep relevant columns
@@ -126,7 +126,7 @@ patchplot <- betalain_plots[[8]] + betalain_plots[[9]] + betalain_plots[[11]] +
   theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"),
         plot.tag = element_text(size = 35))
 
-ggsave(filename = "plots/flower_mapping_expression/betalain_gene_kallisto.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/betalain_gene_kallisto.png",
        width = 28,
        height = 20)
 
@@ -182,7 +182,7 @@ betalain_plots <- plot_betalain_counts(gene_ID_list = betalain_quant$transcript_
 
 # save all plots
 for (i in 1:length(betalain_plots)){
-  ggsave(filename = paste0("plots/flower_mapping_expression/", gene_names[i], ".png"),
+  ggsave(filename = paste0("runs/plots/flower_mapping_expression/", gene_names[i], ".png"),
        plot = betalain_plots[[i]],
        height = 6,
        width = 8)
@@ -219,7 +219,7 @@ ggplot(data=regulator_quant) +
         legend.position = "bottom",
         legend.direction = "vertical")
 
-ggsave(filename = "plots/flower_mapping_expression/flower_expression_grid.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/flower_expression_grid.png",
        width = 4,
        height = 8,
        dpi = 500)
@@ -289,7 +289,7 @@ ggplot(data=flavonoid_grid) +
         legend.position = "right",
         legend.direction = "vertical")
 
-ggsave(filename = "plots/flower_mapping_expression/flavonoid_expression_grid.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/flavonoid_expression_grid.png",
        width = 8,
        height = 8,
        dpi = 500)
diff --git a/workflows/BSA/read_count_analysis_from_bam.Rmd b/workflows/BSA/read_count_analysis_from_bam.Rmd
index a3d44236380fe6efe5f4547dc8fb470af44d4dff..f442e2299bfd4934b573a95800a86283e27c16ef 100644
--- a/workflows/BSA/read_count_analysis_from_bam.Rmd
+++ b/workflows/BSA/read_count_analysis_from_bam.Rmd
@@ -11,40 +11,40 @@ library(tidyverse)
 library(chromstaR)
 library(ggtranscript)
 library(RColorBrewer)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 Extract the sequencing reads from the bam files which overlap both the two non-synonymous SNPs and the stop-gained SNP. Samtools view can be used to extract reads covering a particular position (and their pairs). Non-primary alignments can be discarded. Reads should cover the position of the stop-gained variant (Scaffold 16, 5305851, C->T) and the position of the right non-synonymous variant (Scaffold 16, 5305727, A->T).
 
 ```{bash}
-mkdir -p data/BSA/RNAseq/phased_reads/
+mkdir -p runs/BSA/RNAseq/phased_reads/
 
 # green flower
 # index and extract everything overlapping the right non-synonymous variant position
-samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam
-samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam
+samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
 
 # index and extract everything overlapping the stop-gained variant position
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
-samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
 
 # save as tsv file using sam2tsv from jvarkit
-java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv
+java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv
 
 
 # red flower
 # index and extract everything overlapping the right non-synonymous variant position
-samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam
-samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam
+samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
 
 # index and extract everything overlapping the stop-gained variant position
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
-samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
 
 # save as tsv file using sam2tsv from jvarkit
-java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv
+java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv
 ```
 
 Put in loading and filtering of the data (tsv and sequencing data) into separate functions:
@@ -72,9 +72,9 @@ read_bam.tsv <- function(filename){
     arrange(read_name) %>%
     ungroup() %>%
     #mutate(allele = ifelse(tolower(read_base) == ref_base, "ref", "alt")) %>% # is it the reference or alternative allele?
-    mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A", 
-                           "alt", 
-                           ifelse(ref_pos == 5305727 & read_base == "T", 
+    mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A",
+                           "alt",
+                           ifelse(ref_pos == 5305727 & read_base == "T",
                                   "alt",
                                   ifelse(ref_pos == 5305851 & read_base == "T",
                                          "alt", "ref"))))
@@ -104,22 +104,22 @@ plot_reads <- function(df, tsv, title){
   # maybe rather join the two tables?
   number_alt_alleles <- tsv %>%
     filter(allele == "alt") %>%
-    group_by(read_name) %>% 
+    group_by(read_name) %>%
     summarise(n = n(),
               snp_grouped = sum(snp_group))
   # join the table detailing the number of alt alleles
   snp.bam.df <- left_join(df, number_alt_alleles, by = c("qname" = "read_name"))
-  
+
   # first plot, showing specific reads
   snpplot <- ggplot() +
-    geom_range(data = snp.bam.df, 
+    geom_range(data = snp.bam.df,
                aes(xstart = start, xend = end, y = factor(qname, levels=unique(qname[order(n,snp_grouped,qname)]), ordered=TRUE)))
-  
+
   # second plot showing alternativ allele positions
   snpplot2 <- snpplot +
-    geom_point(data = tsv %>% filter(allele == "alt"), 
-               aes(x = ref_pos, 
-                   y = read_name, 
+    geom_point(data = tsv %>% filter(allele == "alt"),
+               aes(x = ref_pos,
+                   y = read_name,
                    color = as.factor(snp_group)),
                size = 2.2) +
     theme_classic() +
@@ -149,9 +149,9 @@ Load in data for the red and green flower bulks of the BSA on AM_00332:
 
 ```{r}
 # green flower
-snp_tsv.gf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
+snp_tsv.gf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
 
-snp_df.gf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam",
+snp_df.gf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam",
                                  tsv = snp_tsv.gf)
 
 # how many reads after filtering?
@@ -159,9 +159,9 @@ snp_tsv.gf %>%
   dplyr::count(read_name) # 99 reads
 
 # red flower
-snp_tsv.rf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
+snp_tsv.rf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
 
-snp_df.rf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam",
+snp_df.rf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam",
                                  tsv = snp_tsv.rf)
 
 # how many reads after filtering?
@@ -179,7 +179,7 @@ plot.gf <- plot_reads(df = snp_df.gf,
 plot.gf
 
 # save plot
-ggsave(filename = "plots/rna_seq_reads_gf.png",
+ggsave(filename = "runs/plots/rna_seq_reads_gf.png",
        plot = plot.gf,
        width = 10,
        height = 8)
@@ -192,17 +192,8 @@ plot.rf <- plot_reads(df = snp_df.rf,
 plot.rf
 
 # save plot
-ggsave(filename = "plots/rna_seq_reads_rf.png",
+ggsave(filename = "runs/plots/rna_seq_reads_rf.png",
        plot = plot.rf,
        width = 10,
        height = 8)
 ```
-
-
-
-
-
-
-
-
-
diff --git a/workflows/BSA/snpEff_analysis.Rmd b/workflows/BSA/snpEff_analysis.Rmd
index f74a6b0d82f193bc9cedfa9a4c5920b764b79e30..28c1f06fe01e3477ba31fb75e5f70c02470dcb17 100644
--- a/workflows/BSA/snpEff_analysis.Rmd
+++ b/workflows/BSA/snpEff_analysis.Rmd
@@ -14,7 +14,7 @@ library(ggtranscript)
 library(reshape2)
 library(cowplot)
 library(patchwork)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 ## Database creation
@@ -22,15 +22,15 @@ knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publicat
 Run snpEff database creation on the fixed annotation files. Copy the fixed files to the snpeff directory:
 
 ```{bash}
-mkdir -p data/annotation_analysis/snpEff/databases/AHv2.2/
+mkdir -p runs/annotation_analysis/snpEff/databases/AHv2.2/
 
 # snpEff analysis
 # add genome file to snpEff database
-cp polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa
+cp runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa
 # add annotation file to snpEff database
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff data/annotation_analysis/snpEff/databases/AHv2.2/genes.gff
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta data/annotation_analysis/snpEff/databases/AHv2.2/cds.fa
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta data/annotation_analysis/snpEff/databases/AHv2.2/protein.fa
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff runs/annotation_analysis/snpEff/databases/AHv2.2/genes.gff
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/cds.fa
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/protein.fa
 
 # create database:
 java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2
@@ -42,18 +42,18 @@ java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2
 Run using data from Markus color/sterility mapping bulks:
 
 ```{bash}
-mkdir -p data/annotation_analysis/snpEff/bsa_sterility_color/analysis
+mkdir -p runs/annotation_analysis/snpEff/bsa_sterility_color/analysis
 
 # get data and run snpEff on the example data:
-java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats data/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 data/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf
+java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats runs/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 runs/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf
 
 
 # two files are not saved in the output directory but in the current working directory
-mv snpEff_* data/annotation_analysis/snpEff/bsa_sterility_color/
+mv snpEff_* runs/annotation_analysis/snpEff/bsa_sterility_color/
 
 # it is challenging to process the snpEff output for downstream analysis
 # snpsift is a software package distributed with snpEff that eases processing
-cat data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt
+cat runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt
 ```
 
 
@@ -63,11 +63,11 @@ Analyze the snpEff test run and check for high impact variants that can be manua
 
 ```{r}
 # load in snpEff summary file
-snpEff.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "")
+snpEff.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "")
 colnames(snpEff.tab)[1] <- "GeneName"
 
 # load in list of betalain and flavonoid pathway genes
-color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T)
+color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T)
 snpEff.tab <- left_join(snpEff.tab, color_pathways, by = c("GeneId" = "Gene_id"))
 
 # check color pathway genes for high impact variants, moderate and in theory also modifier might also be relevant
@@ -88,14 +88,14 @@ Subset a more detailed table of betalain genes and their respective positions in
 
 ```{r}
 # load in list of betalain and flavonoid pathway genes
-color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T)
+color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T)
 betalain_chr16 <- color_pathways %>%
   filter(Gene_id == "AHp022773" | Gene_id == "AHp023148" | Gene_id == "AHp023147")
 # add BvMYB1like gene
 betalain_chr16[2,] <- c("BvMYB1like", "Betalain", "AHp022773")
 
-write.table(betalain_chr16, 
-            file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt",
+write.table(betalain_chr16,
+            file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt",
             quote = F)
 
 # set up function for reading in a gtf file
@@ -123,18 +123,18 @@ read.gtf <- function(file){
 }
 
 # read in annotation
-annotation.gtf <- read.gtf("polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
+annotation.gtf <- read.gtf("runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
 
 # subset betalain genes
 betalain_chr16.gtf <- annotation.gtf %>%
   filter(gene_id %in% betalain_chr16$Gene_id)
 
-saveRDS(betalain_chr16.gtf, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+saveRDS(betalain_chr16.gtf, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 
 
 
 #################### generate BED file of relevant positions
-betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 # snpeff by default uses the 5000 positions before and after a gene
 # create a bed file that can be used to subset the vcf file into relevant variants
 betalain_chr16.bed <- betalain_chr16.gtf %>%
@@ -145,16 +145,16 @@ betalain_chr16.bed <- betalain_chr16.gtf %>%
   select(chrom, chromStart, chromEnd) %>%
   unique()
 
-write_tsv(betalain_chr16.bed, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed")  
+write_tsv(betalain_chr16.bed, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed")  
 ```
 
 Subset the vcf file and extract the allele frequencies using vcftools
 
 ```{bash}
 # to extract the format field
-vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
+vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
 # also subset the vcf file to include only the variants around the betalain genes
-vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
+vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
 ```
 
 
@@ -162,10 +162,10 @@ Plot the annotated variants in betalain genes. In general, it could be interesti
 
 ```{r}
 # read in betalain gene list on chr 16
-betalain_chr16 <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt")
+betalain_chr16 <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt")
 
 # read in variant count of extracted SNPs
-allele_depth.tab <- read.table(file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T)
+allele_depth.tab <- read.table(file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T)
 allele_depth.tab <- allele_depth.tab %>%
   mutate(customid = paste0("16_", POS))
 # allele depth denotes first the reference allele and then the alternative allele, only those reads which were involved in allele calling
@@ -176,7 +176,7 @@ allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_gl", sep =
 allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_rl", sep = ",", into = c("AM00332_rl_ref", "AM00332_rl_alt"))
 
 # read in snpsift output and subset for betalain genes
-snpsift.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T)
+snpsift.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T)
 snpsift.tab <- snpsift.tab %>%
   filter(ANN....GENEID %in% betalain_chr16$Gene_id) %>%
   mutate(customid = paste0("16_", POS)) %>% # add customid column to snpsift table to enable merging of the two tables
@@ -184,7 +184,7 @@ snpsift.tab <- snpsift.tab %>%
 
 
 # read in betalain gene gtf on chr 16
-betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 
 # join the snpsift table with the allele depth table
 joined.df <- left_join(snpsift.tab, allele_depth.tab, by = c("CHROM", "POS", "customid"))
@@ -201,7 +201,7 @@ Set up plotting functions:
 
 filter_variants <- function(variants, gene, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt){
   # prepare data by only keeping the relevant gene variants and samples
-  dat <- variants %>% 
+  dat <- variants %>%
     filter(ANN....GENEID == gene) %>%
     select(CHROM, POS, ANN....EFFECT, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt) %>%
     filter(bulk1_alt != 0 & bulk2_alt != 0)
@@ -226,13 +226,13 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
     filter(transcript_id == trans_id,
            type == "CDS")
   filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),]
-  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", 
-                                                                                        "synonymous_variant", 
+  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant",
+                                                                                        "synonymous_variant",
                                                                                         "missense_variant",
                                                                                         "stop_gained"))
   min_pos <- min(annotation.filtered$start)
   max_pos <- max(annotation.filtered$end)
-  
+
   # plot the gene with variants
   p2 <- ggplot() +
     geom_range(data = annotation.filtered,
@@ -253,7 +253,7 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
          x = "Position Scaffold 16 (bp)") +
     theme_classic() +
     #scale_fill_brewer(palette = "RdBl", direction = -1) + # think about color palette to use
-    scale_fill_viridis_d(direction = -1, 
+    scale_fill_viridis_d(direction = -1,
                          labels = c("Intron variant", "Synonymous variant", "Missense variant", "Stop gained")) +
     theme(text = element_text(size = 21),
           #legend.position = "none",
@@ -277,23 +277,23 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   out_plot <- plot_grid(p2, legend,
                         nrow = 1,
                         rel_widths = c(0.75, 0.25))
-  
+
   # # rearrange data:
   # dat1 <- filtered_variants[,-(4:5)]
   # dat2 <- filtered_variants[,-(6:7)]
   # dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
   # dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
-  # 
+  #
   # # plot relative allele frequency
   # p1 <- ggplot() +
   #   geom_col(data = dat1.melt,
   #          aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 20) +
   #   xlim(c(min_pos, max_pos)) +
-  #   labs(y = "", 
-  #        fill = "red_bulk", 
+  #   labs(y = "",
+  #        fill = "red_bulk",
   #        x = "") +
   #   theme_classic() +
-  #   scale_fill_brewer(palette = "Set1", 
+  #   scale_fill_brewer(palette = "Set1",
   #                     labels = c("Reference allele", "Alternative allele"),
   #                     direction = -1,
   #                     guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible
@@ -307,12 +307,12 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #         axis.title.x = element_blank(),
   #         legend.title = element_text(color = "transparent"),
   #         legend.text = element_text(color = "transparent"))
-  # 
+  #
   # bars <- map(unique(dat2.melt$POS)
   #           , ~geom_col(position = "stack",
   #                       width = 20
   #                      , data = dat2.melt %>% filter(POS == .x)))
-  # 
+  #
   # p3 <- ggplot(data = dat2.melt,
   #              aes(x=POS,
   #                  y=as.numeric(value),
@@ -322,8 +322,8 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #   labs(fill = "green_bulk",
   #        x = "Position on Scaffold 16") +
   #   theme_classic() +
-  #   scale_fill_brewer(palette = "Set1", 
-  #                     labels = c("Reference allele", "Alternative allele"), 
+  #   scale_fill_brewer(palette = "Set1",
+  #                     labels = c("Reference allele", "Alternative allele"),
   #                     direction = -1,
   #                     guide = guide_legend(override.aes = list(alpha = 0))) +
   #   theme(text = element_text(size = 21),
@@ -331,9 +331,9 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #         legend.position = "none",
   #         legend.title = element_text(color = "transparent"),
   #         legend.text = element_text(color = "transparent"))
-  
-  
-  #allplots <- p1 + p2 + p3 + 
+
+
+  #allplots <- p1 + p2 + p3 +
     #plot_layout(ncol = 1)
   return(out_plot)
 }
@@ -346,7 +346,7 @@ Plot for CYP76AD2:
 ```{r}
 # plot for one gene
 # filter all homozygous reference variants
-dat <- joined.df %>% 
+dat <- joined.df %>%
   filter(ANN....GENEID == "AHp023148") %>%
   filter(AM00332_gl_alt != 0)
 
@@ -355,7 +355,7 @@ dat <- joined.df %>%
 
 
 # filter to only include specific gene
-dat.filtered <- filter_variants(variants = joined.df, 
+dat.filtered <- filter_variants(variants = joined.df,
                   gene = "AHp023148",
                   bulk1_ref = "AM00332_gl_ref",
                   bulk1_alt = "AM00332_gl_alt",
@@ -373,8 +373,8 @@ AM00332_CYP76AD <- plot_bulk_comparison(annotation = betalain_chr16.gtf,
 AM00332_CYP76AD
 
 
-ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff.png",
-       width = 14, 
+ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff.png",
+       width = 14,
        height = 6)
 ```
 
@@ -387,19 +387,19 @@ plot_all_genes <- function(gene_id, transcript_id, bulk){
   output <- list()
   for (i in 1:length(transcript_id)){
       # filter all homozygous reference variants
-      dat <- joined.df %>% 
+      dat <- joined.df %>%
         filter(ANN....GENEID == gene_id[i]) %>%
         filter(AM00332_gl_alt != 0)
       # filter to only include specific gene
       if (bulk == "AM00332"){
-        dat.filtered <- filter_variants(variants = joined.df, 
+        dat.filtered <- filter_variants(variants = joined.df,
                       gene = gene_id[i],
                       bulk1_ref = "AM00332_gl_ref",
                       bulk1_alt = "AM00332_gl_alt",
                       bulk2_ref = "AM00332_rl_ref",
                       bulk2_alt = "AM00332_rl_alt")
       } else if (bulk == "AM00331"){
-        dat.filtered <- filter_variants(variants = joined.df, 
+        dat.filtered <- filter_variants(variants = joined.df,
                                     gene = gene_id[i],
                                     bulk1_ref = "AM00331_gf_ref",
                                     bulk1_alt = "AM00331_gf_alt",
@@ -425,9 +425,9 @@ bulk_AM00332_plot_list <- plot_all_genes(gene_id = gene_id,
                                          bulk = "AM00332")
 
 for (i in 1:length(bulk_AM00332_plot_list)){
-  ggsave(filename = paste0("plots/BSA/AM00332_color_loss_", gene_id[i], ".png"),
+  ggsave(filename = paste0("runs/plots/BSA/AM00332_color_loss_", gene_id[i], ".png"),
          plot = bulk_AM00332_plot_list[[i]],
-         width = 14, 
+         width = 14,
          height = 6)
 }
 
@@ -437,9 +437,9 @@ bulk_AM00331_plot_list <- plot_all_genes(gene_id = gene_id,
                                          bulk = "AM00331")
 
 for (i in 1:length(bulk_AM00331_plot_list)){
-  ggsave(filename = paste0("plots/BSA/AM00331_color_loss_", gene_id[i], ".png"),
+  ggsave(filename = paste0("runs/plots/BSA/AM00331_color_loss_", gene_id[i], ".png"),
          plot = bulk_AM00331_plot_list[[i]],
-         width = 14, 
+         width = 14,
          height = 6)
 }
 ```
@@ -456,8 +456,8 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
     filter(transcript_id == trans_id,
            type == "CDS")
   filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),]
-  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", 
-                                                                                        "synonymous_variant", 
+  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant",
+                                                                                        "synonymous_variant",
                                                                                         "missense_variant",
                                                                                         "stop_gained"))
   # plot the gene with variants
@@ -486,26 +486,26 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
           axis.line.x = element_blank(),
           axis.text.x = element_blank(),
           legend.position = "none") # this increases the legend margin
-  # margin has to be increased so that other legends are not cut off, 
+  # margin has to be increased so that other legends are not cut off,
   # since the first legend seems to determine the margins
-  
+
   # rearrange data:
   dat1 <- filtered_variants[,-(4:5)]
   dat2 <- filtered_variants[,-(6:7)]
   dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
   dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
-  
+
   # plot relative allele frequency
   p1 <- ggplot() +
     geom_col(data = dat1.melt,
            aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 2) +
     xlim(c(min(annotation.filtered %>% select(start)),
          max(annotation.filtered %>% select(end)))) +
-    labs(y = "Red bulk\n allele depth", 
-         fill = "red_bulk", 
+    labs(y = "Red bulk\n allele depth",
+         fill = "red_bulk",
          x = "") +
     theme_classic() +
-    scale_fill_brewer(palette = "Set1", 
+    scale_fill_brewer(palette = "Set1",
                       labels = c("Reference allele", "Alternative allele"),
                       direction = -1,
                       guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible
@@ -524,12 +524,12 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
           axis.title.x = element_blank(),
           legend.title = element_text(color = "transparent"),
           legend.text = element_text(color = "transparent"))
-  
+
   bars <- map(unique(dat2.melt$POS)
             , ~geom_col(position = "stack",
                         width = 2
                        , data = dat2.melt %>% filter(POS == .x)))
-  
+
   p3 <- ggplot(data = dat2.melt,
                aes(x=POS,
                    y=as.numeric(value),
@@ -541,7 +541,7 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
          fill = "",
          y = "Green bulk\n allele depth") +
     theme_classic() +
-    scale_fill_brewer(palette = "Set1", 
+    scale_fill_brewer(palette = "Set1",
                       labels = c("Reference allele", "Alternative allele"),
                       #guide = guide_legend(override.aes = list(alpha = 0)),
                       direction = -1) +
@@ -564,13 +564,13 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
   p3 <- p3 + theme(legend.position = "none")
   # combine the three plots
   allplots <- plot_grid(p1, p2, p3,
-                        ncol = 1, 
+                        ncol = 1,
                         align = "v",
                         rel_heights = c(0.3, 0.25, 0.45))
-  
-  
+
+
   # combine other plots with legend
-  allplots <- plot_grid(legend, allplots, 
+  allplots <- plot_grid(legend, allplots,
                         ncol = 1,
                         #align = "v",
                         rel_heights = c(0.2,0.8))
@@ -582,7 +582,7 @@ AM00332_CYP76AD_zoom <- plot_bulk_comparison_zoom(annotation = betalain_chr16.gt
                              filtered_variants = dat.filtered)
 AM00332_CYP76AD_zoom
 
-ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png")
+ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png")
 ```
 
 
@@ -620,16 +620,10 @@ grid_with_BSA <- plot_grid(cowplot_leaf, complete_grid,
 #grid_with_BSA
 
 # save plot
-ggsave(filename = "plots/BSA_with_grid.png",
+ggsave(filename = "runs/plots/BSA_with_grid.png",
        plot = grid_with_BSA,
        width = 25,
        height = 20,
        bg = "white",
        dpi = 500)
 ```
-
-
-
-
-
-
diff --git a/workflows/genome_polishing/helper_script.R b/workflows/genome_polishing/helper_script.R
index 2a936488bd83484a3b9b39ff8ca99e7401ab80c0..1571dce75ee67341da985e78ed4191ec42e8ad34 100644
--- a/workflows/genome_polishing/helper_script.R
+++ b/workflows/genome_polishing/helper_script.R
@@ -1,20 +1,20 @@
 # set working directory
-setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 
 # read in list of all headers with the correct order
-headers <- read.table("data/NextPolish/input/out.headers.txt")
+headers <- read.table("runs/NextPolish/input/out.headers.txt")
 headers <- gsub(">","",headers$V1)
 headers <- gsub("quiver_","quiver",headers)
 
 # read in prefiltered fasta index
-prefilter <- read.table("data/NextPolish/input/out.prefiltered.renamed.txt.fai")
+prefilter <- read.table("runs/NextPolish/input/out.prefiltered.renamed.txt.fai")
 
-# use the 
-# every sequence that is in 
+# use the
+# every sequence that is in
 no_seq <- headers[!headers %in% prefilter[,1]]
 no_seq <- sub("",">",no_seq)
 
-write.table(no_seq, file="data/NextPolish/processed/header_without_sequence.fa", 
+write.table(no_seq, file="runs/NextPolish/processed/header_without_sequence.fa", 
             quote=F,
             row.names = F,
             col.names = F)
diff --git a/workflows/genome_polishing/process_nextpolish_output.sh b/workflows/genome_polishing/process_nextpolish_output.sh
index 65ab4586b98162e5bf5109c16f8a7c8a97d992bd..d24e30688ebd3274ae376c0e3874b55e2aefaa23 100644
--- a/workflows/genome_polishing/process_nextpolish_output.sh
+++ b/workflows/genome_polishing/process_nextpolish_output.sh
@@ -4,8 +4,8 @@
 # (see master_thesis/code/process_nextpolish_output.sh for more information about the input file preparation)
 
 # Setup
-NPOUT=data/NextPolish/output/
-NPPROCESSED=data/NextPolish/processed/
+NPOUT=runs/NextPolish/output/
+NPPROCESSED=runs/NextPolish/processed/
 
 mkdir -p "$NPPROCESSED"
 
@@ -15,10 +15,10 @@ mkdir -p "$NPPROCESSED"
 cut -f1,2 -d'_' "$NPOUT"genome.nextpolish.fa > "$NPPROCESSED"genome.nextpolish.renamed.fa
 
 # index the prefiltered fasta file for use in R
-samtools faidx data/NextPolish/input/out.prefiltered.renamed.txt
+samtools faidx runs/NextPolish/input/out.prefiltered.renamed.txt
 
 # filter the prefiltered file for everything that is not in Nextpolish file:
-/home/tom/Documents/tools/bbmap/filterbyname.sh in=data/NextPolish/input/out.prefiltered.renamed.txt \
+/home/tom/Documents/tools/bbmap/filterbyname.sh in=runs/NextPolish/input/out.prefiltered.renamed.txt \
 	names="$NPPROCESSED"genome.nextpolish.renamed.fa \
 	out="$NPPROCESSED"prefilter_not_in_Nextpolish.fa
 
@@ -34,7 +34,7 @@ cat "$NPPROCESSED"genome.nextpolish.renamed.fa \
 LC_ALL=C awk -v RS=">" -v FS="\n" -v ORS="\n" -v OFS="" '$0 {$1=">"$1"\n"; print}' "$NPPROCESSED"combined.fa > "$NPPROCESSED"combined.linear.fa
 
 # rename header file by removing trailing underscore character of Contigs:
-sed 's/quiver_/quiver/' data/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt
+sed 's/quiver_/quiver/' runs/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt
 
 # order file:
 ORDER="$NPPROCESSED"out.header.renamed.txt
@@ -60,5 +60,5 @@ rm "$NPPROCESSED"out*
 rm "$NPPROCESSED"prefilter*
 
 # copy to final output directory
-mkdir -p polished_reference_genome/polished_genome_annotation/assembly/
-cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta polished_reference_genome/polished_genome_annotation/assembly/
+mkdir -p runs/polished_reference_genome/polished_genome_annotation/assembly/
+cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta runs/polished_reference_genome/polished_genome_annotation/assembly/
diff --git a/workflows/genome_polishing/remove_ambiguous_bases.sh b/workflows/genome_polishing/remove_ambiguous_bases.sh
index c810a8a1099eb86902fb3ee23a0a589b6ce2d095..1cb8a10921dbfae66f89075283543dba9a6bdf2c 100644
--- a/workflows/genome_polishing/remove_ambiguous_bases.sh
+++ b/workflows/genome_polishing/remove_ambiguous_bases.sh
@@ -6,8 +6,8 @@
 # N specific entries can in a last step be removed. To reconstruct, the saved order can be used to integrate the N chromosomes again (which record the number of Ns removed)
 
 # Change these two parameters
-INPUT=reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa
-OUTDIR=data/NextPolish/input/
+INPUT=studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa
+OUTDIR=runs/NextPolish/input/
 
 mkdir -p $OUTDIR
 
@@ -37,9 +37,8 @@ grep ">" "$OUTDIR"tmp2.txt > "$OUTDIR"out.headers.txt
 echo "headers saved"
 
 # remove headers without sequence (Can be caused by stretch of Ns at the start of a Scaffold (see Scaffold 10))
-sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator  '[ATGC]'  > "$OUTDIR"data/NextPolish/input/Ahypochondriacus_split.fasta
+sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator  '[ATGC]'  > "$OUTDIR"Ahypochondriacus_split.fasta
 
 # remove and rename temporary files
 rm "$OUTDIR"tmp.txt
 mv "$OUTDIR"tmp2.txt "$OUTDIR"out.prefiltered.txt
-
diff --git a/workflows/genome_polishing/run_nextpolish.sh b/workflows/genome_polishing/run_nextpolish.sh
index cbf057986b68d5b7db03b06925d898ff70ff3143..a086c0bb8d88e19d99648ad6b209d431b08c6307 100644
--- a/workflows/genome_polishing/run_nextpolish.sh
+++ b/workflows/genome_polishing/run_nextpolish.sh
@@ -19,15 +19,15 @@ module load samtools
 mkdir -p /scratch/twinkle1/nextpolish/
 
 # set output directory for saving the polished genome to:
-OUTDIR=data/NextPolish/output/
+OUTDIR=runs/NextPolish/output/
 
 
 
 ### Prepare input files:
 # remove all reads containing ambiguous bases from the input using bbduk
 /home/twinkle1/tools/bbmap/bbduk.sh maxns=0 \
-	in=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
-	in2=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \
+	in=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
+	in2=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \
 	out=/scratch/twinkle1/SRR2106212_1.cleaned.fq \
 	out2=/scratch/twinkle1/SRR2106212_2.cleaned.fq \
 	-Xmx16g
@@ -49,7 +49,7 @@ round=2
 threads=20
 read1=/scratch/twinkle1/SRR2106212_1.cleaned.repair.fq
 read2=/scratch/twinkle1/SRR2106212_2.cleaned.repair.fq
-input=/home/twinkle1/master_thesis/data/NextPolish/input/Ahypochondriacus_split.fasta
+input=/runs/NextPolish/input/Ahypochondriacus_split.fasta
 
 
 for ((i=1; i<=${round};i++)); do
diff --git a/workflows/genome_polishing/unpackSRA.sh b/workflows/genome_polishing/unpackSRA.sh
index 7467f476e46a5d21632b6feaac7c4b95f805f971..6cdf885e2a49251f79856dff73ce59f34dbda4a8 100644
--- a/workflows/genome_polishing/unpackSRA.sh
+++ b/workflows/genome_polishing/unpackSRA.sh
@@ -14,29 +14,29 @@
 # download file, show progress, increase default max size so that the download starts
 # tools/sratoolkit.2.11.2-centos_linux64/bin/prefetch -p -O /projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/ --max-size 30G SRR2106212
 
-QCOUT=data/NextPolish/QC
+QCOUT=runs/NextPolish/QC
 
 # set working directory
-cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/raw_data/lightfoot_WGS_short_reads/
+cd /studies/additional_data/lightfoot_WGS_short_reads/
 
 # before running the fastq-dump command, switch off "Enable Remote Access" by running sratoolskit/bin/vdb-config -i
 # split into fastq files
 /home/twinkle1/tools/sratoolkit.2.11.2-centos_linux64/bin/fastq-dump --split-3 --verbose SRR2106212.sra
 
 # set working directory
-cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/
+cd /projects/ag-stetter/twinkle/ARC_projects/betalain_regulation_amaranth
 
 # gzip the resulting fastq files
 # Even though there is an option to gzip it directly using the fastq-dump command, the option is deprecated and should no longer be used
-gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq
-gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq
+gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq
+gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq
 
 # remove sra file afterwards
-rm raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra
+rm /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra
 
 # quality control:
 module load fastqc/0.11.9
 
-fastqc -o raw_data/lightfoot_WGS_short_reads/QC/ -t 8 \
-        raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
-        raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz
+fastqc -o /studies/additional_data/lightfoot_WGS_short_reads/QC/ -t 8 \
+        /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
+        /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz