From b6981cdad469d0edb524917d51f467b1ed90acc2 Mon Sep 17 00:00:00 2001
From: Tom Winkler <t.winkler@uni-koeln.de>
Date: Thu, 26 Sep 2024 10:25:44 +0200
Subject: [PATCH] adjusted paths to ARC structure

---
 assays/BSA_RNAseq/adapters/custom_adapters.fa |  24 +++
 assays/BSA_RNAseq/adapters/readme.txt         |   3 +
 assays/BSA_RNAseq/isa.assay.xlsx              | Bin 9293 -> 9292 bytes
 assays/BSA_WGS/isa.assay.xlsx                 | Bin 9311 -> 9310 bytes
 .../isa.assay.xlsx                            | Bin 9277 -> 9276 bytes
 .../isa.assay.xlsx                            | Bin 10101 -> 10100 bytes
 assays/isoform_sequencing/isa.assay.xlsx      | Bin 9324 -> 9323 bytes
 isa.investigation.xlsx                        | Bin 8417 -> 8416 bytes
 .../BSA_parent_betalain_quant/isa.study.xlsx  | Bin 12584 -> 12583 bytes
 .../Bulk_segregant_analysis/isa.study.xlsx    | Bin 10586 -> 10585 bytes
 studies/additional_data/README.md             |   0
 studies/additional_data/isa.study.xlsx        | Bin 0 -> 7504 bytes
 studies/additional_data/protocols/.gitkeep    |   0
 studies/additional_data/resources/.gitkeep    |   0
 .../resources/color_pathway_genes.bed         |  36 +++++
 .../resources/color_pathway_genes.csv         |  30 ++++
 studies/isoform_sequencing/isa.study.xlsx     | Bin 10164 -> 10163 bytes
 workflows/BSA/RNAseq/QC.sh                    |  18 +--
 workflows/BSA/RNAseq/adapter_trimming.sh      |   4 +-
 workflows/BSA/RNAseq/index_STAR.sh            |   6 +-
 workflows/BSA/RNAseq/run_STAR.sh              |   4 +-
 workflows/BSA/RNAseq/run_kallisto.sh          |   4 +-
 workflows/BSA/WGS/combined_filter.sh          |   6 +-
 workflows/BSA/WGS/map_reads.sh                |   6 +-
 workflows/BSA/betalain_quantification.Rmd     |  80 +++++-----
 workflows/BSA/bsa_and_plotting.R              | 102 ++++++------
 workflows/BSA/read_count_analysis.Rmd         |  16 +-
 .../BSA/read_count_analysis_from_bam.Rmd      |  69 ++++-----
 workflows/BSA/snpEff_analysis.Rmd             | 146 +++++++++---------
 workflows/genome_polishing/helper_script.R    |  12 +-
 .../process_nextpolish_output.sh              |  14 +-
 .../remove_ambiguous_bases.sh                 |   7 +-
 workflows/genome_polishing/run_nextpolish.sh  |   8 +-
 workflows/genome_polishing/unpackSRA.sh       |  18 +--
 34 files changed, 341 insertions(+), 272 deletions(-)
 create mode 100644 assays/BSA_RNAseq/adapters/custom_adapters.fa
 create mode 100644 assays/BSA_RNAseq/adapters/readme.txt
 create mode 100644 studies/additional_data/README.md
 create mode 100644 studies/additional_data/isa.study.xlsx
 create mode 100644 studies/additional_data/protocols/.gitkeep
 create mode 100644 studies/additional_data/resources/.gitkeep
 create mode 100644 studies/additional_data/resources/color_pathway_genes.bed
 create mode 100644 studies/additional_data/resources/color_pathway_genes.csv

diff --git a/assays/BSA_RNAseq/adapters/custom_adapters.fa b/assays/BSA_RNAseq/adapters/custom_adapters.fa
new file mode 100644
index 0000000..c6da458
--- /dev/null
+++ b/assays/BSA_RNAseq/adapters/custom_adapters.fa
@@ -0,0 +1,24 @@
+>read1_1/1
+AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
+>read1_2/1
+AGATCGGAAGAGCACACGTCTGAAC
+>read1_3/1
+TGGAATTCTCGGGTGCCAAGG
+>read1_4/1
+AGATCGGAAGAGCACACGTCT
+>read1_5/1
+CTGTCTCTTATACACATCT
+>read1_6/1
+AGATGTGTATAAGAGACAG
+>read2_1/2
+AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
+>read2_2/2
+AGATCGGAAGAGCGTCGTGTAGGGA
+>read2_3/2
+TGGAATTCTCGGGTGCCAAGG
+>read2_4/2
+AGATCGGAAGAGCACACGTCT
+>read2_5/2
+CTGTCTCTTATACACATCT
+>read2_6/2
+AGATGTGTATAAGAGACAG
diff --git a/assays/BSA_RNAseq/adapters/readme.txt b/assays/BSA_RNAseq/adapters/readme.txt
new file mode 100644
index 0000000..a029aca
--- /dev/null
+++ b/assays/BSA_RNAseq/adapters/readme.txt
@@ -0,0 +1,3 @@
+## raw_data/BSA_rnaseq/adapters/
+
+Sequencing adapters used for reads trimming
diff --git a/assays/BSA_RNAseq/isa.assay.xlsx b/assays/BSA_RNAseq/isa.assay.xlsx
index 2d21f88fb855a54a1f3982155f0bb99249397390..399f4b8a40fb6354edb648cda747f4dc1d558145 100644
GIT binary patch
delta 701
zcmX@>amIruz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErxy1yc!`Q!o=*d1@1|X`A>jOx%fO`U1l!;Fb#MR;}0(0jHfQ>pTu$v7e
z+$269EUYXA7WR_b&d90bY!zvqQr#UtnM+2yzQaw?BlqY^kEvW)S7&GIKbUl5+Fp|<
z?_b++EGT-Fo1R{_hkyC!<caL|33IPxxwSQSuq@ai7L=QwvNx(FSjZy9YeS6ojEP(_
zGt<j8%{i}&#jQ0<2yEisGqH%%+<23-3Fp$j_X`s89*CZrES<~UCU;BMwCDXzu?!hk
z;V1^9gY^?G-ZC)RtNc=j^}X)dgU@;!ABYrbI4oRz_~3+jTRFBBm~%@#bBgp>!*gfK
zyc2P?C7Z;jrCMsZ_g?vKzeN7@^zCU^0#;Rp%6LrUnpeQhv-Rw?to0=;gQ_O3uADN%
z`P!1*7u?@{_{uADyM(_+=Rl!0?=S1EPeKpPPv#F@xwc^5-Z-bPOC=74-Cniy+==En
zmNCDI^Hn;Izq<1}Uh8ey?Q;xDAFMrZ@y?s^{o>`%S&o}5x{fD)5jZwE)WrAEZxy-v
zYqs%M=ks>In^<o>k7w7<b^lVoaU;k3=1H<Mz;UW0Zx4#RTzMl9wNTz1MBS3t2lJF<
zCaWt*f|yPUZXl{l!3snjQ*Z%MJc?i{NYMero1*9sqMkx{>PluHUc3@mZlRJXh<8H?
rY?-98C5X2WXncV(*!XT`8>HyJpbXZ_p(4ZBI$2dkiA`1>m|PhE_>=+<

delta 725
zcmX@(an^$;z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYg<rWi&4rBiUq9^-s8Gxudt`8v50`3W5Q6@e$5Lb(@2+W-)05<BVz-~5>
zaFh6au&}ZeSlCNyJ0s^cTdPQwJAHE!CUePX*Kg@{nsBQtc%qi*l~v}~;|{29)UD1u
zasPUyfWy-(x6hq>Qz7q{-mb)7cfe}p)hRsef}9S8x=U{PoXoQ1fA9HVg66@ZwFzEY
z1u4_+F1e!;6J4?OR)Z_2-Uqi+s&{-&id|HhVzsZeX=h`&hX1;)x)N0x=2r~%B}*?T
z6!FbqJi$`$?3R7_!k4}!n??3)H>sX&&ejxsVj)9oA0NAO)g`4HhhlXzW*Da`ZZs+g
ztx~D}dhldi#I&aiWK4tZ|DRB&6L)V`p!3RCp@j;OS`~+M4X&6)ubg{nsn^TMb<YBm
z_#!92bCTcr_qusO&Sl#V8ySwTHF@#=$|P~l`aao^rP~fwev4<iIz5R)I(PLnD@FdI
zX9W+>@13R~w>o%z{mL!3bFG;>4!l>%G_Odx=k9<0wB^Z18glIyohAIIU5quFr}Fsr
z`CZSue@#xjX!|Mu_+m%%`$zYmsbfTr_{|e#XMiJBTizZNdpYt(AZmfUIf%L`uMg%a
z$xK#LkOVOu72H5nr-BuTI;!9TqPP{oRG^{*h&Nf$A4ENY@YIydK)g65u-pPAQxNaE
z64){cWlIom0nqq-Ww7yG$~H*xe_q*49+Voyxi|&085tP1FflOj15+#lv`tn~QDT#m
H2Z;axQT-py

diff --git a/assays/BSA_WGS/isa.assay.xlsx b/assays/BSA_WGS/isa.assay.xlsx
index 1397489d55497f42e3468d5e7c53118165c9f3de..2e357f7219a77f02119427594574cd6d4d116af0 100644
GIT binary patch
delta 717
zcmccbanFM%z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zP1_4o__@dOD
zVtr&in^l?B!K(aNv_P7wSZ*_c=y3KgAbPSdmjQ^X=lTE=E#zJZ78T)B195Hmioo0r
z0;(YHb%EV%AbN`Ue6X;o6j(S$YC9vRj<Z#yc}jJ6{A3v!?fMQkMUUL0D?O%iWnG<}
zt^Z)sjcI#Lp1gl;!?B>~Rc?BE*&hDopOYuD+b7JulI7Oc+`+P7hgeW<ddl9YmS7=^
z6t4|2+A}6{$;?bI*EHw6E*7`eEFrLod(XroPIKc;&L*5o``#}|$a^4qYO-`LcbnWT
zUDKZTH^nk!T!o_;j1JaMxOmIJWUul|9oF}{XAeH>ZG0e7q~Wk|@!^9L=56KJR$$I8
z^~@>KV-3%pDf3Rm)s}1$pO$K=;of`YyZsXR)6=)7T?trK6)NK~jcZ;3H_z6y*Rs}^
ztPHA}xVm!64CiZ0c3*IR_u(tA%<U5X8l3}$+PuH4w>}9yG(VX?bmiKDd3)oWzAlwG
z6n1;n(sL)8=UB%4D$ZBwIR5I+>v*lVWw*~UD1ET@yu~|j#`lYtKW8~^vgkUV_(kB@
z<WLjeN556%>aW?xU!Bj}{cd8t^*o+kKiB<B{l<+Fr%PmKfaBCo-X0hqlk4S;K-6Y=
za}f1LULQosDwu+(5Cu07HAle;MBPwu0a0>_U@Aq?0mNIT=nta4LwJ@-W*}an5?D@F
zL1yxKB}tITD<!aH+RBz79!R1|87whZ*#;^4A1Q<NOR2~(PM>V9qQs^z4@|xc0Of@S
AM*si-

delta 718
zcmccTao>X{z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD8w5m!;)_yq
ziuIB8Y*uAf2dna9(E@3zV!6!(qQlw0fauA-Tm~Sjp6dfhw2*rtSX6{h4aBwKD*|&j
z2&jU%*9CU7f#@mX^TEQVQefd2sqKuM*KDmKRqpi7Nti4nqg}tH*J;A7uHcDUqE}X#
zUynPWx>2_}^Thq@l>!b=uiQR&?oEZfUwXR|f87DAl~<?munTfJ6zVRy<#RI2lK;Ku
zg9(}ki`FK1X%(bQySwC$N=$Uc)>{p(oO&PJPO0AUIVpBgWs23l)~20};Trzyw(3e$
zWtd+v*q1E5pism&gYg7Qy|Y{P;R|2-mTVT;v)!b6wmDl<@QH;Ct$lp#&Q+I`ZXAl$
z&6r`Fs<_dpAhb%Q`s=}yaS_v=E|4(|y8nMdole}nS%J<gUxgMbL~2zW(lxkZ7QJ%r
zrKMgkBiB6(OyY~2{LV>!=ilq*1v!^(KWt<;zSiW$`zw>gIqUmmLzZqkRQWBQ>FV?(
z4(Z(0)2tNvi=GucJim9Eg52uh_4O;a+|IRT?l|yXCDXhj<(|9$`O}ssA8E+7Uv!r6
zpLQ|UXr9XB+vj&Z@BTG8@uKag{Nsxq&F>%Gf2NKRB~lm5&HzWMt-L)jLMGSA8-b`z
z^5!7wwY)xvl2I@PQNapsAZoUP6^Od7-~ys#6~R=pq63JxRM8(qeS`2Ul*~Z90wu7V
zjDpPMb4rpRk(Wwf%e0g&K|GK|qcT`xj<O9>{6AC%>z7oKVVp79Oht*!KpvQW8357f
BBE$dy

diff --git a/assays/betalain_LCMS_quantification/isa.assay.xlsx b/assays/betalain_LCMS_quantification/isa.assay.xlsx
index 134a053fcd23d521beea075532f59f2dd93aa81c..d361dd4b2ffae902a2be61a221938294149b6bfa 100644
GIT binary patch
delta 701
zcmdn%vB!faz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErxy=Nk!`T~|L8@+X8GtAr?hhc*f81>#*~x8uY9Q`9zI$N3eF9*kwhHWK
z0|^(2&j+g#mI4b~No{B3)N!_oG*7ASj-UKXTD!i(P0=Iw=t_^NTv=CVXX`(hbYt3H
zlPB+A+i)x>dX<}=UbcsS`RC+`?Dh$BuVlHkHFvNq*dZ2_o1U^aswG&+BE@S%jP{I)
zTrxA$%Qek8uZzX4HA@I=;@&f{h|}D7ld}ou(!Tc#67n91o|-J3%iSh-OV_mL{Y|k9
z8CT&b2BU-Z6E5B|Fxjj8Qit`u?%9LSdK({z6lpjtTzvT8gn3&zwiTFjOFeUn^jO1l
zXUe=2akV9z#HXcNYPk1a`EI{N{`B<iX;%VPRfWoUOyinYz|FJu?6s`*B`brfCa$iW
zGQ;`WlHC{F-+lPXD|5SqzeeXkp*HU?>#a{h56w^J4_&#oVBX$1r>{#T4u#!bwe;MH
z<~f!zzl!r!I*z}(^EzJZZQ1Q}3`!rYJ#X>OoALeP<<D7;n=HDHCw>t)HaXP9_t9?^
zx%z9i@mJ^bcE6igZ#|D^*UxqTQonH{$NT0c*%{z?k&(9t#a67m5r~>7Zw{hP$?Jo8
zN-~qh6(m7SBLz1QRi$7BqP8ixfT%wZ%1O}y#A{LX2T@lcJaHv65YJx;EH_cf6vR8B
s1h$M**%HK?2sA!H8EkyDvJF!7A5aGC{h=(wSUOo$MTw199++Gi0FRgifB*mh

delta 725
zcmdnvvDbqqz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYg<u((D4rgy<2C2HmWdNdhxIchI|8cj0WGA=rse!ob`0j!A_6dNE+A6S{
z4J2G7J|C<~SPCp`CAFQA^O~(yq{^MXISG@0No&_{>2;cLt1Eb-mgtpL=GWs6sBYA)
z&OCAddZmEF(<`^noqJOu@0Z@L#9w#7YUR}_JnVv;4u!f)Zuy+dvgCj7`Cx+P!J@SZ
zURnhy)9x<0qY@KcvGrDiE2rKEw^OQjd`^m8RGDJ6ueE7sW4MO@x~;krRT<`24E7~U
zFDMl8&0sviQt#}RefYwcz9pMQ_G~w)o^8(76ntVKLu(%&yK~hgr5lH0bu(rdrz&nV
zDhRDoss4KKWL(6wrwe3EgYN&IP^S}jZ&sl5%2%O<3XxhBhjb0Dm_@IgdugfH%gA-l
z0+aY6C%<!&-}(2tc|p!)+YcKVj;}R&@&3vranAZa*^s5%4pn}OXSzB)i9<Sf^)xF*
z{-S3E56|zNrXaUEczylKEw^*6nL7@=SIIQ5NV(_kfBv-P$wwM;?H8RT{HI-vHJYdL
z`1bi-&%1w3PP}OQDgXFlNAvqf_n)a_M2`5)jj}Vq5hE>c4~niBc_R=tLEaohos`!H
z^OR&Jiz!Hgn1%{&AgWTq3Pf#HZ~;-jA(W${1BlnG=ntZ<KzL$GW+0xQ5?F46k|~IH
zTnTI$hq5JzHvwpTyfWDMDrFm__}{N=CJ#yt;#{19*^CSfTbLLa_<<=F0m>$es3@`V
H$%8}y12G{_

diff --git a/assays/betalain_photometric_quantification/isa.assay.xlsx b/assays/betalain_photometric_quantification/isa.assay.xlsx
index e18f639f6add7f87f39ecc7590f4455803e5d5b1..68931c6c470e924b1bd598f466bf6f20196dc4dc 100644
GIT binary patch
delta 700
zcmezB_r;GVz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErInM;5z1f4<K&s~R8i1&aydOZKhxyimWG5F1s)4w(1hv52W5Qsg-U{yq
z>DjzWYCc%lTNW%_B)grFQ^(mV(mbWQJASf}ymozuo1#bV(Ul%kxw5X#&enf0>Bh9Z
zCQshKw&7S%^eQ(!y=)Kv^3TZ=+3geNUdeK6YwloKutO{;H$7!<R7<dsMT*yk80{Go
zxnyRhmus4HUKfj7YnBk$#Jy)?5vRHFCTA1QrG4)gB;-91JvCW6m%B~wmab{f`<r4J
zGOof=3`Pg*CtSQ`V6s>Fr4H+R-LnUu^)@~bDbjFQxcKnF3G=pcY%4J5mU`wC>9L0A
z&Xjp4;%ZAaiBC(l)Nt>;^4)%k{ORf2)2;-pstT3yn8r1)fSYIQ*=t$rOI8L|O<Y|$
zWrp*$CA%-Uzx(i&SLSvJe~r$8LT%n()?1&19-5!bAG&gF!Mwe3PG6Tw916R=YU#NX
z&2ubceii4dbR2(m=XJc++p^o|7?eI(d*0%mH{<)o%b&9xH(7KYPy8ZqY;vfH@1x%;
za`o42<FC%=?S410-g+L-uAl4vrGDc^j`z*G6lQ?qG(^cB6kAi3j6l>$C36tPq^u93
zYSd&V`zuR=m|4nhAZnAc6^MGL>;j@pRKQe~iUWwZN5vmR@v4G({;FmmUOR*bGU=%*
t$fQYXVEY`^EJ4yWYNjA+vziT3?Eg>$Yc)`pVO%@eS6zwCRSB3{834*A1!e#M

delta 702
zcmez3_tlRlz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYg<vbIJ_GS-a1F4$NYXG7y@_qn`9_CvMlAT;6s0QNB64U~7j|qc~dMmse
zq-XOgsrg`GZ&|Q#k?eLx&TF<-kt%ok<|IrulGm=^((5$gR#)&uEzv8h%&*5CP~E6o
zoq6K^^-2MUr&n&DJNKqS-Y>mfiNEfE)yk_=c-RFw9SU`q-10e@Wy$~E^T7nogGFl-
zytE2Zrrlj~M<ph@V(YC2S5CbTZl_f5_?#5Gs4~TBUu)CO#&8Y)bz5~Msxr*480<@y
zUQj6Fo56U3rQX>s`|yP?eM>fr?AdNoJ=>hEDfq-fhSoklcIT=~N;eM0>SoL^PF37!
zR1jLFQvLPd$+(DVPZ!9T2HpQZp-w06-mF09m9Ihz6(Y4N4(S?PF^gU~_tH|Ymyzq9
z1t#%DPJZVkzw_^P^MahqwjVY!9A9hl;{BCL;+*w;vLQ>i9jg2m&vbQq5{Go|>S<Pr
z{6)_S9-iMjO+jvT@cR0dTW;rCGj|+#uaaqAk#f)7|NLpolaDmy+Algw_)ohSYcx;g
z@$K`wo_GJ6oOsdpQ~vSAj^_7|?mtt<h#c{ocPh*PM{2N=Jt(@SC>epM6H4YFicwh~
zL{+QFO!iZj1Tiy}-9XevWh)T%R@nta8LNP)N)-nXZ?}p+h~iNN^ZZoJK)f~x4`k94
vRj^6_RlycIs9A#KtJO?F)Fw3>r1<}?2G*;uF2lHPvX8nFo0}3ay)pm*k#!>D

diff --git a/assays/isoform_sequencing/isa.assay.xlsx b/assays/isoform_sequencing/isa.assay.xlsx
index 9719eb30c1c6f3ccd645f4189923b8d6215fce9d..ae346d235c02e0ea6c0a10e07149fcc41e5f20f7 100644
GIT binary patch
delta 724
zcmaFk@!EqYz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErxy=Nk!`T&>L8>ab3_#Rst`8v5x!lV@vXfo;)Ii)^K0h#buK?JnCjz_K
zK*9^f=Yxfvq`<<NQrj6hb)2mt%~Pto<0os%XxDeRDSG4{UFk8EE9>g)Z2bq5ZcN*2
z^5p$%8;%7<uX5AV%l7au|C~IL-9BONl`OZm<_?wxJH&!=(^K|FwFC=Uq<C$J(Vj7p
zOJ-(zxu!Yib+Nd$W(k2!+<PV#ahe-%ayH>y+V_4zLf!+>Q<J50x!dG!>6-SuzbTd>
z<0>4*V05s4!o^z#CVQ1%>af1oJ$vw3Z{q`zA`ORyiw_^1FmEfzwgPi*sb@}+9&32+
zOqq8guC`>8__S0@4fozF-|d&kpPs%w?MlF^s!$n^X<YLPxOujoy_U7UWMxp*#MPBk
zW;kD4vipMjyANM^Wp0=7*XSH5)aLzVz4b}xq4~-Dp)1!G%-b91^mVDkp|IPlmYzG&
zJjXKTS8={d$MIKpUdL;_ExUb=LFt3F=PllOGrnKE{5i{UlSS9@#4iHJCWo5%KKiX9
zSAWem{_1?*?spUGt>^LV`nm33>NjrWc;CEHb_O_3z2)sevDG7Q1fmYen}evI^7>$&
zlFVc`1xXMyQNayFtyHiAQBM_IK$Ny3m?~6s0P!{}`hzGIB{0uT$qdA+gYXV0nSyxV
zmB5x+C|iPf2Y|-+D}#++rEG%~{qL2{<Uxr+oQqR1n~{NG3ljqaKQO@}z>>)>DoSiN
H@*oia&vgcz

delta 725
zcmaFu@y3HEz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYg<u((D4rf<j2C1s#G5}GlxjukI=W;It$xe3VQv-2x`TW4#y#ipPo(Sw_
z0|_q_pAQyxk^&27N^NK4yk=_^sdA@pPQqkO8SVNly-pKubp=n<61}p@{CeC0)s4E<
znJ4aFuM}{2dgb=Hb8jl-{nFc&`0EZ>t-Lyghh31<p-^|pEuWKFmi+HMA573ZShP05
zORFGd+TA60RAQnlw%%%R<<$G&c1rb*&q=Y1DpRcXwKnZ+4A<~qw^di7D#QGW!M<ea
z1%)EM8H^`b>Yd%P4`2Aww`8-(p6w>pv(4F>f=?`DXzk-;cdojmbmLI0ZpIAbRK<-(
z1))_c)n5;ujEk7|bb*X%(Ea}t>U84n%?fm0`6{$fAyTX2kgmZMv*?v`FD><Y8M*FR
zU=m;C<abW;JO5rcFUYxU`(Y!)@wFx|-d~v{&RO3l8?toUp~`RZOjoBTaY*N`o@S-U
zU-Yct;rYGO6y#P1udiRZ<#w($bH{=ADw*aLDfis{&!4tD`A9>q{i3sk|Fny-M)Oo2
z-#)+VdH1i$i5G1@<sV<{Xnz0b{xfxq$PvGJgX|1&q<YEQgQBZj-UvkPmp2DdKjihn
zJSCaQt_qSMW`cqnh+3gw1)`oPxPT}vMKD#M=m6qvQuGH=%t~OMtCAUrR}10oS26|h
zzA1q%Ggr0*@%96a?^6aFzf#!-DgNIno5_PxgE$wbU^XKI!xkn627X|QMS!J~omG_B
IY~?{B0Q=`78UO$Q

diff --git a/isa.investigation.xlsx b/isa.investigation.xlsx
index 8c37020a85cca1fe30c338cad3743015e7e4fa3b..586f555874be0956c7b05f5da13c824073eaa385 100644
GIT binary patch
delta 629
zcmaFp_`s1Tz?+$ci-CcGgF(l6BTok-laBM`X^cuhYV$5e0WkLrlRQZDEt4upRD@X`
zENaT41>z>MOk@VpU)blefmBWA1FPD_w;ROWTrN7F2_!5b0T#BG*uu!E<7^dao>JW%
zKlzWOc72DNqDSu0l^#>MvaZg~)_*YR#<aaAPu{<_;aE`gDmOj7Y!CnP&&d<n?Gxr+
z$#QFJ?qFH4Lo6saJ!NlHOR$hdir0o1?HLofWM-z9YnpRj7mHhKmJry)y=P((r@8Sa
zXA{n)eeV|}<UJ5QHCZ~ByG`zvu4&Kvn_?L<uEJ3aMhELBT)bsqvRC<~4(of}vj?B`
zHa-w3(r{R~`0&9A^R{wqD=_Dldgc`Av4-c)lzAuOYD+eWPfNAbaPPhH-F}Ju>FL|k
zt^};A3YGDg#x<{in`i6UYgy|{Rt8l~TwOV3hV!*0yDzxE`|y=l=5`5xjn08WZQftj
zTc3m;nxD)cx^iv7yuEQwUzbW83cI~(>A4flb1Y+i73ZsT9DjA^b-dQwvfJkvls;H{
z-r}7%<NL+SpR*h{S#%vw{338{a;S;#qu(lW_1A3Uug>S~emAk+dLGZNpX>gme&a@t
z(#<W>z2N8)mbC}PLx`*qi0Y6v2T=!Q^+D7Nd6~(4a*`mXrkopyDwMMVQETN~K-6bB
z2M`5P7AOx^mLm@~ZLPc|M8XtA$t&0(#chg$8Ax!Bf(&E%<edsiY%H?CWX1ph)I0EI

delta 630
zcmaFh_|TCjz?+$ci-CcGgW;O(MxG8vrfas7r!gu4sm;3>1;E@hO!6Snw@j)aQ4wZ&
zu&61E7Koe3GLacXe_@}?22wSd53Fhz-)<0hbGhhzCXld%1X$Q!VhbbZHCwAll{<ZN
z5+?tV)UMys>onn3SMWqF(JQOWug4uw-KblgdE)-{N&$zbS8ks>_ohPLFTGufzwUt5
z%Bxd&*abNq3U!y<@;RAh$^YK-!352NMQanhv<gzD-Cc4=B__IJ>#YV?PQ4Fqr&RCw
zoD{pLGR10NYtzoga1H-;TXiL>GR&_S>`RtjP$=S?!FYnD-q|ht@P#jZOE!z_*=|xj
z+nlW__{2hn);>OV=c-FeHx9+>X3Q{7RorM)5L%^D{q^9<xQJ;_7s!|f-TyzKPABf(
ztU%|LuR;qIBDE?G=^9)yi(WbR(o(ONk?WoXCh<j1e&-~=^Y3-@f}G2?A2u=^Uu*K>
z{gp}Lob`ROAxpO%s{9tubai?Xhji}hX;zB-Mb8Qzp5Hr7L2h;M`udeyZs%GvcN}=H
zl4)L%a?joW{AtURk2K`kFFH&3PrDdvG*9L6?en{ycmJB4c+vJ#{_(|*=J${8KU2qu
z9H*O`rF+5gB_wMPiiluYBM{XtYYw6g$m)ZrkFus9N<+>KL>0(cfv7cdE+FcYoCApZ
zC@V8LKwc8W%$5h6v_{?%Cc&%#mXK4hL5kdD1v8NRYy}y{ipe_^l-O8hf$59^0H}oq
Ap8x;=

diff --git a/studies/BSA_parent_betalain_quant/isa.study.xlsx b/studies/BSA_parent_betalain_quant/isa.study.xlsx
index ec46c0474b3ff8ea33525da7511c111656479c76..486411ac1c8c88a7585c68191536289931207512 100644
GIT binary patch
delta 723
zcmZ3Hv^<F?z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErNn`=hcR32UL8|;k3_w()$On*Uv8XL8h`UKj4aB`AHJb^<EtdrwHBWXo
z8%Q`>Wj<K=j~ZB5S$#Vrr;f8#q<Knpcl_kX8rt<8Zi*hcM^}1G<;uD`J6r$3q#M)r
znml>`+J<96(W~6_^s+tt%ReViWVcV4dnL=Qt+|6`!49#Y-1L;aQ7yqj7AamEVzg&W
z<dT`0Uao1*d0i}Styw}~6Zf8pMV#iwo19HJm-fA1kdXI4^webOT<$ixTe_w_?{A7_
z$hZneF&G`JpK$S(fyrLwmpZKPb<ZAr*4y|%q)5YI;o`#wC(PT*v8}+ITk4rpq{kYb
zJ5%PJh^sBxBt9+GQp3IX%6I!E@~5Y7PrDMZswz~*V;a}I0&bqIXRl?gFIgE>HF0(2
zlo`&~mh8UZ{_ew9UYXk^{53iU3blEES#Nz3dT4$!f9T4!1@rdCIelF!aVYHes-@>n
zG|#b&`Bj{+(sBINo!9YNZ_93<V^I2F?RkrL-i+@TFMrN*++@*pJn@UbvB{w(zK?#Z
z$kku7jlVjdxBK11dh2;SyMC_wm->wxIo>yCYs~`3DZ7q6D0)0}j6hV4jyZ_hs-q90
z^bKStGwDi#n6kQVASzYY3Pdf`bpcV&A(XD31BjQe=MSO|KzL00W+0xGK3EQ9(hPmD
zNt^Y-7JkvU1j*|gn1ZM@0~@61Uu|F}4@wMY*B(^LWn^I3!o<M94@|HK5I4zCiS36r
GNB{sTSO>BI

delta 738
zcmZ3Uv?7Toz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYgC6NV0-{mOa2C4EFF#u7GA|F7a#iF*XAnqn9H4yif)NCdYw_Fx%)I8bU
zY#`ximHA-dKWboMW%cchLf34qB3176%}HQnV0g^Lz#zyl`JjRD<o_D{^;>$KCfw=@
zo~R{yWtI8$xC5#ib*nQ^+`nEa;PCXy?Q`eeRLJ|Kw=4129k5z?bqWu=Ag4p2?vh(R
zC$lX1-+Mlopn0%pZGx9pLCUndOYW$|L|1IR)!@ph_rdLy>K&hxVi#4WSnX?V+SwSc
z;lFOHu0&Oa`4xkG$<hl7MSL?DPq5TGyJa7~@TG6bW|2MHO{!;`vo!^uSjf=Y$H(qm
zbxG;Qp;+CF8OEuK8;uG=t5mAL9y}QrG41IB8PlNq|0mSx#NC?}=)Ce(XrV%+R>dJ*
zgDYmyE9YKX>h&^m-Lt?XzR1b%oaA@@y>4ERbJ_O8Muy{SO<ugeGD)1XzE3t}>9#|a
z-{P6BPEX>H&RsptN|C?lS;52ed#5SLtqxvazjDj%Tx;fz1MgKb%_~ywx%;0#ZF%yM
zhFtqaX9@pl7h{d)sXV@Ye%JHvUy~Cr+J4GEzSz<H{?Yws>KKtz!R9QjS>PCE)3FCd
zmb;D-h^p2x2T@ye^g)!Kfy`t^T}cpAM%N8QrRZ9Ls0F$%AnF-}($RAO@$&TiLDYT-
zk5S(Y#Iw`~%YjUqt`9b8lRntO&-#`ic|8MD5S40R!wF9elUEs-$%A-j*B(^L1tyIx
Wz@)(sOuh&ZKl#6b65CI0kO%+`kSE6g

diff --git a/studies/Bulk_segregant_analysis/isa.study.xlsx b/studies/Bulk_segregant_analysis/isa.study.xlsx
index 43a7b33b2beb38dc6c65334db05c480f743bd916..b7b89a26b6cd86616539825c87df590f547b6e4e 100644
GIT binary patch
delta 724
zcmcZ=bTf!2z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gEr$!7u4uQ^;m^khR`0}z$W`vD{x%2x{(y&|Lr;{Fj@%><HNDGD~~g6M8G
zkZ`Zee6X;dJXknXemf(lj<Z#yc}jJ6{A5u@?fMQkMUUL0D?O%iWnG<}t^Z)sjcI#L
zp1gl;!?B>~Rc?BE*&hDopOYuD+b7JulI7Oc+`+P7hgeW<ddl9YmS7=^6t4|2+A}6{
z$;?bI*EHw6E*7`eEFrLod(XroPIKc;&L*5o``#}|$a^4qYO-`LcbnWTUDKZTH^nk!
zT!o_;j1JaMxOmIJWUul|9oF}{XAeH>ZG0e7q~Wk|@!^9L=56KJR$$I8^~@>KV-3%p
zDf3Rm)s}1$pO$K=;of`YyZsXR)6=)7T?trK6)NK~jcZ;3H_z6y*Rs}^tPHA}xVm!6
z4CiZ0c3*IR_u(tA%<U5X8l3}$+PuH4w>}9yG(VX?bmiKDd3)oWzAlwG6n1;n(sL)8
z=UB%4D$ZBwIR5I+>v*lVWw*~UD1ET@yu~|j#`lYtKW8~^vgkUV_(kB@<WLjeN556%
z>aW?xU!Bj}{cd8t^*o+kKiB<B{l<+P@0;f;%>u`%rHVZ$da6{6K-4-Fa}f1hMIS^Z
zYRF7BQI!NS{Z-vS)O1xV5OrDA1w=`zfvI>k2M}+enm>s81mT&en}K*a>R>sLNvG7o
zCOuULTd1aC36f9LFa=RFG;ENf|E`9aJSZ_dnwqsUmyv;C3ljqaKQO@}z=X+0no4Y1
HDj*R6$;k+Q

delta 725
zcmcZ^bSsD_z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYgC7%UEzvgfO(UT2%4M0>f?+1`*C|@mD^oo!gi2FxqH4{j7r6|~_3!=N(
zK*GH;^TEP;@?ham`R$CH*KDmKRqpi7Nti6Es9nFM*J;A7uHcDUqE}X#UynPWx>2_}
z^Thq@l>!b=uiQR&?oEZfUwXR|f87DAl~<?munTfJ6zVRy<#RI2lK;Kug9(}ki`FK1
zX%(bQySwC$N=$Uc)>{p(oO&PJPO0AUIVpBgWs23l)~20};Trzyw(3e$Wtd+v*q1E5
zpism&gYg7Qy|Y{P;R|2-mTVT;v)!b6wmDl<@QH;Ct$lp#&Q+I`ZXAl$&6r`Fs<_dp
zAhb%Q`s=}yaS_v=E|4(|y8nMdole}nS%J<gUxgMbL~2zW(lxkZ7QJ%rrKMgkBiB6(
zOyY~2{LV>!=ilq*1v!^(KWt<;zSiW$`zw>gIqUmmLzZqkRQWBQ>FV?(4(Z(0)2tNv
zi=GucJim9Eg52uh_4O;a+|IRT?l|yXCDXhj<(|9$`O}ssA8E+7Uv!r6pLQ|UXr9XB
z+vj&Z@BTG8@uKag{Nsxq&F>%Gf2NKRIpR0ZQJMvgR0|b*Q2bP?7=fs@D&`>SnTkG$
zO3;v*Y^*8?V*07Nfv9P!Rv_w<stbscPy<tOY7QXY0yTdS^%24|RyPCjvem(IAd^n2
zgH3v(4z^HL!xAK)pkWH4rfb+B#s3`*GkH*Icr-O@XD%ZH!xkn627X|QMSzKu4K<b6
Iv{gVN0Pgi9`~Uy|

diff --git a/studies/additional_data/README.md b/studies/additional_data/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/studies/additional_data/isa.study.xlsx b/studies/additional_data/isa.study.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..566ca09ccb44bcd0d23eb8c1fe9b6e8b6b8de26d
GIT binary patch
literal 7504
zcmai3bzGEP(_XrJ>F!#jL6j8)k)=zzJEXfsS{hsdX{4kP1f)c|k#40Mq&xf;dDQ3i
zdA|2Of9!A1y>p#8J2Piy?&~PZARwXx002|~Nc^5=?DQuzO*jBx7ZCs;fIZa~x3hIN
zwRP5i;$d&<q<hER#=00PXWPt$8*~DeSmKbz)P15V(fsCp4^jRhVAx#@L@~Xe*f{Nm
z&r-lfQ(!>A{#tOyE3K7Op(;{pj0AE&Ib2Bqh&DCEYC~Len$nmXO_zzP_S}KU>76+K
z%LQBES&!_%WRB%GQG+5<Ql^nuiYvleB~R|~;hqVAvO^rvFrX(b^8ujLNDE^!Z5*YD
zJdLLA)+N)P=#z)=WcXbNdK2Z2a9_!F#g=Myw2wmKs8C7WT*(S5^WZQ7g{3UmSK&fF
zx~0hupuZ#yJVnMTTpH_)#OUE%jh{4qNx-VBx{m*fT^+01WC_9l4H>rp)8<$aoDIs=
zrMzsp_G3Y8$HSd2yfdH0YfqoR{#+D@gL#ZC__+Ulw2rvw5gr0{L^8_hw$J=aU45zL
zYfC>Ym3|v&PQ{$?9v;@|cirohrmFi#)`7GeJy~G<$@7hOM)%o+HW0O4MT!>4760-C
z;D;w*#{V^tVc+x}O|6|cf6Cl?Yd>9TY902Q3_JjU@k{2;jbw3LlY9#ors$5h#L`8Q
zEd(_)r;aB33lraiPYkx?m62Jk0pE6B#E4=>TB1ZbY^|F2Z1xF9ZbT{{Q(%G$$$&^b
z{UQCIdaf-UaM&=AL+DX>?6WbGSIn+t>810#8lp7`A)Uycv8Nd>RbaV9_9$StLXV{w
z>*ta6P}fgUhXo}&&gWfzD}nh{{i0K~jBl^Fhxn~+we8kpw+-LWm}^~jvzW+IzIg62
zvXc_5zLLqNIIya#HePoL&P#&~WM)*QSm3GT%P;h3HM&qt>!PiGj*t<n{$4|o6y#6u
zrio^J$uq<(IN%NVX_0?g4FP5mck5p!`=<cft-!xoHjv?++KpvvV3sBOyO^7u<5MF$
zyQeoN;isXkTDWi@?OdV1JJE90l4gx{VtuZoeS#49zDm|w%0kfQ>O_?sLV=ozlHOW0
z@wx-D*>`d`R6%wGr8I+08h1^utYHV}%{P5vj@vq=4MvPcXJVKbaoZH6;9I41^kImV
z6K1vV-II{!!IqSh2wgjnBOiJz)HBlcwolKEMcwp_VR!~%14H3m1^Q6G8ch=G!n1nu
z>0@O50HKJrGwDEvwBTM&vebJfNRqlH)G_0W>N4$4U)!9{(}l4MwJ~wR)#jYVF8WzT
zcI#i9of7pf#&PM>Zg<h{lP>X{OZ0W*lX~neZczXKhQ_`%(2Z%FEKE(E{|wWlK5gnQ
zm^U(r005GI$o_zE-`Mlj#Gvgk7Z7>QD~+xjEaPpR_jHs9jsPBw0a0AXD}Pvv!C~nG
z<>I;nuUcvpRa4E|C(~Va9ea5UdLPdxD<_+R2hV(8g%|RCw${Ih+^jCVoZhZW821o(
zJZvYs=~dTFy_MLg)u{3HV%K;^le_{r5MAg^yjaE(_U`Lse}uYknU9sF_nNG1TNy_?
ztwzOb`Hd;~?sB$!i|3czhIrS^ErOAS+mWRwsoRUjkh0<zmZyvTSc@uJ>faS~%$uXH
zcFT{y#L#`y*<5G=CYkFtoR)3-aJkvkZl#O8K4<k}yCj^gxpz7!xX0FZMY=XSF3h<i
zA+5zJct^L%VXVR+S@l{A^!16UUQxnu(@yR~U+l?t0khe5&g<(2muX$u6Ugf8*^aTJ
z<1Fxm0DtlRe*L#%yS!S<?C`#pNXkt<{X(5h6=FfAq$8fw3s7G@#La`N<AQ}~q1j`Z
zaE)*6+<j})S+sym<*e*ZLYrX&XyZ^uDNk6`W49Fcu>cjUY-qBvui@-kmw6+6R>XO>
z^ATvC2JBWzIN)Pe$P)GuDqUDoWht#q5nSxu#$4`8>*KJk`#3YsZ@x9oV&^Ml*WZ=A
z$PWQoDV0N^ca?Eth21sMXyV@j)RsseNIjBPV{l~%4dT>@L(F9et>n~5L>yxXUFOtC
zLqujYd$bfm_~HvHe3U^KUSbr@7Ag_d^CQ3!#9$q+2n7}VX$V0B2iMat2c@4%A&|;o
z{dq+mZYGL`Ctwp!45#c6FotmR^bgrGDw;!p42}UfZh&6XlVOY=4IGyvX#|e*Bs5-+
z)re*|gv3*M9Q#VH#e%^2o3VjZT|@NmsHFbuRANmD1V-Of3Z!Zq$||Fh@X;wdWGRxw
z^90I7H+upzCHFo)?|<Nr3J%MWGz5nYWNCN;$Flx?nlzB5>B%>ig$7Q_ku(PXIX#~<
zUmw(nW;(R^%G5l02R>7>;?SZ<lE9NsCc52|??0z(VhU|TDu&=uCos}f4SDaFFOT8o
zIrmc=UZfmNjOe<+;qPl9>-x3JZVw*Mf?v6gSzF(YA4;;Rk5&j;;65pgyr4a3%7K&j
zDlGKgrA1yex+gUHeLp|5fplgspI`mJ3i$x)RYqHTF4mIBw(WqzsH?ZQQ2hOyfP>)n
zefhTGl@$lXHcKaut6!Duwo19XbaT7G@+c~-L;-J)zmtWbqp69Cv!kW0xzo+jPmOPi
zn&QF@-tj?aKf$kkrKXBhG=x|H5mm%z+s^0d5(NnfRHmYT{V3l;7vG9KNicV2mUw0p
zZNBfw#(It+hTostpD;M_@DooOfotNuY3IGO<bdpEPzRJZ@t%21f6vzNBk7>`>@QXF
z6gN)4xR|PmzE}>Wes0;57LLz;r9z?O0rYdy1iX!_Rao4s&^BtOfbNCm=O0{-J|v`n
z#<t6eq0nzl*Jb_2OsW=3kGUjCZpL%I{B6NyL^DMqofrJV*QkRO7G1pDMyDeCYA<PE
zkgW=UhhRw~WDPmaN*!M_JQ%(RQ%PVnDh{(v6i%kh4?_&D*kO`K#)gUJOP!|<-QL@(
zR7qwwh2o4#6P85g`Ohv_^Bwa{kqfOF>_S}7v}iN#uK=h&OKgJaz{tmvZe4Ak0PSY>
zSh%p&@Na>F)6_i!CqrCX<jOXPZI$>(CxlBZnCF8u=ohr5%1Fc(QrR0J9FnU<GC;Lg
z-cWM7lCC<1#h}$>g=TnjX_)~R+*!DlEJp;){M_X+_22}CuZ5ZD3ew(>*?pGT;Bvm<
z#(URI(&kUoX%u|elcQxP$-A`wCU=NUnPzV8*JNqSqUPdNJ>MShTC?c@eeC<5PuIDL
z@blfuls0xFqHRi^HP=eYi$}G!hYu*#9m$iYhRF8{Jrca$Vi9RE=1fX1)n<yk>1?=Y
zsXeV1-azxu4=JzDq@I|)e=Z*6(!RDS?)zTeMUp?&s5tKWf^n=OE@vfJ3wIB-itfD6
z*puYb$YB}ZoQ{IVh_p4jZYh{~CXK^(TQ3cx_+9Gc4jSWxBQ@Eb72sCvH9^2})$({{
zdI^=O`S<_QJp9!gI9r(7nEq-ZZsR$wpPxAb){?c--1f+@9^+Q*$LEhqwymjPH!T4C
z&gTEE#6){PJ;T<p$8}i1(!YIsfS!Js$XJ>W!uCQ5NHR5i2Lai+<qm@Dv>}O_0g~EQ
zISxTa+n7%%Sn8U7>Pd2y<)EPhAUWH2XRqUgidw(5a9ly$io*B~YPgo!S(pvk-Tld!
zM~m?ub_1OJvf>aR7M>}d?Q338@8)>P8xGPrTyh%97aSuz2&{S9@r>Rij~CFB7f{n8
zNpNBV?@()tN6Cvmg%pR`j2Gd?cJk2y{f8O`Lr*727})x9_NjtOAz>z;@-9CqnD@$L
zPt0ehT!y~hWf6?Rvw6++IVKfzH=hqHEHDs-V_04!*JZn)68NTERDgM0LBy%1=99g$
z)bi=6J?q-t5|yW%N9sIR$shC@)Hg@t&y@lDJ{p#qSrvMFDMA7$-AQC$zIaw^sqG3~
zjbeFE%UuJ#U4q;McCS8Gmm6Y3`M+S*Pg}ay7WldamD<d0y4G!u5PGAdv%Fv;x0o(g
zZD_Mfh{4O6)^Z<qIX~*A_$a1Xv(~*tuL*ogd(WsI$8V$t64x11!AeC-ZBO-PDTcr}
zBmx1oo;57d&#=zuWSh;K`I-EbG&rn^t6R!k{Nn_P^eIX6stKF+3dexe3Js@z{avri
z%dNc~bA_}N@U_c>liV4Buy&7{aS@+`&p7&I&%PfoZsyHy?9oL(JL*v{y*@pV+sF~y
ztTQi76Y)B`8h>_m`2@kv$vH6u2ND|OfRaw)riqF%BI}29RwsoC{|JZ%1SlhHb&0}#
zeN9`?;!<YSd_lnzgzZlMK=}g|qHz2pgnG(Abo&JEDMOvQl<6*=v*p%74H2Q}gAaA6
zl$$HZV-z#tdAR$J@`f<V#GKOos|QIWHORVn1)4PIkNt4lO6poyWy@musBp6aLdO^o
z99fRUrWp_$dz}%RM#F4ecgQIpyKK~*YDhF!i9X50S80n7R0Ju#HH{EUlKe(Px(1=@
z{ZLAx1KB+!Q87CV)<8||rAlXYwi6E?tn*3e7bEOqe7zQPKlGh&c@LAGUvqvu?+bt7
z<J?5iP&(suDGs#UZ!y_8zM@K*bvp#0SsU%y0L*Nhm?f!R7j1Z67m@xoRKwJL0i0J`
zQ!9_iDO25x0*|rVfvFj^?d=W8_k@#q!0Xy@Ca-LYD*8CG+pUJ8%lvp)^PloXF^Zv0
zA_$uydApiFePJH_26vCH&yC5d^KcG2oI3>GP=G#>b}zR)VmN*j+MnYhA}X!H=#v>e
zP8$8yCHi^qpjYRdTm17@U)yO++_rN+QW_R6=XIh-Mh%eiRONRX?1IXI+uc}WS`7qo
zYsGD!wI&yRlgd}K2!)CDH-I_NBltYj6Kc@YT9&9|?i7Xu)jm_zgJ(rnVaNNX98Q`&
zI%7QPMe&-iIMuE>6bcwY-pjmnP^wPSw)=D(-iTXQhSbQz@@((DTllatgwo}kB88-<
zk<Irp=mG)&a)wEc_gXZ1RilHrP2r`PD-_+dJlp2)keg*K?=R#qq0-clg$zCA#tusA
zef;!jbh<9*{JkKN2fS^;;U$CP%Y23cGW?^BGYE7$+-Pv5%mO;w+T87dgw2e=XxF04
zY>YUMObeF1gm!V(d(Efa;mNJt3N!PNTWuCDj_C|amZl>PhNGa`Y$4v-G*j8x2#bH*
zu*dCfW3z%xmJeP;Jk}XS;(w4-)r)sIjM!(7e*P#fAD2>+V>rP{yivjIwIgjE1d$Zv
z11%O|z;_CW!!-i$Ad?k;c*X8=alknKUPPovb5p)O3-8#psj~p%GY{ib9yO}`JrME`
zO4DxZ-E8j0@K1`i4Jjn{kU$+RCy4u<59(?<GU^A#Z*{7Qs#x9(1Ns!8-aMb@EWM<d
ztD~Un=LMitG@OpB@VhRvcizYIVPMC28WLT&hj>S+s($VsU8yx|@jYPOn@16g(h^rF
zF33}JcGWR>7?WJ0W>77iDJZhbrxC5iuCAWj5UFkB>W&yY8)U_wt8BvAA)k49Omq^f
z$pn!ko?L!H#?8!ekhxt&S-RlKST}zxEkF8B2CG_^qf)n8>syU!-E_V%d{!q7sBUt`
z=hQsCe<k5~{j|N!=L1V+=_%;Fh8Bu;zP+b|ehE;2h8z)eP^PvnSlB0DJW^D3!I(E~
z2~E-~R@oS<_pF{hnt5FDy^}9WrEIvwQ@(hdWwbj@-O<WbtZ@)Dnkt>MJu)4cHtfq+
z&0plbSI1@7y*btR^S$l1cp=^Q({qJ2V+gPUhV@r7=Imi@`lAcWf$G{Na{+^QPS`=S
zDhODh7k*ZjVNddt+4x_&Zu&_Tog<JA$I4yo+(*X8cbUpP?(Qg!PCq#3ty#|&<?Q9t
zDFmXZKX@!lSi5AEuuz&jlWaVqQ6SCs-iY-<8S<<NbzD0YqSq9!86NJFmkSM*M{zN^
z6TDK%?7F;2^sfZswAPX+Ev;HCCO=3i#|)ep)3K_%(|}C(L~y10oV2}y;`uARdyuL_
z(t=_^@hA6lu)d(%Pa*MYW!wpsb1xc@DGh_@k$hR})`Nqt6)YfmZM!@q3M3*xE3>0V
z!rN;xoYz#gG$ntsrlvXJlW{+!$2_O8a0W}rp|d+q48tZSNV&HQ(V4C0ZU!-%GR3`2
z!Di%#juT;em)xiiMI;A}n>h%*pMD_dQp~g?n{%X<FA)<))r2RveXS(qJV0WH&7Y`b
z&yOsdeiFTvni)b9(a&^<HS)zurCBxQc#*~Qg~6*JwBViNlFC{p+X1bKH4v-N7fXpC
zaVql&I0Fo()|YjqvjrHg>a?TXI#U-sZT5R#Hlk@s$XeidohiPj2$ao6O6a<DiDX`g
z5g)z`n~};r4fi5963ejZ30mI334X5W`f}WNq_)FR6N!Ye(1Lwyg)eb|^}1L3Yxgen
zs8jzvWkv33Q|c4kI<le)oXBnM5wf&}bhpCj@FQI+Pp4=6CuZZIeWF3DZHbEMQO{J)
z%Ynu#mt(8hJ+qDw<QP+Wnx2So%03Y}t+3Dc0<L_&e>$!?ks!H9fAi^N|Eb8X8F#AJ
z!RfvIeHPU5J!==@<-yk{YmtqC3jPK>)EpcMvi5Cvw(a^=?=q}io8m3G<C~m+teE{7
zp#M(LnAjOBI@;O)$tf&cE$9cqh)O0HuYe1S<6mNi_VzzsDB>R4O><!e?OYHBe~U~h
zVF?yh<IX1c)OZJwnl^6gb_P9($BgBUyE^Z}UbttY&;X9Z`JB#LknFQqj!i!~hryk`
zOebz9W_eYGi<3M{Eb{0v=0`+gVWI^;2HQiy%CO9~P9jzrv@~=j-f9fiu8|2q-_S&!
z6rmA9O819XnFU2n(*~|sp79ADz8nsp*JMDPQ{0La1jLlj_}^pZu4Z-gWy*PIU0G|J
zW7(fe`Pj)}0@?a~x3TUq=83=ZAG2^B+<&+#EiY3kFlYbT-vud3v8y_`R(#E^3Z7i5
zFp8Lx44a$&g41RjmXbs#7J+X9ESf6rPDQn4=*mzWsf3n3q32<RHC94L@|6uk2Td`p
zHA+!5Qt9-zp!?a4qv&+c#DvrFaWIXVEzr_lxGm2kEUX*vEF@%FHsw)vD}4zF;W+-P
zboK7~71fm72c-7lxX;N}y5g-o4Lzr*#^PTXr`9~bmm@c}?Ju3(T~k=tb3gX<1@|<r
zp4p@2rN!A1mwZR~>Dk-VBWU6kt`BqBA}syj{oQ58c8;b$e5TxU!z|aIu+z;3h-ZX8
zriIwF<wIVfTa6|^<(!L~h;QIq+SwFF&2ZeXF*kRu$H_Y|jztkMNFA|y92^jenDK!~
z*~Z+YK`T%N|E`I2nGSnmEIP$&b2nB&v~eQ6VgUmse~kKAM>Ij+a&bPioT!})1KT;m
z?w8~?7{OFi9Q+YGlSCF25(HWRo~D?=DK5SS`gIV}4o6?pO62o7pd*WDW>!m6Oj;#s
zmAxPa*@~DZqzH2cn%1f3<6KT0Zz{wh898zyl1<%{P;EA%Sm3Ej0f|RXv&X=!>>INx
zb<S7zj4kwlCW?<`*9}O1+daafn0ChT27Pj1$2z}Xxun{hZj7UvUu>V&Aa;~P(yTUJ
zkkj88kRqgW?qtUh+OaXSsn53UI^7ST+r2L}g_V|gGB|u-^|1VIScl;uZu?7BKDmVp
z2C8RcVExf_tnhEK&+eyT)}EDIn;v8Qr#H}r`6P5<-f)I_1Lw97{GHp3jFYo#=E4ky
zogbxd14Ja&gd9Wf<ra7x4C>@*9V{Pxgl3C=znXDP<#U+F?~uPdt$ca3Qa~safHRcj
zZ#E3Y=bAycj6%AaX;esKL5n4Ag7Pa45W#<e;%tT=<poqS`=2oZ-$xBDOX0g?B?aZX
z80==3>s5r7h%hyI7|@LXlnPDab9)}~TCRz9UV$;SRpVL;mm`r0P(o4K=n8Nuoei@s
zeR1S@pIOZWYH#B@#7MVSg($!A_y{K3oP20FKECC^uy~AI%y=R&!6uJ)#J-<93z93h
zwy4ojS8f{Ta%oPvFsLnj!=eOg6{MvP^*53;<=o|*BR?5}1oa;Xrw(Uddc8<xhG!dl
z`$1Bbo^sZ?22v4$4<c>asTbz`YV@IJdo)O2=G=w7me<Hrf~RZ;mwfOzoyKSh6wn%*
z5u>vp*GL$S4UJ&v2TC%gu1ghyQj5>Yv-<Z@6lLJxfq?)2)&=H{zm0#ngZaI{Ka=Lo
zWx?&Z$%|ny7yl%<|5NFomj^#l06@tv%nye9zbU)n*l)*;vQMyB`oXyW)b!8Y-Mm-d
zjvK)IE#Tib_$LSd69{9HU}|A{Zef3q+W)!V8>;$t+^A=T-EzU!`48gy=hkm?)}Pe%
zPc#6a`VZ`HBKx<+Zddt}0sFnm6Vx9&_y-^MdxigH!Yf!9{bC?~!)_J-Nmcw-%m5?f
ze!+ed7k@7BXS?`&fq7U8hb`drt9kql{a;)5hp~Xa>h*8f?Uw&6?7ubU{~h*Mng0|0
yXN~!d)`49<{8IX};`|B!Gb#RtZxH|ZyMGThMH!?YXAKwDfV9IR(wX!|{Qm%z*nGqQ

literal 0
HcmV?d00001

diff --git a/studies/additional_data/protocols/.gitkeep b/studies/additional_data/protocols/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/studies/additional_data/resources/.gitkeep b/studies/additional_data/resources/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/studies/additional_data/resources/color_pathway_genes.bed b/studies/additional_data/resources/color_pathway_genes.bed
new file mode 100644
index 0000000..c4ee581
--- /dev/null
+++ b/studies/additional_data/resources/color_pathway_genes.bed
@@ -0,0 +1,36 @@
+chr	start	end	strand	gene_id	transcript_id	Gene	Pathway
+Scaffold_1	7523181	7527088	-	AHp000674	AHp000674.1	AhCYP76AD5	Betalain
+Scaffold_1	28687404	28688868	-	AHp001663	AHp001663.1	AhBetanidin5GT	Betalain
+Scaffold_2	6806355	6807984	+	AHp003152	AHp003152.1	F3-H_3	Flavonoid
+Scaffold_2	6806355	6807984	+	AHp003152	AHp003152.2	F3-H_3	Flavonoid
+Scaffold_2	31518210	31522095	+	AHp004305	AHp004305.1	CHS	Flavonoid
+Scaffold_2	31518210	31522095	+	AHp004305	AHp004305.2	CHS	Flavonoid
+Scaffold_3	14652200	14653652	+	AHp005940	AHp005940.1	AhBetanidin6GT	Betalain
+Scaffold_3	29507485	29509926	+	AHp006454	AHp006454.1	F3H	Flavonoid
+Scaffold_4	10226509	10227946	-	AHp007219	AHp007219.1	AhcDOPA5GT	Betalain
+Scaffold_5	9470955	9474986	-	AHp008991	AHp008991.1	FLS	Flavonoid
+Scaffold_5	9470955	9474986	-	AHp008991	AHp008991.2	FLS	Flavonoid
+Scaffold_5	19537497	19543306	+	AHp009303	AHp009303.1	DFR	Flavonoid
+Scaffold_6	3461614	3469253	-	AHp009962	AHp009962.1	CHI1	Flavonoid
+Scaffold_6	13877397	13882497	+	AHp010386	AHp010386.1	AhDODAα2	Betalain
+Scaffold_8	3308349	3313908	+	AHp012752	AHp012752.1	PAL_1	Flavonoid
+Scaffold_8	7925628	7930892	-	AHp013217	AHp013217.1	C4H_1	Flavonoid
+Scaffold_8	7925628	7930892	-	AHp013217	AHp013217.2	C4H_1	Flavonoid
+Scaffold_9	12115220	12125730	-	AHp014409	AHp014409.1	4CL_1	Flavonoid
+Scaffold_10	21762620	21764924	-	AHp016530	AHp016530.1	AhMYB3	Betalain
+Scaffold_10	21780564	21783050	-	AHp016531	AHp016531.1	AhMYB4	Betalain
+Scaffold_11	16155424	16159543	+	AHp017409	AHp017409.1	LAR	Flavonoid
+Scaffold_11	17204504	17209738	-	AHp017497	AHp017497.1	F3-H_1	Flavonoid
+Scaffold_14	12286837	12295353	+	AHp020962	AHp020962.1	4CL_2	Flavonoid
+Scaffold_15	1098208	1100359	+	AHp021795	AHp021795.1	ANS	Flavonoid
+Scaffold_15	2886482	2891165	-	AHp021980	AHp021980.1	PAL_2	Flavonoid
+Scaffold_15	4193988	4197809	+	AHp022120	AHp022120.1	F3-H_4	Flavonoid
+Scaffold_15	4211064	4214362	+	AHp022122	AHp022122.1	F3-H_2	Flavonoid
+Scaffold_15	4227367	4229108	-	AHp022123	AHp022123.1	F3-H_5	Flavonoid
+Scaffold_15	7959672	7965298	+	AHp022382	AHp022382.1	C4H_3	Flavonoid
+Scaffold_15	7959672	7965298	+	AHp022382	AHp022382.2	C4H_3	Flavonoid
+Scaffold_15	8003999	8008333	+	AHp022384	AHp022384.1	C4H_2	Flavonoid
+Scaffold_16	988733	991447	+	AHp022773	AHp022773.1	AhMYB2	Betalain
+Scaffold_16	988733	991447	+	AHp022773	AHp022773.2	AhMYB2	Betalain
+Scaffold_16	5231548	5239058	-	AHp023147	AHp023147.1	AhDODAα1	Betalain
+Scaffold_16	5301961	5305973	+	AHp023148	AHp023148.1	AhCYP76AD2	Betalain
diff --git a/studies/additional_data/resources/color_pathway_genes.csv b/studies/additional_data/resources/color_pathway_genes.csv
new file mode 100644
index 0000000..a427fab
--- /dev/null
+++ b/studies/additional_data/resources/color_pathway_genes.csv
@@ -0,0 +1,30 @@
+Gene,Pathway,Gene_id
+PAL_1,Flavonoid,AHp012752
+PAL_2,Flavonoid,AHp021980
+C4H_1,Flavonoid,AHp013217
+C4H_2,Flavonoid,AHp022384
+C4H_3,Flavonoid,AHp022382
+4CL_1,Flavonoid,AHp014409
+4CL_2,Flavonoid,AHp020962
+CHS,Flavonoid,AHp004305
+CHI1,Flavonoid,AHp009962
+F3-H_1,Flavonoid,AHp017497
+F3-H_2,Flavonoid,AHp022122
+F3-H_3,Flavonoid,AHp003152
+F3-H_4,Flavonoid,AHp022120
+F3-H_5,Flavonoid,AHp022123
+DFR,Flavonoid,AHp009303
+F3H,Flavonoid,AHp006454
+FLS,Flavonoid,AHp008991
+LAR,Flavonoid,AHp017409
+ANS,Flavonoid,AHp021795
+AhCYP76AD2,Betalain,AHp023148
+AhCYP76AD5,Betalain,AHp000674
+AhDODAα2,Betalain,AHp010386
+AhDODAα1,Betalain,AHp023147
+AhcDOPA5GT,Betalain,AHp007219
+AhBetanidin5GT,Betalain,AHp001663
+AhBetanidin6GT,Betalain,AHp005940
+AhMYB2,Betalain,AHp022773
+AhMYB3,Betalain,AHp016530
+AhMYB4,Betalain,AHp016531
diff --git a/studies/isoform_sequencing/isa.study.xlsx b/studies/isoform_sequencing/isa.study.xlsx
index 60867b92cbe45252333f78b49c485bd39a3e02b2..48af51fa63c14650219e5e214e849d894b1120dd 100644
GIT binary patch
delta 701
zcmdnuzuBKBz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM
zSk#Y23&gErabf|{`#2JrL8=sa3_w&c&j*mGE3Xnrc5<em8i+eluz(4~WflP&r6sbP
z4J7<rYCc%FLl!K&UUoYpr;f8#q<Knpcl_jhdF}cRH${)!qboh8a%Ekeovr_1(v4|*
zO`g1eZNssk=v8iddf6WS<)4!$vfC%jy^`hD*4)9eV24;xZhFezsFq+MixjU7G1@aG
za>>k0FV{5Zye<~E)+`~giF?n)B2IJTP0l8qOZ(n0NXUC2dTO$CE_a*UEnU-|_cz5d
zWL$-#7>o|qPq=u?z+|uTOC8qtx@QkQ>ur1>Ql#OqaPi@T6XtE@*j8Z9E%nSP(qj$J
zohkE9#MPE;5}%f8so~yx<-7e7`P0+4r(FqHRTV1ZF^y|p0XNUqv)8iLm#hq`nz*`h
z$_(dgOLkvyfA`@lugvWd{u-SFh1$HothYW1Jv2X=KXm2Vf_Z!6oW3rVI23k!)zWh(
zn&()?{3^~@={WxC&g*!sw`I4_F(`eo_PoVAZ^rkFmp^AYZnEe)p7=%J*yK<X-$%by
z<m#{4#$TP!+x>20z4bhvT|d|TOZ~=;9PgXIE6f7N>0~8)Q1o0;G6GQ?%H|--QCS~E
z?N*bS+@~xFVy;wn15qE9tw5BniVKJ;hEO|I96-F^D*hnKM-|NLQ#AwejzM@JlT_5e
tCfTcjEo@M;1j+AKGX+tf)NGKV-&`H6H(y<b@zvxWbtSeIC17%8004lH1t9<c

delta 702
zcmdn&zr~*?z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5
zU{OC7EfBYg#fb$(@8d{h2B}iyF#u7)JRd-!uDnVh*~yuLY9Q`J!2%`_mstdCl$OYD
zHjwahsrg{x4q349dfDxaoY!ovB3176%}JP?FRxv{rPpb~t*+pSTB280nO~1Ppt@1F
zI`hQ+>y-iyPp{lQckWGvykB~|5`WzRtCd%$@URPVIuz<Ix#e>*%aZ@S=Yt8F2aDDw
zcxe@+OuM_}j!H~)#nxL5uAF)w+)k<9@i{4WQDutNzSgFljo}*p>$d7jRArc7G1!+Z
zy`WIUH-qs6OTDvO_TdX(`j%`K*|XiGdbT-RQ}Bs}46S{9?9Nq}lx`e~)y<e;oT|9d
zs35dTrTXi^lW`H#o-U9v4Z8n-LY+?By;*_ID_?~cDnx2k9MUzoVivt}?xm$(FC*7I
z3rymRoczv7e&^ro<^?&IZ9i;eIKI~8#rrFh#5wExWJ8v2J5>2Cp6TlJBo67^)zhpL
z`HP+vJUqX5nu6Tw;Pv$@x7^OPX6`ufUM17KBITaD|M}CFCm(6ZwO@3W@Sk=u)@Yu}
z<J;$VJ@5WCIq{<Hr~KoK9nJ3_-G8Qz5jo;Fe^ZzRj?_s?_MrH=tYieD*p<yel!LN9
zh$>Vz1yL)M-9Xd_Wh)S+qv8UhiXhYu6$cRSmx@1#@>T`&dR5IpyrU3ap|Z?mWi?5V
sh@BePvU)X35Dz4A4I=ST%?2s{&D6pA^VDS+Ur+8<S7K{b0;X360Oyt=1^@s6

diff --git a/workflows/BSA/RNAseq/QC.sh b/workflows/BSA/RNAseq/QC.sh
index 64db677..947a4ff 100644
--- a/workflows/BSA/RNAseq/QC.sh
+++ b/workflows/BSA/RNAseq/QC.sh
@@ -22,8 +22,8 @@ GTFIN=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_correc
 
 # Quality control
 # Initialize the output directory
-QCIN=raw_data/BSA_rnaseq/
-QCOUT=raw_data/BSA_rnaseq/fastqc
+QCIN=assays/BSA_RNAseq/dataset
+QCOUT=assays/BSA_RNAseq/fastqc
 
 mkdir -p $QCOUT
 # run quality control fastqc
@@ -32,14 +32,14 @@ fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz
 
 # Quality control
 # Initialize the output directory
-QCIN=raw_data/BSA_rnaseq/
-QCOUT=raw_data/BSA_rnaseq/fastqc
+QCIN=assays/BSA_RNAseq/
+QCOUT=assays/BSA_RNAseq/fastqc
 
 mkdir -p $QCOUT
 # run quality control fastqc
 fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz
 
-cp -r $QCOUT data/BSA/RNAseq/STAR_flower_mappings/QC/
+cp -r $QCOUT runs/BSA/RNAseq/STAR_flower_mappings/QC/
 
 module load samtools/1.13
 
@@ -53,11 +53,11 @@ GTFQM=/scratch/twinkle1/temp.gtf
 sed 's/CDS/exon/' $GTFIN > $GTFQM
 
 # define input files
-QMIN=data/BSA/RNAseq/STAR_flower_mappings/
+QMIN=runs/BSA/RNAseq/STAR_flower_mappings/
 
 
 # define output directory
-QMOUT=data/BSA/RNAseq/STAR_flower_mappings/QC/qualimap
+QMOUT=runs/BSA/RNAseq/STAR_flower_mappings/QC/qualimap
 
 # create main output directory
 mkdir -p $QMOUT
@@ -85,8 +85,8 @@ done
 rm $GTFQM
 
 # run multiqc to combine the results from fastqc and qualimap into a single report
-MULTIQCOUT=data/BSA/RNAseq/STAR_flower_mappings/multiqc
-MULTIQCIN=data/BSA/RNAseq/STAR_flower_mappings/QC/
+MULTIQCOUT=runs/BSA/RNAseq/STAR_flower_mappings/multiqc
+MULTIQCIN=runs/BSA/RNAseq/STAR_flower_mappings/QC/
 
 mkdir -p $MULTIQCOUT
 
diff --git a/workflows/BSA/RNAseq/adapter_trimming.sh b/workflows/BSA/RNAseq/adapter_trimming.sh
index 15d5ab7..2821fec 100644
--- a/workflows/BSA/RNAseq/adapter_trimming.sh
+++ b/workflows/BSA/RNAseq/adapter_trimming.sh
@@ -20,7 +20,7 @@ module load trimmomatic/0.39
 
 # run this part as array job
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/BSA_rnaseq/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/
 FILES=("$SOURCE_DIR"/*R1.fastq.gz)
 
 # run trimmomatic, use 6 threads, taking advantage of the baseout function to name output files
@@ -32,4 +32,4 @@ java -jar $TRIMMOMATIC/trimmomatic.jar PE \
 	"${FILES["${SLURM_ARRAY_TASK_ID}"]}" \
 	"${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/R2.fastq.gz}" \
 	-baseout "${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/trimmed.fq.gz}" \
-	ILLUMINACLIP:raw_data/BSA_rnaseq/adapters/custom_adapters.fa:2:30:10
+	ILLUMINACLIP:assays/BSA_RNAseq/adapters/custom_adapters.fa:2:30:10
diff --git a/workflows/BSA/RNAseq/index_STAR.sh b/workflows/BSA/RNAseq/index_STAR.sh
index cb8606a..6d12c7f 100644
--- a/workflows/BSA/RNAseq/index_STAR.sh
+++ b/workflows/BSA/RNAseq/index_STAR.sh
@@ -18,10 +18,10 @@ module load star/2.7.8a
 # more specific settings: use the polished, softmasked reference assembly
 # sjdbOverhang dependend on input read length
 # as SJDB file, use the newly generated braker2 protein gtf file
-REFGENOME=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
-SJDBFILE=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
+REFGENOME=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+SJDBFILE=runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
 
-mkdir -p data/BSA/RNAseq/STAR_flower_index
+mkdir -p runs/BSA/RNAseq/STAR_flower_index
 
 STAR --runThreadN 8 \
 	--runMode genomeGenerate \
diff --git a/workflows/BSA/RNAseq/run_STAR.sh b/workflows/BSA/RNAseq/run_STAR.sh
index 1064306..099d4e6 100644
--- a/workflows/BSA/RNAseq/run_STAR.sh
+++ b/workflows/BSA/RNAseq/run_STAR.sh
@@ -12,9 +12,9 @@
 module load star/2.7.8a
 
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/BSA_rnaseq/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/
 FILES=("$SOURCE_DIR"/*_1P.fq.gz)
-OUTPUTDIR=data/BSA/RNAseq/STAR_flower_mappings
+OUTPUTDIR=runs/BSA/RNAseq/STAR_flower_mappings
 
 # change directory of outprefix
 OUTPREFIX1=("${FILES["${SLURM_ARRAY_TASK_ID}"]/$SOURCE_DIR/$OUTPUTDIR}")
diff --git a/workflows/BSA/RNAseq/run_kallisto.sh b/workflows/BSA/RNAseq/run_kallisto.sh
index ae124fb..88243f0 100644
--- a/workflows/BSA/RNAseq/run_kallisto.sh
+++ b/workflows/BSA/RNAseq/run_kallisto.sh
@@ -23,9 +23,9 @@
 
 ##### Perform quantification
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/flower_color_mapping/
+SOURCE_DIR=assays/BSA_RNAseq/dataset/flower_color_mapping/
 FILES=("$SOURCE_DIR"AM*_1P.fq.gz)
-OUTDIR=data/flower_color_mapping/kallisto_quant/
+OUTDIR=runs/flower_color_mapping/kallisto_quant/
 TISSUE_NAMES=("AM_00331_gf" "AM_00331_rf" "AM_00332_gf" "AM_00332_rf")
 
 mkdir -p $OUTDIR
diff --git a/workflows/BSA/WGS/combined_filter.sh b/workflows/BSA/WGS/combined_filter.sh
index 707a9bd..78e6a17 100644
--- a/workflows/BSA/WGS/combined_filter.sh
+++ b/workflows/BSA/WGS/combined_filter.sh
@@ -12,13 +12,13 @@
 module load samtools/1.13
 
 
-REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 
 PROVIDER=CCG
-OUTDIR=data/BSA/wgs/vcf
+OUTDIR=runs/BSA/wgs/vcf
 mkdir -p $OUTDIR
 
-ALLSAMP=$(for i in data/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done)
+ALLSAMP=$(for i in runs/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done)
 
 $MYUTIL/tools/gatk-4.1.7.0/gatk --java-options "-Xmx48G" \
 	CombineGVCFs \
diff --git a/workflows/BSA/WGS/map_reads.sh b/workflows/BSA/WGS/map_reads.sh
index ad2bcf2..3a1d2a8 100644
--- a/workflows/BSA/WGS/map_reads.sh
+++ b/workflows/BSA/WGS/map_reads.sh
@@ -17,15 +17,15 @@ module load bwamem2/2.0_gnu
 module load samtools/1.13
 
 
-REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 #bwa-mem2 index $REFERENCE
 
 
 PROVIDER=CCG
 
 
-INPUTPATH=raw_data/BSA_wgs/ #
-OUTPUTPATH=data/BSA/wgs/bam_files/ # Change this for different generations
+INPUTPATH=assays/BSA_WGS/dataset #
+OUTPUTPATH=runs/BSA/wgs/bam_files/ # Change this for different generations
 
 mkdir -p $OUTPUTPATH
 mkdir -p ${OUTPUTPATH}/metrics/
diff --git a/workflows/BSA/betalain_quantification.Rmd b/workflows/BSA/betalain_quantification.Rmd
index 8ab6265..31a9b69 100644
--- a/workflows/BSA/betalain_quantification.Rmd
+++ b/workflows/BSA/betalain_quantification.Rmd
@@ -20,7 +20,7 @@ library(ggbeeswarm)
 Create output directory for plots
 
 ```{bash}
-mkdir ../../plots/betalain_quantification
+mkdir ../../runs/betalain_quantification
 ```
 
 
@@ -28,7 +28,7 @@ Read in measured data
 
 ```{r}
 # read in data
-betalain_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+betalain_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "Photometric_quantification")
 
 # normalize to the same input weight
@@ -219,11 +219,11 @@ p_leaf <- ggplot(data = melted_quant %>% filter(tissue == "leaf")) +
                    yend = mean_content),
                linewidth = 0.9,
                color = "black") +
-  geom_text(data = leaf_bc_groups, 
+  geom_text(data = leaf_bc_groups,
           aes(x = as.numeric(factors) - 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "red3") +  
   scale_color_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) +
@@ -246,8 +246,8 @@ p_leaf
 
 # plot betalain quantification from flower
 p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) +
-  # geom_boxplot(aes(x = accession, 
-  #                y = content, 
+  # geom_boxplot(aes(x = accession,
+  #                y = content,
   #                fill = metabolite),
   #            color = "black",
   #            #color = "red4",
@@ -282,18 +282,18 @@ p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) +
                    yend = mean_content),
                linewidth = 0.9,
                color = "black") +
-  geom_text(data = flower_bc_groups, 
+  geom_text(data = flower_bc_groups,
           aes(x = as.numeric(factors) - 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "red3") +  
-  geom_text(data = flower_bx_groups, 
+  geom_text(data = flower_bx_groups,
           aes(x = as.numeric(factors) + 0.19,
               y = 1.15,
-              label = groups), 
-          size=8, 
+              label = groups),
+          size=8,
           inherit.aes = F,
           color = "#F0B327") +    
   scale_fill_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) +
@@ -322,7 +322,7 @@ patchplot_betalains <- p_flower / p_leaf +
 
 patchplot_betalains
 
-ggsave(filename = "../../plots/betalain_quantification/betalain_quant_photometric_content.png",
+ggsave(filename = "../../runs/betalain_quantification/betalain_quant_photometric_content.png",
        plot = patchplot_betalains,
        bg = "white",
        dpi = 450,
@@ -335,7 +335,7 @@ HPLC quantification
 
 ```{r}
 # hplc quantification
-hplc_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+hplc_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "LC-MS_quantification")
 
 # get blank measurements
@@ -447,14 +447,14 @@ Plot results amaranthin
 ```{r}
 # plot
 pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-  # geom_point(aes(x = accession, 
-  #                y = Amaranthin_ratio, 
+  # geom_point(aes(x = accession,
+  #                y = Amaranthin_ratio,
   #                group = uniq_ind),
   #            color = "red3",
   #            position = position_dodge(width = 0.75),
   #            size = 2.8) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Amaranthin_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Amaranthin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -489,14 +489,14 @@ pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pa_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-  # geom_point(aes(x = accession, 
-  #                y = Amaranthin_ratio, 
+  # geom_point(aes(x = accession,
+  #                y = Amaranthin_ratio,
   #                group = uniq_ind),
   #            color = "red3",
   #            position = position_dodge(width = 0.75),
   #            size = 2.8) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Amaranthin_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Amaranthin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -536,8 +536,8 @@ Plot results betanin
 ```{r}
 # plot
 pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betanin_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betanin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -572,8 +572,8 @@ pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pb_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betanin_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betanin_ratio,
                  group = uniq_ind),
              color = "red3",
              cex = 1.5,
@@ -612,8 +612,8 @@ Plot results betalamic acid
 ```{r}
 # plot
 pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betalamic_acid_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betalamic_acid_ratio,
                  group = uniq_ind),
              color = "grey40",
              cex = 1.5,
@@ -648,8 +648,8 @@ pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pba_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-    geom_beeswarm(aes(x = accession, 
-                 y = Betalamic_acid_ratio, 
+    geom_beeswarm(aes(x = accession,
+                 y = Betalamic_acid_ratio,
                  group = uniq_ind),
              color = "grey40",
              cex = 1.5,
@@ -688,8 +688,8 @@ Plot results vulgaxanthin
 ```{r}
 # plot
 pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Vulgaxanthin_IV_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Vulgaxanthin_IV_ratio,
                  group = uniq_ind),
              color = "#F0B327",
              method = "swarm",
@@ -718,8 +718,8 @@ pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) +
 
 
 pv_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) +
-  geom_beeswarm(aes(x = accession, 
-                 y = Vulgaxanthin_IV_ratio, 
+  geom_beeswarm(aes(x = accession,
+                 y = Vulgaxanthin_IV_ratio,
                  group = uniq_ind),
              color = "#F0B327",
              method = "swarm",
@@ -762,7 +762,7 @@ overview_patchplot <- p_leaf + p_flower + pa_leaf + pa_flower + pb_leaf + pb_flo
         legend.direction = "vertical",
         plot.tag = element_text(vjust = 2, size = 17, face = "bold"))
 
-ggsave(filename = "../../plots/betalain_quantification/overview_betalain_quantification.png",
+ggsave(filename = "../../runs/betalain_quantification/overview_betalain_quantification.png",
        plot = overview_patchplot,
        bg = "white",
        dpi = 450,
@@ -785,7 +785,7 @@ photo_hplc_patchplot <- plot_spacer() / (p_leaf + p_flower) / (pa_leaf + pa_flow
 
 
 
-ggsave(filename = "../../plots/betalain_quantification/quantification_without_picture.png",
+ggsave(filename = "../../runs/betalain_quantification/quantification_without_picture.png",
        plot = photo_hplc_patchplot,
        bg = "white",
        dpi = 450,
@@ -798,7 +798,7 @@ Analyse HPLC results from amaranth roots:
 
 ```{r}
 # hplc quantification
-root_quant <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx",
+root_quant <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx",
                                       sheet = "transgenic_roots_LC-MS_quantification")
 root_quant <- root_quant %>%
   mutate(sample_id = paste0("plate_", plate, "_", root_type)) %>%
@@ -834,7 +834,7 @@ root_quant_plot <- ggplot(data = root_quant_norm) +
         legend.position = "none")
 root_quant_plot
 
-ggsave(filename = "../../plots/betalain_quantification/root_quantification.png",
+ggsave(filename = "../../runs/betalain_quantification/root_quantification.png",
        plot = root_quant_plot,
        bg = "white",
        dpi = 450,
@@ -842,5 +842,3 @@ ggsave(filename = "../../plots/betalain_quantification/root_quantification.png",
        height = 5)
 
 ```
-
-
diff --git a/workflows/BSA/bsa_and_plotting.R b/workflows/BSA/bsa_and_plotting.R
index 4f9f266..ed0b831 100644
--- a/workflows/BSA/bsa_and_plotting.R
+++ b/workflows/BSA/bsa_and_plotting.R
@@ -1,5 +1,5 @@
 
-setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 
 library(QTLseqr)
 library(tidyverse)
@@ -16,7 +16,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
                        highBulk = HighBulk,
                        lowBulk = LowBulk,
                        chromList = Chroms)
-  
+
   df <- df %>% select(CHROM,
                       POS,
                       REF,
@@ -38,7 +38,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
     mutate(CHROM=as.factor(as.numeric(gsub("Scaffold_","",CHROM))),
            POS=as.numeric(POS)) %>%
     filter(REF!='*',ALT!='*')
-  
+
   df_filt <-filterSNPs(SNPset = df,
                        refAlleleFreq = 0.2,
                        minTotalDepth = 50,
@@ -46,7 +46,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
                        minSampleDepth = 20,
                        minGQ = 99,
                        verbose = TRUE)
-  
+
   df_filt <- runGprimeAnalysis(
     SNPset = df_filt,
     windowSize = 2e6,
@@ -64,14 +64,14 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){
   return(df_filt)
 }
 
-AM_00332_leaf_green_red <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table',
+AM_00332_leaf_green_red <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table',
                                         HighBulk = "AM_00332_gl",
                                         LowBulk = "AM_00332_rl",
                                         Chroms = paste0(rep("Scaffold_",
                                                             16),1:16),
                                         nhigh=80,
                                         nlow=80)
-AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table',
+AM_00331_flower_red_green <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table',
                                           HighBulk = "AM_00331_rf",
                                           LowBulk = "AM_00331_gf",
                                           Chroms = paste0(rep("Scaffold_",
@@ -83,15 +83,15 @@ AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps0
 # plot all results
 # leaf
 plotGresults <- function(Gresults,betalain_genes){
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
-  
-  
+
+
   mG <- Gresults %>%
     filter(Gprime==max(Gresults$Gprime))
-  
+
   p1 <- ggplot()+
     geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -99,7 +99,7 @@ plotGresults <- function(Gresults,betalain_genes){
     geom_hline(data=data.frame(yint=qval),
                aes(yintercept =yint,
                    linetype ='dashed',
-                   color=alpha('red',0.6)), 
+                   color=alpha('red',0.6)),
                size=1.7)+
     facet_grid(.~CHROM, space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
@@ -117,23 +117,23 @@ plotGresults <- function(Gresults,betalain_genes){
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
     scale_x_continuous(guide = guide_axis(check.overlap = T))
-    #geom_gene_arrow(data=betalain_genes, 
+    #geom_gene_arrow(data=betalain_genes,
     #                aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type))
-  
+
   #ggsave(outfile,p1,width = 18,height = 7,,bg='white')
   return(p1)
 }
 # flower
 plotGresults1 <- function(Gresults,betalain_genes){
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
-  
-  
+
+
   mG <- Gresults %>%
     filter(Gprime==max(Gresults$Gprime))
-  
+
   p1 <- ggplot()+
     geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -142,7 +142,7 @@ plotGresults1 <- function(Gresults,betalain_genes){
     geom_hline(data=data.frame(yint=qval),
                aes(yintercept =yint,
                    linetype ='dashed',
-                   color=alpha('red',0.6)), 
+                   color=alpha('red',0.6)),
                size=1.7)+
     facet_grid(.~CHROM,space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
@@ -158,9 +158,9 @@ plotGresults1 <- function(Gresults,betalain_genes){
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
     scale_x_continuous(guide = guide_axis(check.overlap = T))
-    #geom_gene_arrow(data=betalain_genes, 
+    #geom_gene_arrow(data=betalain_genes,
     #                aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type))
-  
+
   #ggsave(outfile,p1,width = 18,height = 7,,bg='white')
   return(p1)
 }
@@ -168,23 +168,23 @@ plotGresults1 <- function(Gresults,betalain_genes){
 # plot individual chromosomes
 # flower
 plotGqtl <- function(Gresults,chr,genes){
-  
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
   my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F)
-  
+
   ggplot()+
     geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
     scale_x_continuous(breaks = c(0,10,20,30))+
     geom_hline(data=data.frame(yint=qval),
-               aes(yintercept = yint, 
-                   linetype ='dashed', 
+               aes(yintercept = yint,
+                   linetype ='dashed',
                    color=alpha('red',0.6)),
                size = 2) +
-    facet_grid(.~CHROM,space = 'free_x',scales='free_x') + 
+    facet_grid(.~CHROM,space = 'free_x',scales='free_x') +
     theme(panel.spacing.x=unit(0.25, "lines")) +
     ylim(0,10) +
     theme( strip.background = element_rect(fill = alpha('lightblue',0.2)),
@@ -199,19 +199,19 @@ plotGqtl <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm")) +
-    geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(), 
+    geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(),
                     aes(xmin = start/1e6, xmax = end/1e6, y = 9.5, color = attributes), size=6) +
     scale_color_manual(values = c(alpha('red',0.6), "black", "black","grey","grey"))
 }
 # leaf
 plotGqtl1 <- function(Gresults,chr,genes){
-  
-  qval <- Gresults %>% 
-    filter(qvalue<=0.01) 
+
+  qval <- Gresults %>%
+    filter(qvalue<=0.01)
   #qval <- min(qval$Gprime)
   qval <- 3
   my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F)
-  
+
   p1 <- ggplot() +
     geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) +
     labs(x= 'Position (Mb)',y= "G' value") +
@@ -239,22 +239,22 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.ticks.length = unit(.25, "cm"))
-  
-  
+
+
   p2 <- ggplot() +
-    geom_gene_arrow(data=filter(genes, 
-                                CHROM==chr, 
+    geom_gene_arrow(data=filter(genes,
+                                CHROM==chr,
                                 type == "transcript",
                                 attributes == "ID=AHp023147.1;geneID=AHp023147" | attributes == "ID=AHp023148.1;geneID=AHp023148") %>% droplevels(),
-                    aes(xmin = start, 
-                        xmax = end, 
-                        y = "chr16", 
-                        fill = attributes, 
+                    aes(xmin = start,
+                        xmax = end,
+                        y = "chr16",
+                        fill = attributes,
                         forward = c(F,T)),
                     size = 1.5,
                     color = "black",
-                    arrowhead_height = unit(12, "mm"), 
-                    arrowhead_width = unit(6, "mm"), 
+                    arrowhead_height = unit(12, "mm"),
+                    arrowhead_width = unit(6, "mm"),
                     arrow_body_height = grid::unit(6, "mm")) +
     geom_text(aes(x = c(5246000,5290000),
                   y = "chr16",
@@ -268,7 +268,7 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.line = element_line(linewidth = 2),
           axis.ticks = element_line(linewidth = 1.5),
           axis.text.x = element_text(size=20),
-          panel.grid.major.y = ggplot2::element_line(colour = "grey", 
+          panel.grid.major.y = ggplot2::element_line(colour = "grey",
                                                      linewidth = 1),
           #axis.title.x = element_text(size=40),
           axis.ticks.length = unit(.25, "cm"),
@@ -278,12 +278,12 @@ plotGqtl1 <- function(Gresults,chr,genes){
           axis.ticks.y = element_blank(),
           axis.text.y = element_blank()) +
     scale_fill_manual(values = c("chocolate2","cyan3",'red'))
-  
+
   # combine plots:
   out <- plot_grid(p2,p1,
                    nrow = 2,
                    rel_heights = c(0.3,0.7))
-  
+
   out
 }
 
@@ -348,15 +348,9 @@ MYB_plot <- plot_grid(cowplot_flower, pathway_plot, alignment_plot,
                       labels = c("", "C", "D"),
                       label_size = 34)
 
-ggsave(filename = "plots/paper_myb_combined_alignment.png",
+ggsave(filename = "runs/plots/paper_myb_combined_alignment.png",
        plot = MYB_plot,
        dpi = 400,
        width = 25,
        height = 25,
        bg = "white")
-
-
-
-
-
-
diff --git a/workflows/BSA/read_count_analysis.Rmd b/workflows/BSA/read_count_analysis.Rmd
index 4154c86..f7c97bb 100644
--- a/workflows/BSA/read_count_analysis.Rmd
+++ b/workflows/BSA/read_count_analysis.Rmd
@@ -11,14 +11,14 @@ library(tidyverse)
 library(DESeq2)
 library(factoextra)
 library(patchwork)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 
 ```{r}
 ########################## Create a function to generate plots for all betalain pathway genes
 # load object with names of all betalain and flavonoid genes
-pathway_genes <- read.csv(file = "data/manual_sheets/color_pathway_genes.csv", header=T)
+pathway_genes <- read.csv(file = "studies/additional_data/resources/color_pathway_genes.csv", header=T)
 colnames(pathway_genes) <- c("pathway_gene", "pathway", "gene_id")
 betalain.genes <- pathway_genes %>%
   filter(pathway == "Betalain")
@@ -32,13 +32,13 @@ Transcript level gene expression quantification from kallisto:
 
 ```{r}
 # vector of input directories
-sample_names <- dir(path = "data/flower_color_mapping/kallisto_quant/")
+sample_names <- dir(path = "runs/flower_color_mapping/kallisto_quant/")
 
 # read in tables
 kallisto_quant <- c()
 
 for (i in 1:length(sample_names)){
-  x <- read_table(file = paste0("data/flower_color_mapping/kallisto_quant/",
+  x <- read_table(file = paste0("runs/flower_color_mapping/kallisto_quant/",
                                 sample_names[i],
                                 "/abundance.tsv"))
   # set column names and keep relevant columns
@@ -126,7 +126,7 @@ patchplot <- betalain_plots[[8]] + betalain_plots[[9]] + betalain_plots[[11]] +
   theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"),
         plot.tag = element_text(size = 35))
 
-ggsave(filename = "plots/flower_mapping_expression/betalain_gene_kallisto.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/betalain_gene_kallisto.png",
        width = 28,
        height = 20)
 
@@ -182,7 +182,7 @@ betalain_plots <- plot_betalain_counts(gene_ID_list = betalain_quant$transcript_
 
 # save all plots
 for (i in 1:length(betalain_plots)){
-  ggsave(filename = paste0("plots/flower_mapping_expression/", gene_names[i], ".png"),
+  ggsave(filename = paste0("runs/plots/flower_mapping_expression/", gene_names[i], ".png"),
        plot = betalain_plots[[i]],
        height = 6,
        width = 8)
@@ -219,7 +219,7 @@ ggplot(data=regulator_quant) +
         legend.position = "bottom",
         legend.direction = "vertical")
 
-ggsave(filename = "plots/flower_mapping_expression/flower_expression_grid.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/flower_expression_grid.png",
        width = 4,
        height = 8,
        dpi = 500)
@@ -289,7 +289,7 @@ ggplot(data=flavonoid_grid) +
         legend.position = "right",
         legend.direction = "vertical")
 
-ggsave(filename = "plots/flower_mapping_expression/flavonoid_expression_grid.png",
+ggsave(filename = "runs/plots/flower_mapping_expression/flavonoid_expression_grid.png",
        width = 8,
        height = 8,
        dpi = 500)
diff --git a/workflows/BSA/read_count_analysis_from_bam.Rmd b/workflows/BSA/read_count_analysis_from_bam.Rmd
index a3d4423..f442e22 100644
--- a/workflows/BSA/read_count_analysis_from_bam.Rmd
+++ b/workflows/BSA/read_count_analysis_from_bam.Rmd
@@ -11,40 +11,40 @@ library(tidyverse)
 library(chromstaR)
 library(ggtranscript)
 library(RColorBrewer)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 Extract the sequencing reads from the bam files which overlap both the two non-synonymous SNPs and the stop-gained SNP. Samtools view can be used to extract reads covering a particular position (and their pairs). Non-primary alignments can be discarded. Reads should cover the position of the stop-gained variant (Scaffold 16, 5305851, C->T) and the position of the right non-synonymous variant (Scaffold 16, 5305727, A->T).
 
 ```{bash}
-mkdir -p data/BSA/RNAseq/phased_reads/
+mkdir -p runs/BSA/RNAseq/phased_reads/
 
 # green flower
 # index and extract everything overlapping the right non-synonymous variant position
-samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam
-samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam
+samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
 
 # index and extract everything overlapping the stop-gained variant position
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
-samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam
 
 # save as tsv file using sam2tsv from jvarkit
-java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv
+java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv
 
 
 # red flower
 # index and extract everything overlapping the right non-synonymous variant position
-samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam
-samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam
+samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
 
 # index and extract everything overlapping the stop-gained variant position
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
-samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
-samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam
+samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
+samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam
 
 # save as tsv file using sam2tsv from jvarkit
-java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv
+java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv
 ```
 
 Put in loading and filtering of the data (tsv and sequencing data) into separate functions:
@@ -72,9 +72,9 @@ read_bam.tsv <- function(filename){
     arrange(read_name) %>%
     ungroup() %>%
     #mutate(allele = ifelse(tolower(read_base) == ref_base, "ref", "alt")) %>% # is it the reference or alternative allele?
-    mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A", 
-                           "alt", 
-                           ifelse(ref_pos == 5305727 & read_base == "T", 
+    mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A",
+                           "alt",
+                           ifelse(ref_pos == 5305727 & read_base == "T",
                                   "alt",
                                   ifelse(ref_pos == 5305851 & read_base == "T",
                                          "alt", "ref"))))
@@ -104,22 +104,22 @@ plot_reads <- function(df, tsv, title){
   # maybe rather join the two tables?
   number_alt_alleles <- tsv %>%
     filter(allele == "alt") %>%
-    group_by(read_name) %>% 
+    group_by(read_name) %>%
     summarise(n = n(),
               snp_grouped = sum(snp_group))
   # join the table detailing the number of alt alleles
   snp.bam.df <- left_join(df, number_alt_alleles, by = c("qname" = "read_name"))
-  
+
   # first plot, showing specific reads
   snpplot <- ggplot() +
-    geom_range(data = snp.bam.df, 
+    geom_range(data = snp.bam.df,
                aes(xstart = start, xend = end, y = factor(qname, levels=unique(qname[order(n,snp_grouped,qname)]), ordered=TRUE)))
-  
+
   # second plot showing alternativ allele positions
   snpplot2 <- snpplot +
-    geom_point(data = tsv %>% filter(allele == "alt"), 
-               aes(x = ref_pos, 
-                   y = read_name, 
+    geom_point(data = tsv %>% filter(allele == "alt"),
+               aes(x = ref_pos,
+                   y = read_name,
                    color = as.factor(snp_group)),
                size = 2.2) +
     theme_classic() +
@@ -149,9 +149,9 @@ Load in data for the red and green flower bulks of the BSA on AM_00332:
 
 ```{r}
 # green flower
-snp_tsv.gf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
+snp_tsv.gf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
 
-snp_df.gf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam",
+snp_df.gf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam",
                                  tsv = snp_tsv.gf)
 
 # how many reads after filtering?
@@ -159,9 +159,9 @@ snp_tsv.gf %>%
   dplyr::count(read_name) # 99 reads
 
 # red flower
-snp_tsv.rf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
+snp_tsv.rf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv")
 
-snp_df.rf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam",
+snp_df.rf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam",
                                  tsv = snp_tsv.rf)
 
 # how many reads after filtering?
@@ -179,7 +179,7 @@ plot.gf <- plot_reads(df = snp_df.gf,
 plot.gf
 
 # save plot
-ggsave(filename = "plots/rna_seq_reads_gf.png",
+ggsave(filename = "runs/plots/rna_seq_reads_gf.png",
        plot = plot.gf,
        width = 10,
        height = 8)
@@ -192,17 +192,8 @@ plot.rf <- plot_reads(df = snp_df.rf,
 plot.rf
 
 # save plot
-ggsave(filename = "plots/rna_seq_reads_rf.png",
+ggsave(filename = "runs/plots/rna_seq_reads_rf.png",
        plot = plot.rf,
        width = 10,
        height = 8)
 ```
-
-
-
-
-
-
-
-
-
diff --git a/workflows/BSA/snpEff_analysis.Rmd b/workflows/BSA/snpEff_analysis.Rmd
index f74a6b0..28c1f06 100644
--- a/workflows/BSA/snpEff_analysis.Rmd
+++ b/workflows/BSA/snpEff_analysis.Rmd
@@ -14,7 +14,7 @@ library(ggtranscript)
 library(reshape2)
 library(cowplot)
 library(patchwork)
-knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 ```
 
 ## Database creation
@@ -22,15 +22,15 @@ knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publicat
 Run snpEff database creation on the fixed annotation files. Copy the fixed files to the snpeff directory:
 
 ```{bash}
-mkdir -p data/annotation_analysis/snpEff/databases/AHv2.2/
+mkdir -p runs/annotation_analysis/snpEff/databases/AHv2.2/
 
 # snpEff analysis
 # add genome file to snpEff database
-cp polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa
+cp runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa
 # add annotation file to snpEff database
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff data/annotation_analysis/snpEff/databases/AHv2.2/genes.gff
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta data/annotation_analysis/snpEff/databases/AHv2.2/cds.fa
-cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta data/annotation_analysis/snpEff/databases/AHv2.2/protein.fa
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff runs/annotation_analysis/snpEff/databases/AHv2.2/genes.gff
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/cds.fa
+cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/protein.fa
 
 # create database:
 java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2
@@ -42,18 +42,18 @@ java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2
 Run using data from Markus color/sterility mapping bulks:
 
 ```{bash}
-mkdir -p data/annotation_analysis/snpEff/bsa_sterility_color/analysis
+mkdir -p runs/annotation_analysis/snpEff/bsa_sterility_color/analysis
 
 # get data and run snpEff on the example data:
-java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats data/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 data/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf
+java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats runs/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 runs/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf
 
 
 # two files are not saved in the output directory but in the current working directory
-mv snpEff_* data/annotation_analysis/snpEff/bsa_sterility_color/
+mv snpEff_* runs/annotation_analysis/snpEff/bsa_sterility_color/
 
 # it is challenging to process the snpEff output for downstream analysis
 # snpsift is a software package distributed with snpEff that eases processing
-cat data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt
+cat runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt
 ```
 
 
@@ -63,11 +63,11 @@ Analyze the snpEff test run and check for high impact variants that can be manua
 
 ```{r}
 # load in snpEff summary file
-snpEff.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "")
+snpEff.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "")
 colnames(snpEff.tab)[1] <- "GeneName"
 
 # load in list of betalain and flavonoid pathway genes
-color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T)
+color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T)
 snpEff.tab <- left_join(snpEff.tab, color_pathways, by = c("GeneId" = "Gene_id"))
 
 # check color pathway genes for high impact variants, moderate and in theory also modifier might also be relevant
@@ -88,14 +88,14 @@ Subset a more detailed table of betalain genes and their respective positions in
 
 ```{r}
 # load in list of betalain and flavonoid pathway genes
-color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T)
+color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T)
 betalain_chr16 <- color_pathways %>%
   filter(Gene_id == "AHp022773" | Gene_id == "AHp023148" | Gene_id == "AHp023147")
 # add BvMYB1like gene
 betalain_chr16[2,] <- c("BvMYB1like", "Betalain", "AHp022773")
 
-write.table(betalain_chr16, 
-            file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt",
+write.table(betalain_chr16,
+            file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt",
             quote = F)
 
 # set up function for reading in a gtf file
@@ -123,18 +123,18 @@ read.gtf <- function(file){
 }
 
 # read in annotation
-annotation.gtf <- read.gtf("polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
+annotation.gtf <- read.gtf("runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
 
 # subset betalain genes
 betalain_chr16.gtf <- annotation.gtf %>%
   filter(gene_id %in% betalain_chr16$Gene_id)
 
-saveRDS(betalain_chr16.gtf, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+saveRDS(betalain_chr16.gtf, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 
 
 
 #################### generate BED file of relevant positions
-betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 # snpeff by default uses the 5000 positions before and after a gene
 # create a bed file that can be used to subset the vcf file into relevant variants
 betalain_chr16.bed <- betalain_chr16.gtf %>%
@@ -145,16 +145,16 @@ betalain_chr16.bed <- betalain_chr16.gtf %>%
   select(chrom, chromStart, chromEnd) %>%
   unique()
 
-write_tsv(betalain_chr16.bed, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed")  
+write_tsv(betalain_chr16.bed, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed")  
 ```
 
 Subset the vcf file and extract the allele frequencies using vcftools
 
 ```{bash}
 # to extract the format field
-vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
+vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
 # also subset the vcf file to include only the variants around the betalain genes
-vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
+vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16
 ```
 
 
@@ -162,10 +162,10 @@ Plot the annotated variants in betalain genes. In general, it could be interesti
 
 ```{r}
 # read in betalain gene list on chr 16
-betalain_chr16 <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt")
+betalain_chr16 <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt")
 
 # read in variant count of extracted SNPs
-allele_depth.tab <- read.table(file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T)
+allele_depth.tab <- read.table(file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T)
 allele_depth.tab <- allele_depth.tab %>%
   mutate(customid = paste0("16_", POS))
 # allele depth denotes first the reference allele and then the alternative allele, only those reads which were involved in allele calling
@@ -176,7 +176,7 @@ allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_gl", sep =
 allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_rl", sep = ",", into = c("AM00332_rl_ref", "AM00332_rl_alt"))
 
 # read in snpsift output and subset for betalain genes
-snpsift.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T)
+snpsift.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T)
 snpsift.tab <- snpsift.tab %>%
   filter(ANN....GENEID %in% betalain_chr16$Gene_id) %>%
   mutate(customid = paste0("16_", POS)) %>% # add customid column to snpsift table to enable merging of the two tables
@@ -184,7 +184,7 @@ snpsift.tab <- snpsift.tab %>%
 
 
 # read in betalain gene gtf on chr 16
-betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
+betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf")
 
 # join the snpsift table with the allele depth table
 joined.df <- left_join(snpsift.tab, allele_depth.tab, by = c("CHROM", "POS", "customid"))
@@ -201,7 +201,7 @@ Set up plotting functions:
 
 filter_variants <- function(variants, gene, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt){
   # prepare data by only keeping the relevant gene variants and samples
-  dat <- variants %>% 
+  dat <- variants %>%
     filter(ANN....GENEID == gene) %>%
     select(CHROM, POS, ANN....EFFECT, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt) %>%
     filter(bulk1_alt != 0 & bulk2_alt != 0)
@@ -226,13 +226,13 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
     filter(transcript_id == trans_id,
            type == "CDS")
   filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),]
-  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", 
-                                                                                        "synonymous_variant", 
+  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant",
+                                                                                        "synonymous_variant",
                                                                                         "missense_variant",
                                                                                         "stop_gained"))
   min_pos <- min(annotation.filtered$start)
   max_pos <- max(annotation.filtered$end)
-  
+
   # plot the gene with variants
   p2 <- ggplot() +
     geom_range(data = annotation.filtered,
@@ -253,7 +253,7 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
          x = "Position Scaffold 16 (bp)") +
     theme_classic() +
     #scale_fill_brewer(palette = "RdBl", direction = -1) + # think about color palette to use
-    scale_fill_viridis_d(direction = -1, 
+    scale_fill_viridis_d(direction = -1,
                          labels = c("Intron variant", "Synonymous variant", "Missense variant", "Stop gained")) +
     theme(text = element_text(size = 21),
           #legend.position = "none",
@@ -277,23 +277,23 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   out_plot <- plot_grid(p2, legend,
                         nrow = 1,
                         rel_widths = c(0.75, 0.25))
-  
+
   # # rearrange data:
   # dat1 <- filtered_variants[,-(4:5)]
   # dat2 <- filtered_variants[,-(6:7)]
   # dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
   # dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
-  # 
+  #
   # # plot relative allele frequency
   # p1 <- ggplot() +
   #   geom_col(data = dat1.melt,
   #          aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 20) +
   #   xlim(c(min_pos, max_pos)) +
-  #   labs(y = "", 
-  #        fill = "red_bulk", 
+  #   labs(y = "",
+  #        fill = "red_bulk",
   #        x = "") +
   #   theme_classic() +
-  #   scale_fill_brewer(palette = "Set1", 
+  #   scale_fill_brewer(palette = "Set1",
   #                     labels = c("Reference allele", "Alternative allele"),
   #                     direction = -1,
   #                     guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible
@@ -307,12 +307,12 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #         axis.title.x = element_blank(),
   #         legend.title = element_text(color = "transparent"),
   #         legend.text = element_text(color = "transparent"))
-  # 
+  #
   # bars <- map(unique(dat2.melt$POS)
   #           , ~geom_col(position = "stack",
   #                       width = 20
   #                      , data = dat2.melt %>% filter(POS == .x)))
-  # 
+  #
   # p3 <- ggplot(data = dat2.melt,
   #              aes(x=POS,
   #                  y=as.numeric(value),
@@ -322,8 +322,8 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #   labs(fill = "green_bulk",
   #        x = "Position on Scaffold 16") +
   #   theme_classic() +
-  #   scale_fill_brewer(palette = "Set1", 
-  #                     labels = c("Reference allele", "Alternative allele"), 
+  #   scale_fill_brewer(palette = "Set1",
+  #                     labels = c("Reference allele", "Alternative allele"),
   #                     direction = -1,
   #                     guide = guide_legend(override.aes = list(alpha = 0))) +
   #   theme(text = element_text(size = 21),
@@ -331,9 +331,9 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){
   #         legend.position = "none",
   #         legend.title = element_text(color = "transparent"),
   #         legend.text = element_text(color = "transparent"))
-  
-  
-  #allplots <- p1 + p2 + p3 + 
+
+
+  #allplots <- p1 + p2 + p3 +
     #plot_layout(ncol = 1)
   return(out_plot)
 }
@@ -346,7 +346,7 @@ Plot for CYP76AD2:
 ```{r}
 # plot for one gene
 # filter all homozygous reference variants
-dat <- joined.df %>% 
+dat <- joined.df %>%
   filter(ANN....GENEID == "AHp023148") %>%
   filter(AM00332_gl_alt != 0)
 
@@ -355,7 +355,7 @@ dat <- joined.df %>%
 
 
 # filter to only include specific gene
-dat.filtered <- filter_variants(variants = joined.df, 
+dat.filtered <- filter_variants(variants = joined.df,
                   gene = "AHp023148",
                   bulk1_ref = "AM00332_gl_ref",
                   bulk1_alt = "AM00332_gl_alt",
@@ -373,8 +373,8 @@ AM00332_CYP76AD <- plot_bulk_comparison(annotation = betalain_chr16.gtf,
 AM00332_CYP76AD
 
 
-ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff.png",
-       width = 14, 
+ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff.png",
+       width = 14,
        height = 6)
 ```
 
@@ -387,19 +387,19 @@ plot_all_genes <- function(gene_id, transcript_id, bulk){
   output <- list()
   for (i in 1:length(transcript_id)){
       # filter all homozygous reference variants
-      dat <- joined.df %>% 
+      dat <- joined.df %>%
         filter(ANN....GENEID == gene_id[i]) %>%
         filter(AM00332_gl_alt != 0)
       # filter to only include specific gene
       if (bulk == "AM00332"){
-        dat.filtered <- filter_variants(variants = joined.df, 
+        dat.filtered <- filter_variants(variants = joined.df,
                       gene = gene_id[i],
                       bulk1_ref = "AM00332_gl_ref",
                       bulk1_alt = "AM00332_gl_alt",
                       bulk2_ref = "AM00332_rl_ref",
                       bulk2_alt = "AM00332_rl_alt")
       } else if (bulk == "AM00331"){
-        dat.filtered <- filter_variants(variants = joined.df, 
+        dat.filtered <- filter_variants(variants = joined.df,
                                     gene = gene_id[i],
                                     bulk1_ref = "AM00331_gf_ref",
                                     bulk1_alt = "AM00331_gf_alt",
@@ -425,9 +425,9 @@ bulk_AM00332_plot_list <- plot_all_genes(gene_id = gene_id,
                                          bulk = "AM00332")
 
 for (i in 1:length(bulk_AM00332_plot_list)){
-  ggsave(filename = paste0("plots/BSA/AM00332_color_loss_", gene_id[i], ".png"),
+  ggsave(filename = paste0("runs/plots/BSA/AM00332_color_loss_", gene_id[i], ".png"),
          plot = bulk_AM00332_plot_list[[i]],
-         width = 14, 
+         width = 14,
          height = 6)
 }
 
@@ -437,9 +437,9 @@ bulk_AM00331_plot_list <- plot_all_genes(gene_id = gene_id,
                                          bulk = "AM00331")
 
 for (i in 1:length(bulk_AM00331_plot_list)){
-  ggsave(filename = paste0("plots/BSA/AM00331_color_loss_", gene_id[i], ".png"),
+  ggsave(filename = paste0("runs/plots/BSA/AM00331_color_loss_", gene_id[i], ".png"),
          plot = bulk_AM00331_plot_list[[i]],
-         width = 14, 
+         width = 14,
          height = 6)
 }
 ```
@@ -456,8 +456,8 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
     filter(transcript_id == trans_id,
            type == "CDS")
   filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),]
-  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", 
-                                                                                        "synonymous_variant", 
+  filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant",
+                                                                                        "synonymous_variant",
                                                                                         "missense_variant",
                                                                                         "stop_gained"))
   # plot the gene with variants
@@ -486,26 +486,26 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
           axis.line.x = element_blank(),
           axis.text.x = element_blank(),
           legend.position = "none") # this increases the legend margin
-  # margin has to be increased so that other legends are not cut off, 
+  # margin has to be increased so that other legends are not cut off,
   # since the first legend seems to determine the margins
-  
+
   # rearrange data:
   dat1 <- filtered_variants[,-(4:5)]
   dat2 <- filtered_variants[,-(6:7)]
   dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
   dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT"))
-  
+
   # plot relative allele frequency
   p1 <- ggplot() +
     geom_col(data = dat1.melt,
            aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 2) +
     xlim(c(min(annotation.filtered %>% select(start)),
          max(annotation.filtered %>% select(end)))) +
-    labs(y = "Red bulk\n allele depth", 
-         fill = "red_bulk", 
+    labs(y = "Red bulk\n allele depth",
+         fill = "red_bulk",
          x = "") +
     theme_classic() +
-    scale_fill_brewer(palette = "Set1", 
+    scale_fill_brewer(palette = "Set1",
                       labels = c("Reference allele", "Alternative allele"),
                       direction = -1,
                       guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible
@@ -524,12 +524,12 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
           axis.title.x = element_blank(),
           legend.title = element_text(color = "transparent"),
           legend.text = element_text(color = "transparent"))
-  
+
   bars <- map(unique(dat2.melt$POS)
             , ~geom_col(position = "stack",
                         width = 2
                        , data = dat2.melt %>% filter(POS == .x)))
-  
+
   p3 <- ggplot(data = dat2.melt,
                aes(x=POS,
                    y=as.numeric(value),
@@ -541,7 +541,7 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
          fill = "",
          y = "Green bulk\n allele depth") +
     theme_classic() +
-    scale_fill_brewer(palette = "Set1", 
+    scale_fill_brewer(palette = "Set1",
                       labels = c("Reference allele", "Alternative allele"),
                       #guide = guide_legend(override.aes = list(alpha = 0)),
                       direction = -1) +
@@ -564,13 +564,13 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){
   p3 <- p3 + theme(legend.position = "none")
   # combine the three plots
   allplots <- plot_grid(p1, p2, p3,
-                        ncol = 1, 
+                        ncol = 1,
                         align = "v",
                         rel_heights = c(0.3, 0.25, 0.45))
-  
-  
+
+
   # combine other plots with legend
-  allplots <- plot_grid(legend, allplots, 
+  allplots <- plot_grid(legend, allplots,
                         ncol = 1,
                         #align = "v",
                         rel_heights = c(0.2,0.8))
@@ -582,7 +582,7 @@ AM00332_CYP76AD_zoom <- plot_bulk_comparison_zoom(annotation = betalain_chr16.gt
                              filtered_variants = dat.filtered)
 AM00332_CYP76AD_zoom
 
-ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png")
+ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png")
 ```
 
 
@@ -620,16 +620,10 @@ grid_with_BSA <- plot_grid(cowplot_leaf, complete_grid,
 #grid_with_BSA
 
 # save plot
-ggsave(filename = "plots/BSA_with_grid.png",
+ggsave(filename = "runs/plots/BSA_with_grid.png",
        plot = grid_with_BSA,
        width = 25,
        height = 20,
        bg = "white",
        dpi = 500)
 ```
-
-
-
-
-
-
diff --git a/workflows/genome_polishing/helper_script.R b/workflows/genome_polishing/helper_script.R
index 2a93648..1571dce 100644
--- a/workflows/genome_polishing/helper_script.R
+++ b/workflows/genome_polishing/helper_script.R
@@ -1,20 +1,20 @@
 # set working directory
-setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/")
+setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/")
 
 # read in list of all headers with the correct order
-headers <- read.table("data/NextPolish/input/out.headers.txt")
+headers <- read.table("runs/NextPolish/input/out.headers.txt")
 headers <- gsub(">","",headers$V1)
 headers <- gsub("quiver_","quiver",headers)
 
 # read in prefiltered fasta index
-prefilter <- read.table("data/NextPolish/input/out.prefiltered.renamed.txt.fai")
+prefilter <- read.table("runs/NextPolish/input/out.prefiltered.renamed.txt.fai")
 
-# use the 
-# every sequence that is in 
+# use the
+# every sequence that is in
 no_seq <- headers[!headers %in% prefilter[,1]]
 no_seq <- sub("",">",no_seq)
 
-write.table(no_seq, file="data/NextPolish/processed/header_without_sequence.fa", 
+write.table(no_seq, file="runs/NextPolish/processed/header_without_sequence.fa", 
             quote=F,
             row.names = F,
             col.names = F)
diff --git a/workflows/genome_polishing/process_nextpolish_output.sh b/workflows/genome_polishing/process_nextpolish_output.sh
index 65ab458..d24e306 100644
--- a/workflows/genome_polishing/process_nextpolish_output.sh
+++ b/workflows/genome_polishing/process_nextpolish_output.sh
@@ -4,8 +4,8 @@
 # (see master_thesis/code/process_nextpolish_output.sh for more information about the input file preparation)
 
 # Setup
-NPOUT=data/NextPolish/output/
-NPPROCESSED=data/NextPolish/processed/
+NPOUT=runs/NextPolish/output/
+NPPROCESSED=runs/NextPolish/processed/
 
 mkdir -p "$NPPROCESSED"
 
@@ -15,10 +15,10 @@ mkdir -p "$NPPROCESSED"
 cut -f1,2 -d'_' "$NPOUT"genome.nextpolish.fa > "$NPPROCESSED"genome.nextpolish.renamed.fa
 
 # index the prefiltered fasta file for use in R
-samtools faidx data/NextPolish/input/out.prefiltered.renamed.txt
+samtools faidx runs/NextPolish/input/out.prefiltered.renamed.txt
 
 # filter the prefiltered file for everything that is not in Nextpolish file:
-/home/tom/Documents/tools/bbmap/filterbyname.sh in=data/NextPolish/input/out.prefiltered.renamed.txt \
+/home/tom/Documents/tools/bbmap/filterbyname.sh in=runs/NextPolish/input/out.prefiltered.renamed.txt \
 	names="$NPPROCESSED"genome.nextpolish.renamed.fa \
 	out="$NPPROCESSED"prefilter_not_in_Nextpolish.fa
 
@@ -34,7 +34,7 @@ cat "$NPPROCESSED"genome.nextpolish.renamed.fa \
 LC_ALL=C awk -v RS=">" -v FS="\n" -v ORS="\n" -v OFS="" '$0 {$1=">"$1"\n"; print}' "$NPPROCESSED"combined.fa > "$NPPROCESSED"combined.linear.fa
 
 # rename header file by removing trailing underscore character of Contigs:
-sed 's/quiver_/quiver/' data/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt
+sed 's/quiver_/quiver/' runs/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt
 
 # order file:
 ORDER="$NPPROCESSED"out.header.renamed.txt
@@ -60,5 +60,5 @@ rm "$NPPROCESSED"out*
 rm "$NPPROCESSED"prefilter*
 
 # copy to final output directory
-mkdir -p polished_reference_genome/polished_genome_annotation/assembly/
-cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta polished_reference_genome/polished_genome_annotation/assembly/
+mkdir -p runs/polished_reference_genome/polished_genome_annotation/assembly/
+cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta runs/polished_reference_genome/polished_genome_annotation/assembly/
diff --git a/workflows/genome_polishing/remove_ambiguous_bases.sh b/workflows/genome_polishing/remove_ambiguous_bases.sh
index c810a8a..1cb8a10 100644
--- a/workflows/genome_polishing/remove_ambiguous_bases.sh
+++ b/workflows/genome_polishing/remove_ambiguous_bases.sh
@@ -6,8 +6,8 @@
 # N specific entries can in a last step be removed. To reconstruct, the saved order can be used to integrate the N chromosomes again (which record the number of Ns removed)
 
 # Change these two parameters
-INPUT=reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa
-OUTDIR=data/NextPolish/input/
+INPUT=studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa
+OUTDIR=runs/NextPolish/input/
 
 mkdir -p $OUTDIR
 
@@ -37,9 +37,8 @@ grep ">" "$OUTDIR"tmp2.txt > "$OUTDIR"out.headers.txt
 echo "headers saved"
 
 # remove headers without sequence (Can be caused by stretch of Ns at the start of a Scaffold (see Scaffold 10))
-sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator  '[ATGC]'  > "$OUTDIR"data/NextPolish/input/Ahypochondriacus_split.fasta
+sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator  '[ATGC]'  > "$OUTDIR"Ahypochondriacus_split.fasta
 
 # remove and rename temporary files
 rm "$OUTDIR"tmp.txt
 mv "$OUTDIR"tmp2.txt "$OUTDIR"out.prefiltered.txt
-
diff --git a/workflows/genome_polishing/run_nextpolish.sh b/workflows/genome_polishing/run_nextpolish.sh
index cbf0579..a086c0b 100644
--- a/workflows/genome_polishing/run_nextpolish.sh
+++ b/workflows/genome_polishing/run_nextpolish.sh
@@ -19,15 +19,15 @@ module load samtools
 mkdir -p /scratch/twinkle1/nextpolish/
 
 # set output directory for saving the polished genome to:
-OUTDIR=data/NextPolish/output/
+OUTDIR=runs/NextPolish/output/
 
 
 
 ### Prepare input files:
 # remove all reads containing ambiguous bases from the input using bbduk
 /home/twinkle1/tools/bbmap/bbduk.sh maxns=0 \
-	in=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
-	in2=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \
+	in=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
+	in2=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \
 	out=/scratch/twinkle1/SRR2106212_1.cleaned.fq \
 	out2=/scratch/twinkle1/SRR2106212_2.cleaned.fq \
 	-Xmx16g
@@ -49,7 +49,7 @@ round=2
 threads=20
 read1=/scratch/twinkle1/SRR2106212_1.cleaned.repair.fq
 read2=/scratch/twinkle1/SRR2106212_2.cleaned.repair.fq
-input=/home/twinkle1/master_thesis/data/NextPolish/input/Ahypochondriacus_split.fasta
+input=/runs/NextPolish/input/Ahypochondriacus_split.fasta
 
 
 for ((i=1; i<=${round};i++)); do
diff --git a/workflows/genome_polishing/unpackSRA.sh b/workflows/genome_polishing/unpackSRA.sh
index 7467f47..6cdf885 100644
--- a/workflows/genome_polishing/unpackSRA.sh
+++ b/workflows/genome_polishing/unpackSRA.sh
@@ -14,29 +14,29 @@
 # download file, show progress, increase default max size so that the download starts
 # tools/sratoolkit.2.11.2-centos_linux64/bin/prefetch -p -O /projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/ --max-size 30G SRR2106212
 
-QCOUT=data/NextPolish/QC
+QCOUT=runs/NextPolish/QC
 
 # set working directory
-cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/raw_data/lightfoot_WGS_short_reads/
+cd /studies/additional_data/lightfoot_WGS_short_reads/
 
 # before running the fastq-dump command, switch off "Enable Remote Access" by running sratoolskit/bin/vdb-config -i
 # split into fastq files
 /home/twinkle1/tools/sratoolkit.2.11.2-centos_linux64/bin/fastq-dump --split-3 --verbose SRR2106212.sra
 
 # set working directory
-cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/
+cd /projects/ag-stetter/twinkle/ARC_projects/betalain_regulation_amaranth
 
 # gzip the resulting fastq files
 # Even though there is an option to gzip it directly using the fastq-dump command, the option is deprecated and should no longer be used
-gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq
-gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq
+gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq
+gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq
 
 # remove sra file afterwards
-rm raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra
+rm /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra
 
 # quality control:
 module load fastqc/0.11.9
 
-fastqc -o raw_data/lightfoot_WGS_short_reads/QC/ -t 8 \
-        raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
-        raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz
+fastqc -o /studies/additional_data/lightfoot_WGS_short_reads/QC/ -t 8 \
+        /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \
+        /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz
-- 
GitLab