From b6981cdad469d0edb524917d51f467b1ed90acc2 Mon Sep 17 00:00:00 2001 From: Tom Winkler <t.winkler@uni-koeln.de> Date: Thu, 26 Sep 2024 10:25:44 +0200 Subject: [PATCH] adjusted paths to ARC structure --- assays/BSA_RNAseq/adapters/custom_adapters.fa | 24 +++ assays/BSA_RNAseq/adapters/readme.txt | 3 + assays/BSA_RNAseq/isa.assay.xlsx | Bin 9293 -> 9292 bytes assays/BSA_WGS/isa.assay.xlsx | Bin 9311 -> 9310 bytes .../isa.assay.xlsx | Bin 9277 -> 9276 bytes .../isa.assay.xlsx | Bin 10101 -> 10100 bytes assays/isoform_sequencing/isa.assay.xlsx | Bin 9324 -> 9323 bytes isa.investigation.xlsx | Bin 8417 -> 8416 bytes .../BSA_parent_betalain_quant/isa.study.xlsx | Bin 12584 -> 12583 bytes .../Bulk_segregant_analysis/isa.study.xlsx | Bin 10586 -> 10585 bytes studies/additional_data/README.md | 0 studies/additional_data/isa.study.xlsx | Bin 0 -> 7504 bytes studies/additional_data/protocols/.gitkeep | 0 studies/additional_data/resources/.gitkeep | 0 .../resources/color_pathway_genes.bed | 36 +++++ .../resources/color_pathway_genes.csv | 30 ++++ studies/isoform_sequencing/isa.study.xlsx | Bin 10164 -> 10163 bytes workflows/BSA/RNAseq/QC.sh | 18 +-- workflows/BSA/RNAseq/adapter_trimming.sh | 4 +- workflows/BSA/RNAseq/index_STAR.sh | 6 +- workflows/BSA/RNAseq/run_STAR.sh | 4 +- workflows/BSA/RNAseq/run_kallisto.sh | 4 +- workflows/BSA/WGS/combined_filter.sh | 6 +- workflows/BSA/WGS/map_reads.sh | 6 +- workflows/BSA/betalain_quantification.Rmd | 80 +++++----- workflows/BSA/bsa_and_plotting.R | 102 ++++++------ workflows/BSA/read_count_analysis.Rmd | 16 +- .../BSA/read_count_analysis_from_bam.Rmd | 69 ++++----- workflows/BSA/snpEff_analysis.Rmd | 146 +++++++++--------- workflows/genome_polishing/helper_script.R | 12 +- .../process_nextpolish_output.sh | 14 +- .../remove_ambiguous_bases.sh | 7 +- workflows/genome_polishing/run_nextpolish.sh | 8 +- workflows/genome_polishing/unpackSRA.sh | 18 +-- 34 files changed, 341 insertions(+), 272 deletions(-) create mode 100644 assays/BSA_RNAseq/adapters/custom_adapters.fa create mode 100644 assays/BSA_RNAseq/adapters/readme.txt create mode 100644 studies/additional_data/README.md create mode 100644 studies/additional_data/isa.study.xlsx create mode 100644 studies/additional_data/protocols/.gitkeep create mode 100644 studies/additional_data/resources/.gitkeep create mode 100644 studies/additional_data/resources/color_pathway_genes.bed create mode 100644 studies/additional_data/resources/color_pathway_genes.csv diff --git a/assays/BSA_RNAseq/adapters/custom_adapters.fa b/assays/BSA_RNAseq/adapters/custom_adapters.fa new file mode 100644 index 0000000..c6da458 --- /dev/null +++ b/assays/BSA_RNAseq/adapters/custom_adapters.fa @@ -0,0 +1,24 @@ +>read1_1/1 +AGATCGGAAGAGCACACGTCTGAACTCCAGTCA +>read1_2/1 +AGATCGGAAGAGCACACGTCTGAAC +>read1_3/1 +TGGAATTCTCGGGTGCCAAGG +>read1_4/1 +AGATCGGAAGAGCACACGTCT +>read1_5/1 +CTGTCTCTTATACACATCT +>read1_6/1 +AGATGTGTATAAGAGACAG +>read2_1/2 +AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT +>read2_2/2 +AGATCGGAAGAGCGTCGTGTAGGGA +>read2_3/2 +TGGAATTCTCGGGTGCCAAGG +>read2_4/2 +AGATCGGAAGAGCACACGTCT +>read2_5/2 +CTGTCTCTTATACACATCT +>read2_6/2 +AGATGTGTATAAGAGACAG diff --git a/assays/BSA_RNAseq/adapters/readme.txt b/assays/BSA_RNAseq/adapters/readme.txt new file mode 100644 index 0000000..a029aca --- /dev/null +++ b/assays/BSA_RNAseq/adapters/readme.txt @@ -0,0 +1,3 @@ +## raw_data/BSA_rnaseq/adapters/ + +Sequencing adapters used for reads trimming diff --git a/assays/BSA_RNAseq/isa.assay.xlsx b/assays/BSA_RNAseq/isa.assay.xlsx index 2d21f88fb855a54a1f3982155f0bb99249397390..399f4b8a40fb6354edb648cda747f4dc1d558145 100644 GIT binary patch delta 701 zcmX@>amIruz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErxy1yc!`Q!o=*d1@1|X`A>jOx%fO`U1l!;Fb#MR;}0(0jHfQ>pTu$v7e z+$269EUYXA7WR_b&d90bY!zvqQr#UtnM+2yzQaw?BlqY^kEvW)S7&GIKbUl5+Fp|< z?_b++EGT-Fo1R{_hkyC!<caL|33IPxxwSQSuq@ai7L=QwvNx(FSjZy9YeS6ojEP(_ zGt<j8%{i}&#jQ0<2yEisGqH%%+<23-3Fp$j_X`s89*CZrES<~UCU;BMwCDXzu?!hk z;V1^9gY^?G-ZC)RtNc=j^}X)dgU@;!ABYrbI4oRz_~3+jTRFBBm~%@#bBgp>!*gfK zyc2P?C7Z;jrCMsZ_g?vKzeN7@^zCU^0#;Rp%6LrUnpeQhv-Rw?to0=;gQ_O3uADN% z`P!1*7u?@{_{uADyM(_+=Rl!0?=S1EPeKpPPv#F@xwc^5-Z-bPOC=74-Cniy+==En zmNCDI^Hn;Izq<1}Uh8ey?Q;xDAFMrZ@y?s^{o>`%S&o}5x{fD)5jZwE)WrAEZxy-v zYqs%M=ks>In^<o>k7w7<b^lVoaU;k3=1H<Mz;UW0Zx4#RTzMl9wNTz1MBS3t2lJF< zCaWt*f|yPUZXl{l!3snjQ*Z%MJc?i{NYMero1*9sqMkx{>PluHUc3@mZlRJXh<8H? rY?-98C5X2WXncV(*!XT`8>HyJpbXZ_p(4ZBI$2dkiA`1>m|PhE_>=+< delta 725 zcmX@(an^$;z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYg<rWi&4rBiUq9^-s8Gxudt`8v50`3W5Q6@e$5Lb(@2+W-)05<BVz-~5> zaFh6au&}ZeSlCNyJ0s^cTdPQwJAHE!CUePX*Kg@{nsBQtc%qi*l~v}~;|{29)UD1u zasPUyfWy-(x6hq>Qz7q{-mb)7cfe}p)hRsef}9S8x=U{PoXoQ1fA9HVg66@ZwFzEY z1u4_+F1e!;6J4?OR)Z_2-Uqi+s&{-&id|HhVzsZeX=h`&hX1;)x)N0x=2r~%B}*?T z6!FbqJi$`$?3R7_!k4}!n??3)H>sX&&ejxsVj)9oA0NAO)g`4HhhlXzW*Da`ZZs+g ztx~D}dhldi#I&aiWK4tZ|DRB&6L)V`p!3RCp@j;OS`~+M4X&6)ubg{nsn^TMb<YBm z_#!92bCTcr_qusO&Sl#V8ySwTHF@#=$|P~l`aao^rP~fwev4<iIz5R)I(PLnD@FdI zX9W+>@13R~w>o%z{mL!3bFG;>4!l>%G_Odx=k9<0wB^Z18glIyohAIIU5quFr}Fsr z`CZSue@#xjX!|Mu_+m%%`$zYmsbfTr_{|e#XMiJBTizZNdpYt(AZmfUIf%L`uMg%a z$xK#LkOVOu72H5nr-BuTI;!9TqPP{oRG^{*h&Nf$A4ENY@YIydK)g65u-pPAQxNaE z64){cWlIom0nqq-Ww7yG$~H*xe_q*49+Voyxi|&085tP1FflOj15+#lv`tn~QDT#m H2Z;axQT-py diff --git a/assays/BSA_WGS/isa.assay.xlsx b/assays/BSA_WGS/isa.assay.xlsx index 1397489d55497f42e3468d5e7c53118165c9f3de..2e357f7219a77f02119427594574cd6d4d116af0 100644 GIT binary patch delta 717 zcmccbanFM%z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zP1_4o__@dOD zVtr&in^l?B!K(aNv_P7wSZ*_c=y3KgAbPSdmjQ^X=lTE=E#zJZ78T)B195Hmioo0r z0;(YHb%EV%AbN`Ue6X;o6j(S$YC9vRj<Z#yc}jJ6{A3v!?fMQkMUUL0D?O%iWnG<} zt^Z)sjcI#Lp1gl;!?B>~Rc?BE*&hDopOYuD+b7JulI7Oc+`+P7hgeW<ddl9YmS7=^ z6t4|2+A}6{$;?bI*EHw6E*7`eEFrLod(XroPIKc;&L*5o``#}|$a^4qYO-`LcbnWT zUDKZTH^nk!T!o_;j1JaMxOmIJWUul|9oF}{XAeH>ZG0e7q~Wk|@!^9L=56KJR$$I8 z^~@>KV-3%pDf3Rm)s}1$pO$K=;of`YyZsXR)6=)7T?trK6)NK~jcZ;3H_z6y*Rs}^ ztPHA}xVm!64CiZ0c3*IR_u(tA%<U5X8l3}$+PuH4w>}9yG(VX?bmiKDd3)oWzAlwG z6n1;n(sL)8=UB%4D$ZBwIR5I+>v*lVWw*~UD1ET@yu~|j#`lYtKW8~^vgkUV_(kB@ z<WLjeN556%>aW?xU!Bj}{cd8t^*o+kKiB<B{l<+Fr%PmKfaBCo-X0hqlk4S;K-6Y= za}f1LULQosDwu+(5Cu07HAle;MBPwu0a0>_U@Aq?0mNIT=nta4LwJ@-W*}an5?D@F zL1yxKB}tITD<!aH+RBz79!R1|87whZ*#;^4A1Q<NOR2~(PM>V9qQs^z4@|xc0Of@S AM*si- delta 718 zcmccTao>X{z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD8w5m!;)_yq ziuIB8Y*uAf2dna9(E@3zV!6!(qQlw0fauA-Tm~Sjp6dfhw2*rtSX6{h4aBwKD*|&j z2&jU%*9CU7f#@mX^TEQVQefd2sqKuM*KDmKRqpi7Nti4nqg}tH*J;A7uHcDUqE}X# zUynPWx>2_}^Thq@l>!b=uiQR&?oEZfUwXR|f87DAl~<?munTfJ6zVRy<#RI2lK;Ku zg9(}ki`FK1X%(bQySwC$N=$Uc)>{p(oO&PJPO0AUIVpBgWs23l)~20};Trzyw(3e$ zWtd+v*q1E5pism&gYg7Qy|Y{P;R|2-mTVT;v)!b6wmDl<@QH;Ct$lp#&Q+I`ZXAl$ z&6r`Fs<_dpAhb%Q`s=}yaS_v=E|4(|y8nMdole}nS%J<gUxgMbL~2zW(lxkZ7QJ%r zrKMgkBiB6(OyY~2{LV>!=ilq*1v!^(KWt<;zSiW$`zw>gIqUmmLzZqkRQWBQ>FV?( z4(Z(0)2tNvi=GucJim9Eg52uh_4O;a+|IRT?l|yXCDXhj<(|9$`O}ssA8E+7Uv!r6 zpLQ|UXr9XB+vj&Z@BTG8@uKag{Nsxq&F>%Gf2NKRB~lm5&HzWMt-L)jLMGSA8-b`z z^5!7wwY)xvl2I@PQNapsAZoUP6^Od7-~ys#6~R=pq63JxRM8(qeS`2Ul*~Z90wu7V zjDpPMb4rpRk(Wwf%e0g&K|GK|qcT`xj<O9>{6AC%>z7oKVVp79Oht*!KpvQW8357f BBE$dy diff --git a/assays/betalain_LCMS_quantification/isa.assay.xlsx b/assays/betalain_LCMS_quantification/isa.assay.xlsx index 134a053fcd23d521beea075532f59f2dd93aa81c..d361dd4b2ffae902a2be61a221938294149b6bfa 100644 GIT binary patch delta 701 zcmdn%vB!faz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErxy=Nk!`T~|L8@+X8GtAr?hhc*f81>#*~x8uY9Q`9zI$N3eF9*kwhHWK z0|^(2&j+g#mI4b~No{B3)N!_oG*7ASj-UKXTD!i(P0=Iw=t_^NTv=CVXX`(hbYt3H zlPB+A+i)x>dX<}=UbcsS`RC+`?Dh$BuVlHkHFvNq*dZ2_o1U^aswG&+BE@S%jP{I) zTrxA$%Qek8uZzX4HA@I=;@&f{h|}D7ld}ou(!Tc#67n91o|-J3%iSh-OV_mL{Y|k9 z8CT&b2BU-Z6E5B|Fxjj8Qit`u?%9LSdK({z6lpjtTzvT8gn3&zwiTFjOFeUn^jO1l zXUe=2akV9z#HXcNYPk1a`EI{N{`B<iX;%VPRfWoUOyinYz|FJu?6s`*B`brfCa$iW zGQ;`WlHC{F-+lPXD|5SqzeeXkp*HU?>#a{h56w^J4_&#oVBX$1r>{#T4u#!bwe;MH z<~f!zzl!r!I*z}(^EzJZZQ1Q}3`!rYJ#X>OoALeP<<D7;n=HDHCw>t)HaXP9_t9?^ zx%z9i@mJ^bcE6igZ#|D^*UxqTQonH{$NT0c*%{z?k&(9t#a67m5r~>7Zw{hP$?Jo8 zN-~qh6(m7SBLz1QRi$7BqP8ixfT%wZ%1O}y#A{LX2T@lcJaHv65YJx;EH_cf6vR8B s1h$M**%HK?2sA!H8EkyDvJF!7A5aGC{h=(wSUOo$MTw199++Gi0FRgifB*mh delta 725 zcmdnvvDbqqz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYg<u((D4rgy<2C2HmWdNdhxIchI|8cj0WGA=rse!ob`0j!A_6dNE+A6S{ z4J2G7J|C<~SPCp`CAFQA^O~(yq{^MXISG@0No&_{>2;cLt1Eb-mgtpL=GWs6sBYA) z&OCAddZmEF(<`^noqJOu@0Z@L#9w#7YUR}_JnVv;4u!f)Zuy+dvgCj7`Cx+P!J@SZ zURnhy)9x<0qY@KcvGrDiE2rKEw^OQjd`^m8RGDJ6ueE7sW4MO@x~;krRT<`24E7~U zFDMl8&0sviQt#}RefYwcz9pMQ_G~w)o^8(76ntVKLu(%&yK~hgr5lH0bu(rdrz&nV zDhRDoss4KKWL(6wrwe3EgYN&IP^S}jZ&sl5%2%O<3XxhBhjb0Dm_@IgdugfH%gA-l z0+aY6C%<!&-}(2tc|p!)+YcKVj;}R&@&3vranAZa*^s5%4pn}OXSzB)i9<Sf^)xF* z{-S3E56|zNrXaUEczylKEw^*6nL7@=SIIQ5NV(_kfBv-P$wwM;?H8RT{HI-vHJYdL z`1bi-&%1w3PP}OQDgXFlNAvqf_n)a_M2`5)jj}Vq5hE>c4~niBc_R=tLEaohos`!H z^OR&Jiz!Hgn1%{&AgWTq3Pf#HZ~;-jA(W${1BlnG=ntZ<KzL$GW+0xQ5?F46k|~IH zTnTI$hq5JzHvwpTyfWDMDrFm__}{N=CJ#yt;#{19*^CSfTbLLa_<<=F0m>$es3@`V H$%8}y12G{_ diff --git a/assays/betalain_photometric_quantification/isa.assay.xlsx b/assays/betalain_photometric_quantification/isa.assay.xlsx index e18f639f6add7f87f39ecc7590f4455803e5d5b1..68931c6c470e924b1bd598f466bf6f20196dc4dc 100644 GIT binary patch delta 700 zcmezB_r;GVz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErInM;5z1f4<K&s~R8i1&aydOZKhxyimWG5F1s)4w(1hv52W5Qsg-U{yq z>DjzWYCc%lTNW%_B)grFQ^(mV(mbWQJASf}ymozuo1#bV(Ul%kxw5X#&enf0>Bh9Z zCQshKw&7S%^eQ(!y=)Kv^3TZ=+3geNUdeK6YwloKutO{;H$7!<R7<dsMT*yk80{Go zxnyRhmus4HUKfj7YnBk$#Jy)?5vRHFCTA1QrG4)gB;-91JvCW6m%B~wmab{f`<r4J zGOof=3`Pg*CtSQ`V6s>Fr4H+R-LnUu^)@~bDbjFQxcKnF3G=pcY%4J5mU`wC>9L0A z&Xjp4;%ZAaiBC(l)Nt>;^4)%k{ORf2)2;-pstT3yn8r1)fSYIQ*=t$rOI8L|O<Y|$ zWrp*$CA%-Uzx(i&SLSvJe~r$8LT%n()?1&19-5!bAG&gF!Mwe3PG6Tw916R=YU#NX z&2ubceii4dbR2(m=XJc++p^o|7?eI(d*0%mH{<)o%b&9xH(7KYPy8ZqY;vfH@1x%; za`o42<FC%=?S410-g+L-uAl4vrGDc^j`z*G6lQ?qG(^cB6kAi3j6l>$C36tPq^u93 zYSd&V`zuR=m|4nhAZnAc6^MGL>;j@pRKQe~iUWwZN5vmR@v4G({;FmmUOR*bGU=%* t$fQYXVEY`^EJ4yWYNjA+vziT3?Eg>$Yc)`pVO%@eS6zwCRSB3{834*A1!e#M delta 702 zcmez3_tlRlz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYg<vbIJ_GS-a1F4$NYXG7y@_qn`9_CvMlAT;6s0QNB64U~7j|qc~dMmse zq-XOgsrg`GZ&|Q#k?eLx&TF<-kt%ok<|IrulGm=^((5$gR#)&uEzv8h%&*5CP~E6o zoq6K^^-2MUr&n&DJNKqS-Y>mfiNEfE)yk_=c-RFw9SU`q-10e@Wy$~E^T7nogGFl- zytE2Zrrlj~M<ph@V(YC2S5CbTZl_f5_?#5Gs4~TBUu)CO#&8Y)bz5~Msxr*480<@y zUQj6Fo56U3rQX>s`|yP?eM>fr?AdNoJ=>hEDfq-fhSoklcIT=~N;eM0>SoL^PF37! zR1jLFQvLPd$+(DVPZ!9T2HpQZp-w06-mF09m9Ihz6(Y4N4(S?PF^gU~_tH|Ymyzq9 z1t#%DPJZVkzw_^P^MahqwjVY!9A9hl;{BCL;+*w;vLQ>i9jg2m&vbQq5{Go|>S<Pr z{6)_S9-iMjO+jvT@cR0dTW;rCGj|+#uaaqAk#f)7|NLpolaDmy+Algw_)ohSYcx;g z@$K`wo_GJ6oOsdpQ~vSAj^_7|?mtt<h#c{ocPh*PM{2N=Jt(@SC>epM6H4YFicwh~ zL{+QFO!iZj1Tiy}-9XevWh)T%R@nta8LNP)N)-nXZ?}p+h~iNN^ZZoJK)f~x4`k94 vRj^6_RlycIs9A#KtJO?F)Fw3>r1<}?2G*;uF2lHPvX8nFo0}3ay)pm*k#!>D diff --git a/assays/isoform_sequencing/isa.assay.xlsx b/assays/isoform_sequencing/isa.assay.xlsx index 9719eb30c1c6f3ccd645f4189923b8d6215fce9d..ae346d235c02e0ea6c0a10e07149fcc41e5f20f7 100644 GIT binary patch delta 724 zcmaFk@!EqYz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErxy=Nk!`T&>L8>ab3_#Rst`8v5x!lV@vXfo;)Ii)^K0h#buK?JnCjz_K zK*9^f=Yxfvq`<<NQrj6hb)2mt%~Pto<0os%XxDeRDSG4{UFk8EE9>g)Z2bq5ZcN*2 z^5p$%8;%7<uX5AV%l7au|C~IL-9BONl`OZm<_?wxJH&!=(^K|FwFC=Uq<C$J(Vj7p zOJ-(zxu!Yib+Nd$W(k2!+<PV#ahe-%ayH>y+V_4zLf!+>Q<J50x!dG!>6-SuzbTd> z<0>4*V05s4!o^z#CVQ1%>af1oJ$vw3Z{q`zA`ORyiw_^1FmEfzwgPi*sb@}+9&32+ zOqq8guC`>8__S0@4fozF-|d&kpPs%w?MlF^s!$n^X<YLPxOujoy_U7UWMxp*#MPBk zW;kD4vipMjyANM^Wp0=7*XSH5)aLzVz4b}xq4~-Dp)1!G%-b91^mVDkp|IPlmYzG& zJjXKTS8={d$MIKpUdL;_ExUb=LFt3F=PllOGrnKE{5i{UlSS9@#4iHJCWo5%KKiX9 zSAWem{_1?*?spUGt>^LV`nm33>NjrWc;CEHb_O_3z2)sevDG7Q1fmYen}evI^7>$& zlFVc`1xXMyQNayFtyHiAQBM_IK$Ny3m?~6s0P!{}`hzGIB{0uT$qdA+gYXV0nSyxV zmB5x+C|iPf2Y|-+D}#++rEG%~{qL2{<Uxr+oQqR1n~{NG3ljqaKQO@}z>>)>DoSiN H@*oia&vgcz delta 725 zcmaFu@y3HEz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYg<u((D4rf<j2C1s#G5}GlxjukI=W;It$xe3VQv-2x`TW4#y#ipPo(Sw_ z0|_q_pAQyxk^&27N^NK4yk=_^sdA@pPQqkO8SVNly-pKubp=n<61}p@{CeC0)s4E< znJ4aFuM}{2dgb=Hb8jl-{nFc&`0EZ>t-Lyghh31<p-^|pEuWKFmi+HMA573ZShP05 zORFGd+TA60RAQnlw%%%R<<$G&c1rb*&q=Y1DpRcXwKnZ+4A<~qw^di7D#QGW!M<ea z1%)EM8H^`b>Yd%P4`2Aww`8-(p6w>pv(4F>f=?`DXzk-;cdojmbmLI0ZpIAbRK<-( z1))_c)n5;ujEk7|bb*X%(Ea}t>U84n%?fm0`6{$fAyTX2kgmZMv*?v`FD><Y8M*FR zU=m;C<abW;JO5rcFUYxU`(Y!)@wFx|-d~v{&RO3l8?toUp~`RZOjoBTaY*N`o@S-U zU-Yct;rYGO6y#P1udiRZ<#w($bH{=ADw*aLDfis{&!4tD`A9>q{i3sk|Fny-M)Oo2 z-#)+VdH1i$i5G1@<sV<{Xnz0b{xfxq$PvGJgX|1&q<YEQgQBZj-UvkPmp2DdKjihn zJSCaQt_qSMW`cqnh+3gw1)`oPxPT}vMKD#M=m6qvQuGH=%t~OMtCAUrR}10oS26|h zzA1q%Ggr0*@%96a?^6aFzf#!-DgNIno5_PxgE$wbU^XKI!xkn627X|QMS!J~omG_B IY~?{B0Q=`78UO$Q diff --git a/isa.investigation.xlsx b/isa.investigation.xlsx index 8c37020a85cca1fe30c338cad3743015e7e4fa3b..586f555874be0956c7b05f5da13c824073eaa385 100644 GIT binary patch delta 629 zcmaFp_`s1Tz?+$ci-CcGgF(l6BTok-laBM`X^cuhYV$5e0WkLrlRQZDEt4upRD@X` zENaT41>z>MOk@VpU)blefmBWA1FPD_w;ROWTrN7F2_!5b0T#BG*uu!E<7^dao>JW% zKlzWOc72DNqDSu0l^#>MvaZg~)_*YR#<aaAPu{<_;aE`gDmOj7Y!CnP&&d<n?Gxr+ z$#QFJ?qFH4Lo6saJ!NlHOR$hdir0o1?HLofWM-z9YnpRj7mHhKmJry)y=P((r@8Sa zXA{n)eeV|}<UJ5QHCZ~ByG`zvu4&Kvn_?L<uEJ3aMhELBT)bsqvRC<~4(of}vj?B` zHa-w3(r{R~`0&9A^R{wqD=_Dldgc`Av4-c)lzAuOYD+eWPfNAbaPPhH-F}Ju>FL|k zt^};A3YGDg#x<{in`i6UYgy|{Rt8l~TwOV3hV!*0yDzxE`|y=l=5`5xjn08WZQftj zTc3m;nxD)cx^iv7yuEQwUzbW83cI~(>A4flb1Y+i73ZsT9DjA^b-dQwvfJkvls;H{ z-r}7%<NL+SpR*h{S#%vw{338{a;S;#qu(lW_1A3Uug>S~emAk+dLGZNpX>gme&a@t z(#<W>z2N8)mbC}PLx`*qi0Y6v2T=!Q^+D7Nd6~(4a*`mXrkopyDwMMVQETN~K-6bB z2M`5P7AOx^mLm@~ZLPc|M8XtA$t&0(#chg$8Ax!Bf(&E%<edsiY%H?CWX1ph)I0EI delta 630 zcmaFh_|TCjz?+$ci-CcGgW;O(MxG8vrfas7r!gu4sm;3>1;E@hO!6Snw@j)aQ4wZ& zu&61E7Koe3GLacXe_@}?22wSd53Fhz-)<0hbGhhzCXld%1X$Q!VhbbZHCwAll{<ZN z5+?tV)UMys>onn3SMWqF(JQOWug4uw-KblgdE)-{N&$zbS8ks>_ohPLFTGufzwUt5 z%Bxd&*abNq3U!y<@;RAh$^YK-!352NMQanhv<gzD-Cc4=B__IJ>#YV?PQ4Fqr&RCw zoD{pLGR10NYtzoga1H-;TXiL>GR&_S>`RtjP$=S?!FYnD-q|ht@P#jZOE!z_*=|xj z+nlW__{2hn);>OV=c-FeHx9+>X3Q{7RorM)5L%^D{q^9<xQJ;_7s!|f-TyzKPABf( ztU%|LuR;qIBDE?G=^9)yi(WbR(o(ONk?WoXCh<j1e&-~=^Y3-@f}G2?A2u=^Uu*K> z{gp}Lob`ROAxpO%s{9tubai?Xhji}hX;zB-Mb8Qzp5Hr7L2h;M`udeyZs%GvcN}=H zl4)L%a?joW{AtURk2K`kFFH&3PrDdvG*9L6?en{ycmJB4c+vJ#{_(|*=J${8KU2qu z9H*O`rF+5gB_wMPiiluYBM{XtYYw6g$m)ZrkFus9N<+>KL>0(cfv7cdE+FcYoCApZ zC@V8LKwc8W%$5h6v_{?%Cc&%#mXK4hL5kdD1v8NRYy}y{ipe_^l-O8hf$59^0H}oq Ap8x;= diff --git a/studies/BSA_parent_betalain_quant/isa.study.xlsx b/studies/BSA_parent_betalain_quant/isa.study.xlsx index ec46c0474b3ff8ea33525da7511c111656479c76..486411ac1c8c88a7585c68191536289931207512 100644 GIT binary patch delta 723 zcmZ3Hv^<F?z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErNn`=hcR32UL8|;k3_w()$On*Uv8XL8h`UKj4aB`AHJb^<EtdrwHBWXo z8%Q`>Wj<K=j~ZB5S$#Vrr;f8#q<Knpcl_kX8rt<8Zi*hcM^}1G<;uD`J6r$3q#M)r znml>`+J<96(W~6_^s+tt%ReViWVcV4dnL=Qt+|6`!49#Y-1L;aQ7yqj7AamEVzg&W z<dT`0Uao1*d0i}Styw}~6Zf8pMV#iwo19HJm-fA1kdXI4^webOT<$ixTe_w_?{A7_ z$hZneF&G`JpK$S(fyrLwmpZKPb<ZAr*4y|%q)5YI;o`#wC(PT*v8}+ITk4rpq{kYb zJ5%PJh^sBxBt9+GQp3IX%6I!E@~5Y7PrDMZswz~*V;a}I0&bqIXRl?gFIgE>HF0(2 zlo`&~mh8UZ{_ew9UYXk^{53iU3blEES#Nz3dT4$!f9T4!1@rdCIelF!aVYHes-@>n zG|#b&`Bj{+(sBINo!9YNZ_93<V^I2F?RkrL-i+@TFMrN*++@*pJn@UbvB{w(zK?#Z z$kku7jlVjdxBK11dh2;SyMC_wm->wxIo>yCYs~`3DZ7q6D0)0}j6hV4jyZ_hs-q90 z^bKStGwDi#n6kQVASzYY3Pdf`bpcV&A(XD31BjQe=MSO|KzL00W+0xGK3EQ9(hPmD zNt^Y-7JkvU1j*|gn1ZM@0~@61Uu|F}4@wMY*B(^LWn^I3!o<M94@|HK5I4zCiS36r GNB{sTSO>BI delta 738 zcmZ3Uv?7Toz?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYgC6NV0-{mOa2C4EFF#u7GA|F7a#iF*XAnqn9H4yif)NCdYw_Fx%)I8bU zY#`ximHA-dKWboMW%cchLf34qB3176%}HQnV0g^Lz#zyl`JjRD<o_D{^;>$KCfw=@ zo~R{yWtI8$xC5#ib*nQ^+`nEa;PCXy?Q`eeRLJ|Kw=4129k5z?bqWu=Ag4p2?vh(R zC$lX1-+Mlopn0%pZGx9pLCUndOYW$|L|1IR)!@ph_rdLy>K&hxVi#4WSnX?V+SwSc z;lFOHu0&Oa`4xkG$<hl7MSL?DPq5TGyJa7~@TG6bW|2MHO{!;`vo!^uSjf=Y$H(qm zbxG;Qp;+CF8OEuK8;uG=t5mAL9y}QrG41IB8PlNq|0mSx#NC?}=)Ce(XrV%+R>dJ* zgDYmyE9YKX>h&^m-Lt?XzR1b%oaA@@y>4ERbJ_O8Muy{SO<ugeGD)1XzE3t}>9#|a z-{P6BPEX>H&RsptN|C?lS;52ed#5SLtqxvazjDj%Tx;fz1MgKb%_~ywx%;0#ZF%yM zhFtqaX9@pl7h{d)sXV@Ye%JHvUy~Cr+J4GEzSz<H{?Yws>KKtz!R9QjS>PCE)3FCd zmb;D-h^p2x2T@ye^g)!Kfy`t^T}cpAM%N8QrRZ9Ls0F$%AnF-}($RAO@$&TiLDYT- zk5S(Y#Iw`~%YjUqt`9b8lRntO&-#`ic|8MD5S40R!wF9elUEs-$%A-j*B(^L1tyIx Wz@)(sOuh&ZKl#6b65CI0kO%+`kSE6g diff --git a/studies/Bulk_segregant_analysis/isa.study.xlsx b/studies/Bulk_segregant_analysis/isa.study.xlsx index 43a7b33b2beb38dc6c65334db05c480f743bd916..b7b89a26b6cd86616539825c87df590f547b6e4e 100644 GIT binary patch delta 724 zcmcZ=bTf!2z?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gEr$!7u4uQ^;m^khR`0}z$W`vD{x%2x{(y&|Lr;{Fj@%><HNDGD~~g6M8G zkZ`Zee6X;dJXknXemf(lj<Z#yc}jJ6{A5u@?fMQkMUUL0D?O%iWnG<}t^Z)sjcI#L zp1gl;!?B>~Rc?BE*&hDopOYuD+b7JulI7Oc+`+P7hgeW<ddl9YmS7=^6t4|2+A}6{ z$;?bI*EHw6E*7`eEFrLod(XroPIKc;&L*5o``#}|$a^4qYO-`LcbnWTUDKZTH^nk! zT!o_;j1JaMxOmIJWUul|9oF}{XAeH>ZG0e7q~Wk|@!^9L=56KJR$$I8^~@>KV-3%p zDf3Rm)s}1$pO$K=;of`YyZsXR)6=)7T?trK6)NK~jcZ;3H_z6y*Rs}^tPHA}xVm!6 z4CiZ0c3*IR_u(tA%<U5X8l3}$+PuH4w>}9yG(VX?bmiKDd3)oWzAlwG6n1;n(sL)8 z=UB%4D$ZBwIR5I+>v*lVWw*~UD1ET@yu~|j#`lYtKW8~^vgkUV_(kB@<WLjeN556% z>aW?xU!Bj}{cd8t^*o+kKiB<B{l<+P@0;f;%>u`%rHVZ$da6{6K-4-Fa}f1hMIS^Z zYRF7BQI!NS{Z-vS)O1xV5OrDA1w=`zfvI>k2M}+enm>s81mT&en}K*a>R>sLNvG7o zCOuULTd1aC36f9LFa=RFG;ENf|E`9aJSZ_dnwqsUmyv;C3ljqaKQO@}z=X+0no4Y1 HDj*R6$;k+Q delta 725 zcmcZ^bSsD_z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYgC7%UEzvgfO(UT2%4M0>f?+1`*C|@mD^oo!gi2FxqH4{j7r6|~_3!=N( zK*GH;^TEP;@?ham`R$CH*KDmKRqpi7Nti6Es9nFM*J;A7uHcDUqE}X#UynPWx>2_} z^Thq@l>!b=uiQR&?oEZfUwXR|f87DAl~<?munTfJ6zVRy<#RI2lK;Kug9(}ki`FK1 zX%(bQySwC$N=$Uc)>{p(oO&PJPO0AUIVpBgWs23l)~20};Trzyw(3e$Wtd+v*q1E5 zpism&gYg7Qy|Y{P;R|2-mTVT;v)!b6wmDl<@QH;Ct$lp#&Q+I`ZXAl$&6r`Fs<_dp zAhb%Q`s=}yaS_v=E|4(|y8nMdole}nS%J<gUxgMbL~2zW(lxkZ7QJ%rrKMgkBiB6( zOyY~2{LV>!=ilq*1v!^(KWt<;zSiW$`zw>gIqUmmLzZqkRQWBQ>FV?(4(Z(0)2tNv zi=GucJim9Eg52uh_4O;a+|IRT?l|yXCDXhj<(|9$`O}ssA8E+7Uv!r6pLQ|UXr9XB z+vj&Z@BTG8@uKag{Nsxq&F>%Gf2NKRIpR0ZQJMvgR0|b*Q2bP?7=fs@D&`>SnTkG$ zO3;v*Y^*8?V*07Nfv9P!Rv_w<stbscPy<tOY7QXY0yTdS^%24|RyPCjvem(IAd^n2 zgH3v(4z^HL!xAK)pkWH4rfb+B#s3`*GkH*Icr-O@XD%ZH!xkn627X|QMSzKu4K<b6 Iv{gVN0Pgi9`~Uy| diff --git a/studies/additional_data/README.md b/studies/additional_data/README.md new file mode 100644 index 0000000..e69de29 diff --git a/studies/additional_data/isa.study.xlsx b/studies/additional_data/isa.study.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..566ca09ccb44bcd0d23eb8c1fe9b6e8b6b8de26d GIT binary patch literal 7504 zcmai3bzGEP(_XrJ>F!#jL6j8)k)=zzJEXfsS{hsdX{4kP1f)c|k#40Mq&xf;dDQ3i zdA|2Of9!A1y>p#8J2Piy?&~PZARwXx002|~Nc^5=?DQuzO*jBx7ZCs;fIZa~x3hIN zwRP5i;$d&<q<hER#=00PXWPt$8*~DeSmKbz)P15V(fsCp4^jRhVAx#@L@~Xe*f{Nm z&r-lfQ(!>A{#tOyE3K7Op(;{pj0AE&Ib2Bqh&DCEYC~Len$nmXO_zzP_S}KU>76+K z%LQBES&!_%WRB%GQG+5<Ql^nuiYvleB~R|~;hqVAvO^rvFrX(b^8ujLNDE^!Z5*YD zJdLLA)+N)P=#z)=WcXbNdK2Z2a9_!F#g=Myw2wmKs8C7WT*(S5^WZQ7g{3UmSK&fF zx~0hupuZ#yJVnMTTpH_)#OUE%jh{4qNx-VBx{m*fT^+01WC_9l4H>rp)8<$aoDIs= zrMzsp_G3Y8$HSd2yfdH0YfqoR{#+D@gL#ZC__+Ulw2rvw5gr0{L^8_hw$J=aU45zL zYfC>Ym3|v&PQ{$?9v;@|cirohrmFi#)`7GeJy~G<$@7hOM)%o+HW0O4MT!>4760-C z;D;w*#{V^tVc+x}O|6|cf6Cl?Yd>9TY902Q3_JjU@k{2;jbw3LlY9#ors$5h#L`8Q zEd(_)r;aB33lraiPYkx?m62Jk0pE6B#E4=>TB1ZbY^|F2Z1xF9ZbT{{Q(%G$$$&^b z{UQCIdaf-UaM&=AL+DX>?6WbGSIn+t>810#8lp7`A)Uycv8Nd>RbaV9_9$StLXV{w z>*ta6P}fgUhXo}&&gWfzD}nh{{i0K~jBl^Fhxn~+we8kpw+-LWm}^~jvzW+IzIg62 zvXc_5zLLqNIIya#HePoL&P#&~WM)*QSm3GT%P;h3HM&qt>!PiGj*t<n{$4|o6y#6u zrio^J$uq<(IN%NVX_0?g4FP5mck5p!`=<cft-!xoHjv?++KpvvV3sBOyO^7u<5MF$ zyQeoN;isXkTDWi@?OdV1JJE90l4gx{VtuZoeS#49zDm|w%0kfQ>O_?sLV=ozlHOW0 z@wx-D*>`d`R6%wGr8I+08h1^utYHV}%{P5vj@vq=4MvPcXJVKbaoZH6;9I41^kImV z6K1vV-II{!!IqSh2wgjnBOiJz)HBlcwolKEMcwp_VR!~%14H3m1^Q6G8ch=G!n1nu z>0@O50HKJrGwDEvwBTM&vebJfNRqlH)G_0W>N4$4U)!9{(}l4MwJ~wR)#jYVF8WzT zcI#i9of7pf#&PM>Zg<h{lP>X{OZ0W*lX~neZczXKhQ_`%(2Z%FEKE(E{|wWlK5gnQ zm^U(r005GI$o_zE-`Mlj#Gvgk7Z7>QD~+xjEaPpR_jHs9jsPBw0a0AXD}Pvv!C~nG z<>I;nuUcvpRa4E|C(~Va9ea5UdLPdxD<_+R2hV(8g%|RCw${Ih+^jCVoZhZW821o( zJZvYs=~dTFy_MLg)u{3HV%K;^le_{r5MAg^yjaE(_U`Lse}uYknU9sF_nNG1TNy_? ztwzOb`Hd;~?sB$!i|3czhIrS^ErOAS+mWRwsoRUjkh0<zmZyvTSc@uJ>faS~%$uXH zcFT{y#L#`y*<5G=CYkFtoR)3-aJkvkZl#O8K4<k}yCj^gxpz7!xX0FZMY=XSF3h<i zA+5zJct^L%VXVR+S@l{A^!16UUQxnu(@yR~U+l?t0khe5&g<(2muX$u6Ugf8*^aTJ z<1Fxm0DtlRe*L#%yS!S<?C`#pNXkt<{X(5h6=FfAq$8fw3s7G@#La`N<AQ}~q1j`Z zaE)*6+<j})S+sym<*e*ZLYrX&XyZ^uDNk6`W49Fcu>cjUY-qBvui@-kmw6+6R>XO> z^ATvC2JBWzIN)Pe$P)GuDqUDoWht#q5nSxu#$4`8>*KJk`#3YsZ@x9oV&^Ml*WZ=A z$PWQoDV0N^ca?Eth21sMXyV@j)RsseNIjBPV{l~%4dT>@L(F9et>n~5L>yxXUFOtC zLqujYd$bfm_~HvHe3U^KUSbr@7Ag_d^CQ3!#9$q+2n7}VX$V0B2iMat2c@4%A&|;o z{dq+mZYGL`Ctwp!45#c6FotmR^bgrGDw;!p42}UfZh&6XlVOY=4IGyvX#|e*Bs5-+ z)re*|gv3*M9Q#VH#e%^2o3VjZT|@NmsHFbuRANmD1V-Of3Z!Zq$||Fh@X;wdWGRxw z^90I7H+upzCHFo)?|<Nr3J%MWGz5nYWNCN;$Flx?nlzB5>B%>ig$7Q_ku(PXIX#~< zUmw(nW;(R^%G5l02R>7>;?SZ<lE9NsCc52|??0z(VhU|TDu&=uCos}f4SDaFFOT8o zIrmc=UZfmNjOe<+;qPl9>-x3JZVw*Mf?v6gSzF(YA4;;Rk5&j;;65pgyr4a3%7K&j zDlGKgrA1yex+gUHeLp|5fplgspI`mJ3i$x)RYqHTF4mIBw(WqzsH?ZQQ2hOyfP>)n zefhTGl@$lXHcKaut6!Duwo19XbaT7G@+c~-L;-J)zmtWbqp69Cv!kW0xzo+jPmOPi zn&QF@-tj?aKf$kkrKXBhG=x|H5mm%z+s^0d5(NnfRHmYT{V3l;7vG9KNicV2mUw0p zZNBfw#(It+hTostpD;M_@DooOfotNuY3IGO<bdpEPzRJZ@t%21f6vzNBk7>`>@QXF z6gN)4xR|PmzE}>Wes0;57LLz;r9z?O0rYdy1iX!_Rao4s&^BtOfbNCm=O0{-J|v`n z#<t6eq0nzl*Jb_2OsW=3kGUjCZpL%I{B6NyL^DMqofrJV*QkRO7G1pDMyDeCYA<PE zkgW=UhhRw~WDPmaN*!M_JQ%(RQ%PVnDh{(v6i%kh4?_&D*kO`K#)gUJOP!|<-QL@( zR7qwwh2o4#6P85g`Ohv_^Bwa{kqfOF>_S}7v}iN#uK=h&OKgJaz{tmvZe4Ak0PSY> zSh%p&@Na>F)6_i!CqrCX<jOXPZI$>(CxlBZnCF8u=ohr5%1Fc(QrR0J9FnU<GC;Lg z-cWM7lCC<1#h}$>g=TnjX_)~R+*!DlEJp;){M_X+_22}CuZ5ZD3ew(>*?pGT;Bvm< z#(URI(&kUoX%u|elcQxP$-A`wCU=NUnPzV8*JNqSqUPdNJ>MShTC?c@eeC<5PuIDL z@blfuls0xFqHRi^HP=eYi$}G!hYu*#9m$iYhRF8{Jrca$Vi9RE=1fX1)n<yk>1?=Y zsXeV1-azxu4=JzDq@I|)e=Z*6(!RDS?)zTeMUp?&s5tKWf^n=OE@vfJ3wIB-itfD6 z*puYb$YB}ZoQ{IVh_p4jZYh{~CXK^(TQ3cx_+9Gc4jSWxBQ@Eb72sCvH9^2})$({{ zdI^=O`S<_QJp9!gI9r(7nEq-ZZsR$wpPxAb){?c--1f+@9^+Q*$LEhqwymjPH!T4C z&gTEE#6){PJ;T<p$8}i1(!YIsfS!Js$XJ>W!uCQ5NHR5i2Lai+<qm@Dv>}O_0g~EQ zISxTa+n7%%Sn8U7>Pd2y<)EPhAUWH2XRqUgidw(5a9ly$io*B~YPgo!S(pvk-Tld! zM~m?ub_1OJvf>aR7M>}d?Q338@8)>P8xGPrTyh%97aSuz2&{S9@r>Rij~CFB7f{n8 zNpNBV?@()tN6Cvmg%pR`j2Gd?cJk2y{f8O`Lr*727})x9_NjtOAz>z;@-9CqnD@$L zPt0ehT!y~hWf6?Rvw6++IVKfzH=hqHEHDs-V_04!*JZn)68NTERDgM0LBy%1=99g$ z)bi=6J?q-t5|yW%N9sIR$shC@)Hg@t&y@lDJ{p#qSrvMFDMA7$-AQC$zIaw^sqG3~ zjbeFE%UuJ#U4q;McCS8Gmm6Y3`M+S*Pg}ay7WldamD<d0y4G!u5PGAdv%Fv;x0o(g zZD_Mfh{4O6)^Z<qIX~*A_$a1Xv(~*tuL*ogd(WsI$8V$t64x11!AeC-ZBO-PDTcr} zBmx1oo;57d&#=zuWSh;K`I-EbG&rn^t6R!k{Nn_P^eIX6stKF+3dexe3Js@z{avri z%dNc~bA_}N@U_c>liV4Buy&7{aS@+`&p7&I&%PfoZsyHy?9oL(JL*v{y*@pV+sF~y ztTQi76Y)B`8h>_m`2@kv$vH6u2ND|OfRaw)riqF%BI}29RwsoC{|JZ%1SlhHb&0}# zeN9`?;!<YSd_lnzgzZlMK=}g|qHz2pgnG(Abo&JEDMOvQl<6*=v*p%74H2Q}gAaA6 zl$$HZV-z#tdAR$J@`f<V#GKOos|QIWHORVn1)4PIkNt4lO6poyWy@musBp6aLdO^o z99fRUrWp_$dz}%RM#F4ecgQIpyKK~*YDhF!i9X50S80n7R0Ju#HH{EUlKe(Px(1=@ z{ZLAx1KB+!Q87CV)<8||rAlXYwi6E?tn*3e7bEOqe7zQPKlGh&c@LAGUvqvu?+bt7 z<J?5iP&(suDGs#UZ!y_8zM@K*bvp#0SsU%y0L*Nhm?f!R7j1Z67m@xoRKwJL0i0J` zQ!9_iDO25x0*|rVfvFj^?d=W8_k@#q!0Xy@Ca-LYD*8CG+pUJ8%lvp)^PloXF^Zv0 zA_$uydApiFePJH_26vCH&yC5d^KcG2oI3>GP=G#>b}zR)VmN*j+MnYhA}X!H=#v>e zP8$8yCHi^qpjYRdTm17@U)yO++_rN+QW_R6=XIh-Mh%eiRONRX?1IXI+uc}WS`7qo zYsGD!wI&yRlgd}K2!)CDH-I_NBltYj6Kc@YT9&9|?i7Xu)jm_zgJ(rnVaNNX98Q`& zI%7QPMe&-iIMuE>6bcwY-pjmnP^wPSw)=D(-iTXQhSbQz@@((DTllatgwo}kB88-< zk<Irp=mG)&a)wEc_gXZ1RilHrP2r`PD-_+dJlp2)keg*K?=R#qq0-clg$zCA#tusA zef;!jbh<9*{JkKN2fS^;;U$CP%Y23cGW?^BGYE7$+-Pv5%mO;w+T87dgw2e=XxF04 zY>YUMObeF1gm!V(d(Efa;mNJt3N!PNTWuCDj_C|amZl>PhNGa`Y$4v-G*j8x2#bH* zu*dCfW3z%xmJeP;Jk}XS;(w4-)r)sIjM!(7e*P#fAD2>+V>rP{yivjIwIgjE1d$Zv z11%O|z;_CW!!-i$Ad?k;c*X8=alknKUPPovb5p)O3-8#psj~p%GY{ib9yO}`JrME` zO4DxZ-E8j0@K1`i4Jjn{kU$+RCy4u<59(?<GU^A#Z*{7Qs#x9(1Ns!8-aMb@EWM<d ztD~Un=LMitG@OpB@VhRvcizYIVPMC28WLT&hj>S+s($VsU8yx|@jYPOn@16g(h^rF zF33}JcGWR>7?WJ0W>77iDJZhbrxC5iuCAWj5UFkB>W&yY8)U_wt8BvAA)k49Omq^f z$pn!ko?L!H#?8!ekhxt&S-RlKST}zxEkF8B2CG_^qf)n8>syU!-E_V%d{!q7sBUt` z=hQsCe<k5~{j|N!=L1V+=_%;Fh8Bu;zP+b|ehE;2h8z)eP^PvnSlB0DJW^D3!I(E~ z2~E-~R@oS<_pF{hnt5FDy^}9WrEIvwQ@(hdWwbj@-O<WbtZ@)Dnkt>MJu)4cHtfq+ z&0plbSI1@7y*btR^S$l1cp=^Q({qJ2V+gPUhV@r7=Imi@`lAcWf$G{Na{+^QPS`=S zDhODh7k*ZjVNddt+4x_&Zu&_Tog<JA$I4yo+(*X8cbUpP?(Qg!PCq#3ty#|&<?Q9t zDFmXZKX@!lSi5AEuuz&jlWaVqQ6SCs-iY-<8S<<NbzD0YqSq9!86NJFmkSM*M{zN^ z6TDK%?7F;2^sfZswAPX+Ev;HCCO=3i#|)ep)3K_%(|}C(L~y10oV2}y;`uARdyuL_ z(t=_^@hA6lu)d(%Pa*MYW!wpsb1xc@DGh_@k$hR})`Nqt6)YfmZM!@q3M3*xE3>0V z!rN;xoYz#gG$ntsrlvXJlW{+!$2_O8a0W}rp|d+q48tZSNV&HQ(V4C0ZU!-%GR3`2 z!Di%#juT;em)xiiMI;A}n>h%*pMD_dQp~g?n{%X<FA)<))r2RveXS(qJV0WH&7Y`b z&yOsdeiFTvni)b9(a&^<HS)zurCBxQc#*~Qg~6*JwBViNlFC{p+X1bKH4v-N7fXpC zaVql&I0Fo()|YjqvjrHg>a?TXI#U-sZT5R#Hlk@s$XeidohiPj2$ao6O6a<DiDX`g z5g)z`n~};r4fi5963ejZ30mI334X5W`f}WNq_)FR6N!Ye(1Lwyg)eb|^}1L3Yxgen zs8jzvWkv33Q|c4kI<le)oXBnM5wf&}bhpCj@FQI+Pp4=6CuZZIeWF3DZHbEMQO{J) z%Ynu#mt(8hJ+qDw<QP+Wnx2So%03Y}t+3Dc0<L_&e>$!?ks!H9fAi^N|Eb8X8F#AJ z!RfvIeHPU5J!==@<-yk{YmtqC3jPK>)EpcMvi5Cvw(a^=?=q}io8m3G<C~m+teE{7 zp#M(LnAjOBI@;O)$tf&cE$9cqh)O0HuYe1S<6mNi_VzzsDB>R4O><!e?OYHBe~U~h zVF?yh<IX1c)OZJwnl^6gb_P9($BgBUyE^Z}UbttY&;X9Z`JB#LknFQqj!i!~hryk` zOebz9W_eYGi<3M{Eb{0v=0`+gVWI^;2HQiy%CO9~P9jzrv@~=j-f9fiu8|2q-_S&! z6rmA9O819XnFU2n(*~|sp79ADz8nsp*JMDPQ{0La1jLlj_}^pZu4Z-gWy*PIU0G|J zW7(fe`Pj)}0@?a~x3TUq=83=ZAG2^B+<&+#EiY3kFlYbT-vud3v8y_`R(#E^3Z7i5 zFp8Lx44a$&g41RjmXbs#7J+X9ESf6rPDQn4=*mzWsf3n3q32<RHC94L@|6uk2Td`p zHA+!5Qt9-zp!?a4qv&+c#DvrFaWIXVEzr_lxGm2kEUX*vEF@%FHsw)vD}4zF;W+-P zboK7~71fm72c-7lxX;N}y5g-o4Lzr*#^PTXr`9~bmm@c}?Ju3(T~k=tb3gX<1@|<r zp4p@2rN!A1mwZR~>Dk-VBWU6kt`BqBA}syj{oQ58c8;b$e5TxU!z|aIu+z;3h-ZX8 zriIwF<wIVfTa6|^<(!L~h;QIq+SwFF&2ZeXF*kRu$H_Y|jztkMNFA|y92^jenDK!~ z*~Z+YK`T%N|E`I2nGSnmEIP$&b2nB&v~eQ6VgUmse~kKAM>Ij+a&bPioT!})1KT;m z?w8~?7{OFi9Q+YGlSCF25(HWRo~D?=DK5SS`gIV}4o6?pO62o7pd*WDW>!m6Oj;#s zmAxPa*@~DZqzH2cn%1f3<6KT0Zz{wh898zyl1<%{P;EA%Sm3Ej0f|RXv&X=!>>INx zb<S7zj4kwlCW?<`*9}O1+daafn0ChT27Pj1$2z}Xxun{hZj7UvUu>V&Aa;~P(yTUJ zkkj88kRqgW?qtUh+OaXSsn53UI^7ST+r2L}g_V|gGB|u-^|1VIScl;uZu?7BKDmVp z2C8RcVExf_tnhEK&+eyT)}EDIn;v8Qr#H}r`6P5<-f)I_1Lw97{GHp3jFYo#=E4ky zogbxd14Ja&gd9Wf<ra7x4C>@*9V{Pxgl3C=znXDP<#U+F?~uPdt$ca3Qa~safHRcj zZ#E3Y=bAycj6%AaX;esKL5n4Ag7Pa45W#<e;%tT=<poqS`=2oZ-$xBDOX0g?B?aZX z80==3>s5r7h%hyI7|@LXlnPDab9)}~TCRz9UV$;SRpVL;mm`r0P(o4K=n8Nuoei@s zeR1S@pIOZWYH#B@#7MVSg($!A_y{K3oP20FKECC^uy~AI%y=R&!6uJ)#J-<93z93h zwy4ojS8f{Ta%oPvFsLnj!=eOg6{MvP^*53;<=o|*BR?5}1oa;Xrw(Uddc8<xhG!dl z`$1Bbo^sZ?22v4$4<c>asTbz`YV@IJdo)O2=G=w7me<Hrf~RZ;mwfOzoyKSh6wn%* z5u>vp*GL$S4UJ&v2TC%gu1ghyQj5>Yv-<Z@6lLJxfq?)2)&=H{zm0#ngZaI{Ka=Lo zWx?&Z$%|ny7yl%<|5NFomj^#l06@tv%nye9zbU)n*l)*;vQMyB`oXyW)b!8Y-Mm-d zjvK)IE#Tib_$LSd69{9HU}|A{Zef3q+W)!V8>;$t+^A=T-EzU!`48gy=hkm?)}Pe% zPc#6a`VZ`HBKx<+Zddt}0sFnm6Vx9&_y-^MdxigH!Yf!9{bC?~!)_J-Nmcw-%m5?f ze!+ed7k@7BXS?`&fq7U8hb`drt9kql{a;)5hp~Xa>h*8f?Uw&6?7ubU{~h*Mng0|0 yXN~!d)`49<{8IX};`|B!Gb#RtZxH|ZyMGThMH!?YXAKwDfV9IR(wX!|{Qm%z*nGqQ literal 0 HcmV?d00001 diff --git a/studies/additional_data/protocols/.gitkeep b/studies/additional_data/protocols/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/studies/additional_data/resources/.gitkeep b/studies/additional_data/resources/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/studies/additional_data/resources/color_pathway_genes.bed b/studies/additional_data/resources/color_pathway_genes.bed new file mode 100644 index 0000000..c4ee581 --- /dev/null +++ b/studies/additional_data/resources/color_pathway_genes.bed @@ -0,0 +1,36 @@ +chr start end strand gene_id transcript_id Gene Pathway +Scaffold_1 7523181 7527088 - AHp000674 AHp000674.1 AhCYP76AD5 Betalain +Scaffold_1 28687404 28688868 - AHp001663 AHp001663.1 AhBetanidin5GT Betalain +Scaffold_2 6806355 6807984 + AHp003152 AHp003152.1 F3-H_3 Flavonoid +Scaffold_2 6806355 6807984 + AHp003152 AHp003152.2 F3-H_3 Flavonoid +Scaffold_2 31518210 31522095 + AHp004305 AHp004305.1 CHS Flavonoid +Scaffold_2 31518210 31522095 + AHp004305 AHp004305.2 CHS Flavonoid +Scaffold_3 14652200 14653652 + AHp005940 AHp005940.1 AhBetanidin6GT Betalain +Scaffold_3 29507485 29509926 + AHp006454 AHp006454.1 F3H Flavonoid +Scaffold_4 10226509 10227946 - AHp007219 AHp007219.1 AhcDOPA5GT Betalain +Scaffold_5 9470955 9474986 - AHp008991 AHp008991.1 FLS Flavonoid +Scaffold_5 9470955 9474986 - AHp008991 AHp008991.2 FLS Flavonoid +Scaffold_5 19537497 19543306 + AHp009303 AHp009303.1 DFR Flavonoid +Scaffold_6 3461614 3469253 - AHp009962 AHp009962.1 CHI1 Flavonoid +Scaffold_6 13877397 13882497 + AHp010386 AHp010386.1 AhDODAα2 Betalain +Scaffold_8 3308349 3313908 + AHp012752 AHp012752.1 PAL_1 Flavonoid +Scaffold_8 7925628 7930892 - AHp013217 AHp013217.1 C4H_1 Flavonoid +Scaffold_8 7925628 7930892 - AHp013217 AHp013217.2 C4H_1 Flavonoid +Scaffold_9 12115220 12125730 - AHp014409 AHp014409.1 4CL_1 Flavonoid +Scaffold_10 21762620 21764924 - AHp016530 AHp016530.1 AhMYB3 Betalain +Scaffold_10 21780564 21783050 - AHp016531 AHp016531.1 AhMYB4 Betalain +Scaffold_11 16155424 16159543 + AHp017409 AHp017409.1 LAR Flavonoid +Scaffold_11 17204504 17209738 - AHp017497 AHp017497.1 F3-H_1 Flavonoid +Scaffold_14 12286837 12295353 + AHp020962 AHp020962.1 4CL_2 Flavonoid +Scaffold_15 1098208 1100359 + AHp021795 AHp021795.1 ANS Flavonoid +Scaffold_15 2886482 2891165 - AHp021980 AHp021980.1 PAL_2 Flavonoid +Scaffold_15 4193988 4197809 + AHp022120 AHp022120.1 F3-H_4 Flavonoid +Scaffold_15 4211064 4214362 + AHp022122 AHp022122.1 F3-H_2 Flavonoid +Scaffold_15 4227367 4229108 - AHp022123 AHp022123.1 F3-H_5 Flavonoid +Scaffold_15 7959672 7965298 + AHp022382 AHp022382.1 C4H_3 Flavonoid +Scaffold_15 7959672 7965298 + AHp022382 AHp022382.2 C4H_3 Flavonoid +Scaffold_15 8003999 8008333 + AHp022384 AHp022384.1 C4H_2 Flavonoid +Scaffold_16 988733 991447 + AHp022773 AHp022773.1 AhMYB2 Betalain +Scaffold_16 988733 991447 + AHp022773 AHp022773.2 AhMYB2 Betalain +Scaffold_16 5231548 5239058 - AHp023147 AHp023147.1 AhDODAα1 Betalain +Scaffold_16 5301961 5305973 + AHp023148 AHp023148.1 AhCYP76AD2 Betalain diff --git a/studies/additional_data/resources/color_pathway_genes.csv b/studies/additional_data/resources/color_pathway_genes.csv new file mode 100644 index 0000000..a427fab --- /dev/null +++ b/studies/additional_data/resources/color_pathway_genes.csv @@ -0,0 +1,30 @@ +Gene,Pathway,Gene_id +PAL_1,Flavonoid,AHp012752 +PAL_2,Flavonoid,AHp021980 +C4H_1,Flavonoid,AHp013217 +C4H_2,Flavonoid,AHp022384 +C4H_3,Flavonoid,AHp022382 +4CL_1,Flavonoid,AHp014409 +4CL_2,Flavonoid,AHp020962 +CHS,Flavonoid,AHp004305 +CHI1,Flavonoid,AHp009962 +F3-H_1,Flavonoid,AHp017497 +F3-H_2,Flavonoid,AHp022122 +F3-H_3,Flavonoid,AHp003152 +F3-H_4,Flavonoid,AHp022120 +F3-H_5,Flavonoid,AHp022123 +DFR,Flavonoid,AHp009303 +F3H,Flavonoid,AHp006454 +FLS,Flavonoid,AHp008991 +LAR,Flavonoid,AHp017409 +ANS,Flavonoid,AHp021795 +AhCYP76AD2,Betalain,AHp023148 +AhCYP76AD5,Betalain,AHp000674 +AhDODAα2,Betalain,AHp010386 +AhDODAα1,Betalain,AHp023147 +AhcDOPA5GT,Betalain,AHp007219 +AhBetanidin5GT,Betalain,AHp001663 +AhBetanidin6GT,Betalain,AHp005940 +AhMYB2,Betalain,AHp022773 +AhMYB3,Betalain,AHp016530 +AhMYB4,Betalain,AHp016531 diff --git a/studies/isoform_sequencing/isa.study.xlsx b/studies/isoform_sequencing/isa.study.xlsx index 60867b92cbe45252333f78b49c485bd39a3e02b2..48af51fa63c14650219e5e214e849d894b1120dd 100644 GIT binary patch delta 701 zcmdnuzuBKBz?+$ci-CcGgF(l6BhO4mCLQO=%Ndn`)aKKS0$}c4CV7zPKPFX>s4BBM zSk#Y23&gErabf|{`#2JrL8=sa3_w&c&j*mGE3Xnrc5<em8i+eluz(4~WflP&r6sbP z4J7<rYCc%FLl!K&UUoYpr;f8#q<Knpcl_jhdF}cRH${)!qboh8a%Ekeovr_1(v4|* zO`g1eZNssk=v8iddf6WS<)4!$vfC%jy^`hD*4)9eV24;xZhFezsFq+MixjU7G1@aG za>>k0FV{5Zye<~E)+`~giF?n)B2IJTP0l8qOZ(n0NXUC2dTO$CE_a*UEnU-|_cz5d zWL$-#7>o|qPq=u?z+|uTOC8qtx@QkQ>ur1>Ql#OqaPi@T6XtE@*j8Z9E%nSP(qj$J zohkE9#MPE;5}%f8so~yx<-7e7`P0+4r(FqHRTV1ZF^y|p0XNUqv)8iLm#hq`nz*`h z$_(dgOLkvyfA`@lugvWd{u-SFh1$HothYW1Jv2X=KXm2Vf_Z!6oW3rVI23k!)zWh( zn&()?{3^~@={WxC&g*!sw`I4_F(`eo_PoVAZ^rkFmp^AYZnEe)p7=%J*yK<X-$%by z<m#{4#$TP!+x>20z4bhvT|d|TOZ~=;9PgXIE6f7N>0~8)Q1o0;G6GQ?%H|--QCS~E z?N*bS+@~xFVy;wn15qE9tw5BniVKJ;hEO|I96-F^D*hnKM-|NLQ#AwejzM@JlT_5e tCfTcjEo@M;1j+AKGX+tf)NGKV-&`H6H(y<b@zvxWbtSeIC17%8004lH1t9<c delta 702 zcmdn&zr~*?z?+$ci-CcGgW;O(MxL3BOxJ8DFK1K&QkzdR3V^wHndCvD|Cm%kqN>d5 zU{OC7EfBYg#fb$(@8d{h2B}iyF#u7)JRd-!uDnVh*~yuLY9Q`J!2%`_mstdCl$OYD zHjwahsrg{x4q349dfDxaoY!ovB3176%}JP?FRxv{rPpb~t*+pSTB280nO~1Ppt@1F zI`hQ+>y-iyPp{lQckWGvykB~|5`WzRtCd%$@URPVIuz<Ix#e>*%aZ@S=Yt8F2aDDw zcxe@+OuM_}j!H~)#nxL5uAF)w+)k<9@i{4WQDutNzSgFljo}*p>$d7jRArc7G1!+Z zy`WIUH-qs6OTDvO_TdX(`j%`K*|XiGdbT-RQ}Bs}46S{9?9Nq}lx`e~)y<e;oT|9d zs35dTrTXi^lW`H#o-U9v4Z8n-LY+?By;*_ID_?~cDnx2k9MUzoVivt}?xm$(FC*7I z3rymRoczv7e&^ro<^?&IZ9i;eIKI~8#rrFh#5wExWJ8v2J5>2Cp6TlJBo67^)zhpL z`HP+vJUqX5nu6Tw;Pv$@x7^OPX6`ufUM17KBITaD|M}CFCm(6ZwO@3W@Sk=u)@Yu} z<J;$VJ@5WCIq{<Hr~KoK9nJ3_-G8Qz5jo;Fe^ZzRj?_s?_MrH=tYieD*p<yel!LN9 zh$>Vz1yL)M-9Xd_Wh)S+qv8UhiXhYu6$cRSmx@1#@>T`&dR5IpyrU3ap|Z?mWi?5V sh@BePvU)X35Dz4A4I=ST%?2s{&D6pA^VDS+Ur+8<S7K{b0;X360Oyt=1^@s6 diff --git a/workflows/BSA/RNAseq/QC.sh b/workflows/BSA/RNAseq/QC.sh index 64db677..947a4ff 100644 --- a/workflows/BSA/RNAseq/QC.sh +++ b/workflows/BSA/RNAseq/QC.sh @@ -22,8 +22,8 @@ GTFIN=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_correc # Quality control # Initialize the output directory -QCIN=raw_data/BSA_rnaseq/ -QCOUT=raw_data/BSA_rnaseq/fastqc +QCIN=assays/BSA_RNAseq/dataset +QCOUT=assays/BSA_RNAseq/fastqc mkdir -p $QCOUT # run quality control fastqc @@ -32,14 +32,14 @@ fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz # Quality control # Initialize the output directory -QCIN=raw_data/BSA_rnaseq/ -QCOUT=raw_data/BSA_rnaseq/fastqc +QCIN=assays/BSA_RNAseq/ +QCOUT=assays/BSA_RNAseq/fastqc mkdir -p $QCOUT # run quality control fastqc fastqc -t 10 -o $QCOUT "$QCIN"*P.fq.gz -cp -r $QCOUT data/BSA/RNAseq/STAR_flower_mappings/QC/ +cp -r $QCOUT runs/BSA/RNAseq/STAR_flower_mappings/QC/ module load samtools/1.13 @@ -53,11 +53,11 @@ GTFQM=/scratch/twinkle1/temp.gtf sed 's/CDS/exon/' $GTFIN > $GTFQM # define input files -QMIN=data/BSA/RNAseq/STAR_flower_mappings/ +QMIN=runs/BSA/RNAseq/STAR_flower_mappings/ # define output directory -QMOUT=data/BSA/RNAseq/STAR_flower_mappings/QC/qualimap +QMOUT=runs/BSA/RNAseq/STAR_flower_mappings/QC/qualimap # create main output directory mkdir -p $QMOUT @@ -85,8 +85,8 @@ done rm $GTFQM # run multiqc to combine the results from fastqc and qualimap into a single report -MULTIQCOUT=data/BSA/RNAseq/STAR_flower_mappings/multiqc -MULTIQCIN=data/BSA/RNAseq/STAR_flower_mappings/QC/ +MULTIQCOUT=runs/BSA/RNAseq/STAR_flower_mappings/multiqc +MULTIQCIN=runs/BSA/RNAseq/STAR_flower_mappings/QC/ mkdir -p $MULTIQCOUT diff --git a/workflows/BSA/RNAseq/adapter_trimming.sh b/workflows/BSA/RNAseq/adapter_trimming.sh index 15d5ab7..2821fec 100644 --- a/workflows/BSA/RNAseq/adapter_trimming.sh +++ b/workflows/BSA/RNAseq/adapter_trimming.sh @@ -20,7 +20,7 @@ module load trimmomatic/0.39 # run this part as array job # create array of read fastq files (R1 only): -SOURCE_DIR=raw_data/BSA_rnaseq/ +SOURCE_DIR=assays/BSA_RNAseq/dataset/ FILES=("$SOURCE_DIR"/*R1.fastq.gz) # run trimmomatic, use 6 threads, taking advantage of the baseout function to name output files @@ -32,4 +32,4 @@ java -jar $TRIMMOMATIC/trimmomatic.jar PE \ "${FILES["${SLURM_ARRAY_TASK_ID}"]}" \ "${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/R2.fastq.gz}" \ -baseout "${FILES["${SLURM_ARRAY_TASK_ID}"]/R1.fastq.gz/trimmed.fq.gz}" \ - ILLUMINACLIP:raw_data/BSA_rnaseq/adapters/custom_adapters.fa:2:30:10 + ILLUMINACLIP:assays/BSA_RNAseq/adapters/custom_adapters.fa:2:30:10 diff --git a/workflows/BSA/RNAseq/index_STAR.sh b/workflows/BSA/RNAseq/index_STAR.sh index cb8606a..6d12c7f 100644 --- a/workflows/BSA/RNAseq/index_STAR.sh +++ b/workflows/BSA/RNAseq/index_STAR.sh @@ -18,10 +18,10 @@ module load star/2.7.8a # more specific settings: use the polished, softmasked reference assembly # sjdbOverhang dependend on input read length # as SJDB file, use the newly generated braker2 protein gtf file -REFGENOME=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta -SJDBFILE=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf +REFGENOME=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta +SJDBFILE=runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf -mkdir -p data/BSA/RNAseq/STAR_flower_index +mkdir -p runs/BSA/RNAseq/STAR_flower_index STAR --runThreadN 8 \ --runMode genomeGenerate \ diff --git a/workflows/BSA/RNAseq/run_STAR.sh b/workflows/BSA/RNAseq/run_STAR.sh index 1064306..099d4e6 100644 --- a/workflows/BSA/RNAseq/run_STAR.sh +++ b/workflows/BSA/RNAseq/run_STAR.sh @@ -12,9 +12,9 @@ module load star/2.7.8a # create array of read fastq files (R1 only): -SOURCE_DIR=raw_data/BSA_rnaseq/ +SOURCE_DIR=assays/BSA_RNAseq/dataset/ FILES=("$SOURCE_DIR"/*_1P.fq.gz) -OUTPUTDIR=data/BSA/RNAseq/STAR_flower_mappings +OUTPUTDIR=runs/BSA/RNAseq/STAR_flower_mappings # change directory of outprefix OUTPREFIX1=("${FILES["${SLURM_ARRAY_TASK_ID}"]/$SOURCE_DIR/$OUTPUTDIR}") diff --git a/workflows/BSA/RNAseq/run_kallisto.sh b/workflows/BSA/RNAseq/run_kallisto.sh index ae124fb..88243f0 100644 --- a/workflows/BSA/RNAseq/run_kallisto.sh +++ b/workflows/BSA/RNAseq/run_kallisto.sh @@ -23,9 +23,9 @@ ##### Perform quantification # create array of read fastq files (R1 only): -SOURCE_DIR=raw_data/flower_color_mapping/ +SOURCE_DIR=assays/BSA_RNAseq/dataset/flower_color_mapping/ FILES=("$SOURCE_DIR"AM*_1P.fq.gz) -OUTDIR=data/flower_color_mapping/kallisto_quant/ +OUTDIR=runs/flower_color_mapping/kallisto_quant/ TISSUE_NAMES=("AM_00331_gf" "AM_00331_rf" "AM_00332_gf" "AM_00332_rf") mkdir -p $OUTDIR diff --git a/workflows/BSA/WGS/combined_filter.sh b/workflows/BSA/WGS/combined_filter.sh index 707a9bd..78e6a17 100644 --- a/workflows/BSA/WGS/combined_filter.sh +++ b/workflows/BSA/WGS/combined_filter.sh @@ -12,13 +12,13 @@ module load samtools/1.13 -REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta +REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta PROVIDER=CCG -OUTDIR=data/BSA/wgs/vcf +OUTDIR=runs/BSA/wgs/vcf mkdir -p $OUTDIR -ALLSAMP=$(for i in data/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done) +ALLSAMP=$(for i in runs/BSA/wgs/bam_files/gvcf/AM_00*.vcf; do echo -V $i;done) $MYUTIL/tools/gatk-4.1.7.0/gatk --java-options "-Xmx48G" \ CombineGVCFs \ diff --git a/workflows/BSA/WGS/map_reads.sh b/workflows/BSA/WGS/map_reads.sh index ad2bcf2..3a1d2a8 100644 --- a/workflows/BSA/WGS/map_reads.sh +++ b/workflows/BSA/WGS/map_reads.sh @@ -17,15 +17,15 @@ module load bwamem2/2.0_gnu module load samtools/1.13 -REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta +REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta #bwa-mem2 index $REFERENCE PROVIDER=CCG -INPUTPATH=raw_data/BSA_wgs/ # -OUTPUTPATH=data/BSA/wgs/bam_files/ # Change this for different generations +INPUTPATH=assays/BSA_WGS/dataset # +OUTPUTPATH=runs/BSA/wgs/bam_files/ # Change this for different generations mkdir -p $OUTPUTPATH mkdir -p ${OUTPUTPATH}/metrics/ diff --git a/workflows/BSA/betalain_quantification.Rmd b/workflows/BSA/betalain_quantification.Rmd index 8ab6265..31a9b69 100644 --- a/workflows/BSA/betalain_quantification.Rmd +++ b/workflows/BSA/betalain_quantification.Rmd @@ -20,7 +20,7 @@ library(ggbeeswarm) Create output directory for plots ```{bash} -mkdir ../../plots/betalain_quantification +mkdir ../../runs/betalain_quantification ``` @@ -28,7 +28,7 @@ Read in measured data ```{r} # read in data -betalain_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx", +betalain_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx", sheet = "Photometric_quantification") # normalize to the same input weight @@ -219,11 +219,11 @@ p_leaf <- ggplot(data = melted_quant %>% filter(tissue == "leaf")) + yend = mean_content), linewidth = 0.9, color = "black") + - geom_text(data = leaf_bc_groups, + geom_text(data = leaf_bc_groups, aes(x = as.numeric(factors) - 0.19, y = 1.15, - label = groups), - size=8, + label = groups), + size=8, inherit.aes = F, color = "red3") + scale_color_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) + @@ -246,8 +246,8 @@ p_leaf # plot betalain quantification from flower p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) + - # geom_boxplot(aes(x = accession, - # y = content, + # geom_boxplot(aes(x = accession, + # y = content, # fill = metabolite), # color = "black", # #color = "red4", @@ -282,18 +282,18 @@ p_flower <- ggplot(data = melted_quant %>% filter(tissue == "flower")) + yend = mean_content), linewidth = 0.9, color = "black") + - geom_text(data = flower_bc_groups, + geom_text(data = flower_bc_groups, aes(x = as.numeric(factors) - 0.19, y = 1.15, - label = groups), - size=8, + label = groups), + size=8, inherit.aes = F, color = "red3") + - geom_text(data = flower_bx_groups, + geom_text(data = flower_bx_groups, aes(x = as.numeric(factors) + 0.19, y = 1.15, - label = groups), - size=8, + label = groups), + size=8, inherit.aes = F, color = "#F0B327") + scale_fill_manual(values = c( "red3","#F0B327"), labels = c("Betacyanins", "Betaxanthins")) + @@ -322,7 +322,7 @@ patchplot_betalains <- p_flower / p_leaf + patchplot_betalains -ggsave(filename = "../../plots/betalain_quantification/betalain_quant_photometric_content.png", +ggsave(filename = "../../runs/betalain_quantification/betalain_quant_photometric_content.png", plot = patchplot_betalains, bg = "white", dpi = 450, @@ -335,7 +335,7 @@ HPLC quantification ```{r} # hplc quantification -hplc_quantification <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx", +hplc_quantification <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx", sheet = "LC-MS_quantification") # get blank measurements @@ -447,14 +447,14 @@ Plot results amaranthin ```{r} # plot pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + - # geom_point(aes(x = accession, - # y = Amaranthin_ratio, + # geom_point(aes(x = accession, + # y = Amaranthin_ratio, # group = uniq_ind), # color = "red3", # position = position_dodge(width = 0.75), # size = 2.8) + - geom_beeswarm(aes(x = accession, - y = Amaranthin_ratio, + geom_beeswarm(aes(x = accession, + y = Amaranthin_ratio, group = uniq_ind), color = "red3", cex = 1.5, @@ -489,14 +489,14 @@ pa_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + pa_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) + - # geom_point(aes(x = accession, - # y = Amaranthin_ratio, + # geom_point(aes(x = accession, + # y = Amaranthin_ratio, # group = uniq_ind), # color = "red3", # position = position_dodge(width = 0.75), # size = 2.8) + - geom_beeswarm(aes(x = accession, - y = Amaranthin_ratio, + geom_beeswarm(aes(x = accession, + y = Amaranthin_ratio, group = uniq_ind), color = "red3", cex = 1.5, @@ -536,8 +536,8 @@ Plot results betanin ```{r} # plot pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + - geom_beeswarm(aes(x = accession, - y = Betanin_ratio, + geom_beeswarm(aes(x = accession, + y = Betanin_ratio, group = uniq_ind), color = "red3", cex = 1.5, @@ -572,8 +572,8 @@ pb_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + pb_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) + - geom_beeswarm(aes(x = accession, - y = Betanin_ratio, + geom_beeswarm(aes(x = accession, + y = Betanin_ratio, group = uniq_ind), color = "red3", cex = 1.5, @@ -612,8 +612,8 @@ Plot results betalamic acid ```{r} # plot pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + - geom_beeswarm(aes(x = accession, - y = Betalamic_acid_ratio, + geom_beeswarm(aes(x = accession, + y = Betalamic_acid_ratio, group = uniq_ind), color = "grey40", cex = 1.5, @@ -648,8 +648,8 @@ pba_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + pba_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) + - geom_beeswarm(aes(x = accession, - y = Betalamic_acid_ratio, + geom_beeswarm(aes(x = accession, + y = Betalamic_acid_ratio, group = uniq_ind), color = "grey40", cex = 1.5, @@ -688,8 +688,8 @@ Plot results vulgaxanthin ```{r} # plot pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + - geom_beeswarm(aes(x = accession, - y = Vulgaxanthin_IV_ratio, + geom_beeswarm(aes(x = accession, + y = Vulgaxanthin_IV_ratio, group = uniq_ind), color = "#F0B327", method = "swarm", @@ -718,8 +718,8 @@ pv_leaf <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "leaf")) + pv_flower <- ggplot(data = hplc_quant_normalised %>% filter(tissue == "flower")) + - geom_beeswarm(aes(x = accession, - y = Vulgaxanthin_IV_ratio, + geom_beeswarm(aes(x = accession, + y = Vulgaxanthin_IV_ratio, group = uniq_ind), color = "#F0B327", method = "swarm", @@ -762,7 +762,7 @@ overview_patchplot <- p_leaf + p_flower + pa_leaf + pa_flower + pb_leaf + pb_flo legend.direction = "vertical", plot.tag = element_text(vjust = 2, size = 17, face = "bold")) -ggsave(filename = "../../plots/betalain_quantification/overview_betalain_quantification.png", +ggsave(filename = "../../runs/betalain_quantification/overview_betalain_quantification.png", plot = overview_patchplot, bg = "white", dpi = 450, @@ -785,7 +785,7 @@ photo_hplc_patchplot <- plot_spacer() / (p_leaf + p_flower) / (pa_leaf + pa_flow -ggsave(filename = "../../plots/betalain_quantification/quantification_without_picture.png", +ggsave(filename = "../../runs/betalain_quantification/quantification_without_picture.png", plot = photo_hplc_patchplot, bg = "white", dpi = 450, @@ -798,7 +798,7 @@ Analyse HPLC results from amaranth roots: ```{r} # hplc quantification -root_quant <- read_excel(path = "../../raw_data/betalain_quantification/Betalain_quantification_summary.xlsx", +root_quant <- read_excel(path = "../../assays/betalain_photometric_quantification/dataset/Betalain_quantification_summary.xlsx", sheet = "transgenic_roots_LC-MS_quantification") root_quant <- root_quant %>% mutate(sample_id = paste0("plate_", plate, "_", root_type)) %>% @@ -834,7 +834,7 @@ root_quant_plot <- ggplot(data = root_quant_norm) + legend.position = "none") root_quant_plot -ggsave(filename = "../../plots/betalain_quantification/root_quantification.png", +ggsave(filename = "../../runs/betalain_quantification/root_quantification.png", plot = root_quant_plot, bg = "white", dpi = 450, @@ -842,5 +842,3 @@ ggsave(filename = "../../plots/betalain_quantification/root_quantification.png", height = 5) ``` - - diff --git a/workflows/BSA/bsa_and_plotting.R b/workflows/BSA/bsa_and_plotting.R index 4f9f266..ed0b831 100644 --- a/workflows/BSA/bsa_and_plotting.R +++ b/workflows/BSA/bsa_and_plotting.R @@ -1,5 +1,5 @@ -setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/") +setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/") library(QTLseqr) library(tidyverse) @@ -16,7 +16,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){ highBulk = HighBulk, lowBulk = LowBulk, chromList = Chroms) - + df <- df %>% select(CHROM, POS, REF, @@ -38,7 +38,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){ mutate(CHROM=as.factor(as.numeric(gsub("Scaffold_","",CHROM))), POS=as.numeric(POS)) %>% filter(REF!='*',ALT!='*') - + df_filt <-filterSNPs(SNPset = df, refAlleleFreq = 0.2, minTotalDepth = 50, @@ -46,7 +46,7 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){ minSampleDepth = 20, minGQ = 99, verbose = TRUE) - + df_filt <- runGprimeAnalysis( SNPset = df_filt, windowSize = 2e6, @@ -64,14 +64,14 @@ bsa_analysis <- function(rawData,HighBulk,LowBulk,Chroms,nhigh,nlow){ return(df_filt) } -AM_00332_leaf_green_red <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table', +AM_00332_leaf_green_red <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table', HighBulk = "AM_00332_gl", LowBulk = "AM_00332_rl", Chroms = paste0(rep("Scaffold_", 16),1:16), nhigh=80, nlow=80) -AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps05.table', +AM_00331_flower_red_green <- bsa_analysis(rawData = 'runs/BSA/wgs/vcf/bulk_snps05.table', HighBulk = "AM_00331_rf", LowBulk = "AM_00331_gf", Chroms = paste0(rep("Scaffold_", @@ -83,15 +83,15 @@ AM_00331_flower_red_green <- bsa_analysis(rawData = 'data/BSA/wgs/vcf/bulk_snps0 # plot all results # leaf plotGresults <- function(Gresults,betalain_genes){ - qval <- Gresults %>% - filter(qvalue<=0.01) + qval <- Gresults %>% + filter(qvalue<=0.01) #qval <- min(qval$Gprime) qval <- 3 - - + + mG <- Gresults %>% filter(Gprime==max(Gresults$Gprime)) - + p1 <- ggplot()+ geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) + labs(x= 'Position (Mb)',y= "G' value") + @@ -99,7 +99,7 @@ plotGresults <- function(Gresults,betalain_genes){ geom_hline(data=data.frame(yint=qval), aes(yintercept =yint, linetype ='dashed', - color=alpha('red',0.6)), + color=alpha('red',0.6)), size=1.7)+ facet_grid(.~CHROM, space = 'free_x',scales='free_x') + theme(panel.spacing.x=unit(0.25, "lines")) + @@ -117,23 +117,23 @@ plotGresults <- function(Gresults,betalain_genes){ axis.ticks = element_line(linewidth = 1.5), axis.ticks.length = unit(.25, "cm")) + scale_x_continuous(guide = guide_axis(check.overlap = T)) - #geom_gene_arrow(data=betalain_genes, + #geom_gene_arrow(data=betalain_genes, # aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type)) - + #ggsave(outfile,p1,width = 18,height = 7,,bg='white') return(p1) } # flower plotGresults1 <- function(Gresults,betalain_genes){ - qval <- Gresults %>% - filter(qvalue<=0.01) + qval <- Gresults %>% + filter(qvalue<=0.01) #qval <- min(qval$Gprime) qval <- 3 - - + + mG <- Gresults %>% filter(Gprime==max(Gresults$Gprime)) - + p1 <- ggplot()+ geom_line(data=Gresults,aes(POS/1e6,Gprime), size=2) + labs(x= 'Position (Mb)',y= "G' value") + @@ -142,7 +142,7 @@ plotGresults1 <- function(Gresults,betalain_genes){ geom_hline(data=data.frame(yint=qval), aes(yintercept =yint, linetype ='dashed', - color=alpha('red',0.6)), + color=alpha('red',0.6)), size=1.7)+ facet_grid(.~CHROM,space = 'free_x',scales='free_x') + theme(panel.spacing.x=unit(0.25, "lines")) + @@ -158,9 +158,9 @@ plotGresults1 <- function(Gresults,betalain_genes){ axis.ticks = element_line(linewidth = 1.5), axis.ticks.length = unit(.25, "cm")) + scale_x_continuous(guide = guide_axis(check.overlap = T)) - #geom_gene_arrow(data=betalain_genes, + #geom_gene_arrow(data=betalain_genes, # aes(xmin = start/1e6, xmax = end/1e6, y = max(Gresults$Gprime), fill = type)) - + #ggsave(outfile,p1,width = 18,height = 7,,bg='white') return(p1) } @@ -168,23 +168,23 @@ plotGresults1 <- function(Gresults,betalain_genes){ # plot individual chromosomes # flower plotGqtl <- function(Gresults,chr,genes){ - - qval <- Gresults %>% - filter(qvalue<=0.01) + + qval <- Gresults %>% + filter(qvalue<=0.01) #qval <- min(qval$Gprime) qval <- 3 my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F) - + ggplot()+ geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) + labs(x= 'Position (Mb)',y= "G' value") + scale_x_continuous(breaks = c(0,10,20,30))+ geom_hline(data=data.frame(yint=qval), - aes(yintercept = yint, - linetype ='dashed', + aes(yintercept = yint, + linetype ='dashed', color=alpha('red',0.6)), size = 2) + - facet_grid(.~CHROM,space = 'free_x',scales='free_x') + + facet_grid(.~CHROM,space = 'free_x',scales='free_x') + theme(panel.spacing.x=unit(0.25, "lines")) + ylim(0,10) + theme( strip.background = element_rect(fill = alpha('lightblue',0.2)), @@ -199,19 +199,19 @@ plotGqtl <- function(Gresults,chr,genes){ axis.line = element_line(linewidth = 2), axis.ticks = element_line(linewidth = 1.5), axis.ticks.length = unit(.25, "cm")) + - geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(), + geom_gene_arrow(data=filter(genes, CHROM==chr, type == "transcript") %>% droplevels(), aes(xmin = start/1e6, xmax = end/1e6, y = 9.5, color = attributes), size=6) + scale_color_manual(values = c(alpha('red',0.6), "black", "black","grey","grey")) } # leaf plotGqtl1 <- function(Gresults,chr,genes){ - - qval <- Gresults %>% - filter(qvalue<=0.01) + + qval <- Gresults %>% + filter(qvalue<=0.01) #qval <- min(qval$Gprime) qval <- 3 my_qtl <- getQTLTable(SNPset = Gresults, alpha = 0.01,export = F) - + p1 <- ggplot() + geom_line(data=filter(Gresults,CHROM==chr),aes(POS/1e6,Gprime),size=2) + labs(x= 'Position (Mb)',y= "G' value") + @@ -239,22 +239,22 @@ plotGqtl1 <- function(Gresults,chr,genes){ axis.line = element_line(linewidth = 2), axis.ticks = element_line(linewidth = 1.5), axis.ticks.length = unit(.25, "cm")) - - + + p2 <- ggplot() + - geom_gene_arrow(data=filter(genes, - CHROM==chr, + geom_gene_arrow(data=filter(genes, + CHROM==chr, type == "transcript", attributes == "ID=AHp023147.1;geneID=AHp023147" | attributes == "ID=AHp023148.1;geneID=AHp023148") %>% droplevels(), - aes(xmin = start, - xmax = end, - y = "chr16", - fill = attributes, + aes(xmin = start, + xmax = end, + y = "chr16", + fill = attributes, forward = c(F,T)), size = 1.5, color = "black", - arrowhead_height = unit(12, "mm"), - arrowhead_width = unit(6, "mm"), + arrowhead_height = unit(12, "mm"), + arrowhead_width = unit(6, "mm"), arrow_body_height = grid::unit(6, "mm")) + geom_text(aes(x = c(5246000,5290000), y = "chr16", @@ -268,7 +268,7 @@ plotGqtl1 <- function(Gresults,chr,genes){ axis.line = element_line(linewidth = 2), axis.ticks = element_line(linewidth = 1.5), axis.text.x = element_text(size=20), - panel.grid.major.y = ggplot2::element_line(colour = "grey", + panel.grid.major.y = ggplot2::element_line(colour = "grey", linewidth = 1), #axis.title.x = element_text(size=40), axis.ticks.length = unit(.25, "cm"), @@ -278,12 +278,12 @@ plotGqtl1 <- function(Gresults,chr,genes){ axis.ticks.y = element_blank(), axis.text.y = element_blank()) + scale_fill_manual(values = c("chocolate2","cyan3",'red')) - + # combine plots: out <- plot_grid(p2,p1, nrow = 2, rel_heights = c(0.3,0.7)) - + out } @@ -348,15 +348,9 @@ MYB_plot <- plot_grid(cowplot_flower, pathway_plot, alignment_plot, labels = c("", "C", "D"), label_size = 34) -ggsave(filename = "plots/paper_myb_combined_alignment.png", +ggsave(filename = "runs/plots/paper_myb_combined_alignment.png", plot = MYB_plot, dpi = 400, width = 25, height = 25, bg = "white") - - - - - - diff --git a/workflows/BSA/read_count_analysis.Rmd b/workflows/BSA/read_count_analysis.Rmd index 4154c86..f7c97bb 100644 --- a/workflows/BSA/read_count_analysis.Rmd +++ b/workflows/BSA/read_count_analysis.Rmd @@ -11,14 +11,14 @@ library(tidyverse) library(DESeq2) library(factoextra) library(patchwork) -knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/") +knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/") ``` ```{r} ########################## Create a function to generate plots for all betalain pathway genes # load object with names of all betalain and flavonoid genes -pathway_genes <- read.csv(file = "data/manual_sheets/color_pathway_genes.csv", header=T) +pathway_genes <- read.csv(file = "studies/additional_data/resources/color_pathway_genes.csv", header=T) colnames(pathway_genes) <- c("pathway_gene", "pathway", "gene_id") betalain.genes <- pathway_genes %>% filter(pathway == "Betalain") @@ -32,13 +32,13 @@ Transcript level gene expression quantification from kallisto: ```{r} # vector of input directories -sample_names <- dir(path = "data/flower_color_mapping/kallisto_quant/") +sample_names <- dir(path = "runs/flower_color_mapping/kallisto_quant/") # read in tables kallisto_quant <- c() for (i in 1:length(sample_names)){ - x <- read_table(file = paste0("data/flower_color_mapping/kallisto_quant/", + x <- read_table(file = paste0("runs/flower_color_mapping/kallisto_quant/", sample_names[i], "/abundance.tsv")) # set column names and keep relevant columns @@ -126,7 +126,7 @@ patchplot <- betalain_plots[[8]] + betalain_plots[[9]] + betalain_plots[[11]] + theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"), plot.tag = element_text(size = 35)) -ggsave(filename = "plots/flower_mapping_expression/betalain_gene_kallisto.png", +ggsave(filename = "runs/plots/flower_mapping_expression/betalain_gene_kallisto.png", width = 28, height = 20) @@ -182,7 +182,7 @@ betalain_plots <- plot_betalain_counts(gene_ID_list = betalain_quant$transcript_ # save all plots for (i in 1:length(betalain_plots)){ - ggsave(filename = paste0("plots/flower_mapping_expression/", gene_names[i], ".png"), + ggsave(filename = paste0("runs/plots/flower_mapping_expression/", gene_names[i], ".png"), plot = betalain_plots[[i]], height = 6, width = 8) @@ -219,7 +219,7 @@ ggplot(data=regulator_quant) + legend.position = "bottom", legend.direction = "vertical") -ggsave(filename = "plots/flower_mapping_expression/flower_expression_grid.png", +ggsave(filename = "runs/plots/flower_mapping_expression/flower_expression_grid.png", width = 4, height = 8, dpi = 500) @@ -289,7 +289,7 @@ ggplot(data=flavonoid_grid) + legend.position = "right", legend.direction = "vertical") -ggsave(filename = "plots/flower_mapping_expression/flavonoid_expression_grid.png", +ggsave(filename = "runs/plots/flower_mapping_expression/flavonoid_expression_grid.png", width = 8, height = 8, dpi = 500) diff --git a/workflows/BSA/read_count_analysis_from_bam.Rmd b/workflows/BSA/read_count_analysis_from_bam.Rmd index a3d4423..f442e22 100644 --- a/workflows/BSA/read_count_analysis_from_bam.Rmd +++ b/workflows/BSA/read_count_analysis_from_bam.Rmd @@ -11,40 +11,40 @@ library(tidyverse) library(chromstaR) library(ggtranscript) library(RColorBrewer) -knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2/") +knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/") ``` Extract the sequencing reads from the bam files which overlap both the two non-synonymous SNPs and the stop-gained SNP. Samtools view can be used to extract reads covering a particular position (and their pairs). Non-primary alignments can be discarded. Reads should cover the position of the stop-gained variant (Scaffold 16, 5305851, C->T) and the position of the right non-synonymous variant (Scaffold 16, 5305727, A->T). ```{bash} -mkdir -p data/BSA/RNAseq/phased_reads/ +mkdir -p runs/BSA/RNAseq/phased_reads/ # green flower # index and extract everything overlapping the right non-synonymous variant position -samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam -samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam +samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam +samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_gf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam # index and extract everything overlapping the stop-gained variant position -samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam -samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam -samtools index data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam +samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam +samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam +samtools index runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam # save as tsv file using sam2tsv from jvarkit -java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv +java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv # red flower # index and extract everything overlapping the right non-synonymous variant position -samtools index data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam -samtools view -b -h -F 256 -P data/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam +samtools index runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam +samtools view -b -h -F 256 -P runs/BSA/RNAseq/STAR_flower_mappings/AM_00332_rf_Aligned.sortedByCoord.out.bam Scaffold_16:5305727-5305727 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam # index and extract everything overlapping the stop-gained variant position -samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam -samtools view -b -h -P data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam -samtools index data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam +samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam +samtools view -b -h -P runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_mismatch.bam Scaffold_16:5305851-5305851 > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam +samtools index runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam # save as tsv file using sam2tsv from jvarkit -java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv +java -jar /home/tom/Documents/tools/jvarkit/dist/sam2tsv.jar -R runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam > runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv ``` Put in loading and filtering of the data (tsv and sequencing data) into separate functions: @@ -72,9 +72,9 @@ read_bam.tsv <- function(filename){ arrange(read_name) %>% ungroup() %>% #mutate(allele = ifelse(tolower(read_base) == ref_base, "ref", "alt")) %>% # is it the reference or alternative allele? - mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A", - "alt", - ifelse(ref_pos == 5305727 & read_base == "T", + mutate(allele = ifelse(ref_pos == 5305722 & read_base == "A", + "alt", + ifelse(ref_pos == 5305727 & read_base == "T", "alt", ifelse(ref_pos == 5305851 & read_base == "T", "alt", "ref")))) @@ -104,22 +104,22 @@ plot_reads <- function(df, tsv, title){ # maybe rather join the two tables? number_alt_alleles <- tsv %>% filter(allele == "alt") %>% - group_by(read_name) %>% + group_by(read_name) %>% summarise(n = n(), snp_grouped = sum(snp_group)) # join the table detailing the number of alt alleles snp.bam.df <- left_join(df, number_alt_alleles, by = c("qname" = "read_name")) - + # first plot, showing specific reads snpplot <- ggplot() + - geom_range(data = snp.bam.df, + geom_range(data = snp.bam.df, aes(xstart = start, xend = end, y = factor(qname, levels=unique(qname[order(n,snp_grouped,qname)]), ordered=TRUE))) - + # second plot showing alternativ allele positions snpplot2 <- snpplot + - geom_point(data = tsv %>% filter(allele == "alt"), - aes(x = ref_pos, - y = read_name, + geom_point(data = tsv %>% filter(allele == "alt"), + aes(x = ref_pos, + y = read_name, color = as.factor(snp_group)), size = 2.2) + theme_classic() + @@ -149,9 +149,9 @@ Load in data for the red and green flower bulks of the BSA on AM_00332: ```{r} # green flower -snp_tsv.gf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv") +snp_tsv.gf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam.tsv") -snp_df.gf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam", +snp_df.gf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_gf_Aligned.sortedByCoord.out.covering_both.bam", tsv = snp_tsv.gf) # how many reads after filtering? @@ -159,9 +159,9 @@ snp_tsv.gf %>% dplyr::count(read_name) # 99 reads # red flower -snp_tsv.rf <- read_bam.tsv(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv") +snp_tsv.rf <- read_bam.tsv(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam.tsv") -snp_df.rf <- read_bam_snps_as_df(filename = "data/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam", +snp_df.rf <- read_bam_snps_as_df(filename = "runs/BSA/RNAseq/phased_reads/AM_00332_rf_Aligned.sortedByCoord.out.covering_both.bam", tsv = snp_tsv.rf) # how many reads after filtering? @@ -179,7 +179,7 @@ plot.gf <- plot_reads(df = snp_df.gf, plot.gf # save plot -ggsave(filename = "plots/rna_seq_reads_gf.png", +ggsave(filename = "runs/plots/rna_seq_reads_gf.png", plot = plot.gf, width = 10, height = 8) @@ -192,17 +192,8 @@ plot.rf <- plot_reads(df = snp_df.rf, plot.rf # save plot -ggsave(filename = "plots/rna_seq_reads_rf.png", +ggsave(filename = "runs/plots/rna_seq_reads_rf.png", plot = plot.rf, width = 10, height = 8) ``` - - - - - - - - - diff --git a/workflows/BSA/snpEff_analysis.Rmd b/workflows/BSA/snpEff_analysis.Rmd index f74a6b0..28c1f06 100644 --- a/workflows/BSA/snpEff_analysis.Rmd +++ b/workflows/BSA/snpEff_analysis.Rmd @@ -14,7 +14,7 @@ library(ggtranscript) library(reshape2) library(cowplot) library(patchwork) -knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publication/") +knitr::opts_knit$set(root.dir = "/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/") ``` ## Database creation @@ -22,15 +22,15 @@ knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publicat Run snpEff database creation on the fixed annotation files. Copy the fixed files to the snpeff directory: ```{bash} -mkdir -p data/annotation_analysis/snpEff/databases/AHv2.2/ +mkdir -p runs/annotation_analysis/snpEff/databases/AHv2.2/ # snpEff analysis # add genome file to snpEff database -cp polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta data/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa +cp runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/sequences.fa # add annotation file to snpEff database -cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff data/annotation_analysis/snpEff/databases/AHv2.2/genes.gff -cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta data/annotation_analysis/snpEff/databases/AHv2.2/cds.fa -cp data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta data/annotation_analysis/snpEff/databases/AHv2.2/protein.fa +cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff runs/annotation_analysis/snpEff/databases/AHv2.2/genes.gff +cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/cds.fa +cp runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta runs/annotation_analysis/snpEff/databases/AHv2.2/protein.fa # create database: java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2 @@ -42,18 +42,18 @@ java -jar /home/tom/Documents/tools/snpEff/snpEff.jar build -v AHv2.2 Run using data from Markus color/sterility mapping bulks: ```{bash} -mkdir -p data/annotation_analysis/snpEff/bsa_sterility_color/analysis +mkdir -p runs/annotation_analysis/snpEff/bsa_sterility_color/analysis # get data and run snpEff on the example data: -java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats data/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 data/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf +java -jar /home/tom/Documents/tools/snpEff/snpEff.jar -csvStats runs/annotation_analysis/snpEff/bsa_sterility_color/output.stats.csv -v AHv2.2 runs/annotation_analysis/snpEff/bsa_sterility_color/gatk_filter_maxmissing05_biallelic.vcf.gz > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.vcf # two files are not saved in the output directory but in the current working directory -mv snpEff_* data/annotation_analysis/snpEff/bsa_sterility_color/ +mv snpEff_* runs/annotation_analysis/snpEff/bsa_sterility_color/ # it is challenging to process the snpEff output for downstream analysis # snpsift is a software package distributed with snpEff that eases processing -cat data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt +cat runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.vcf | /home/tom/Documents/tools/snpEff/scripts/vcfEffOnePerLine.pl | java -jar /home/tom/Documents/tools/snpEff/SnpSift.jar extractFields - CHROM POS "ANN[*].GENEID" "ANN[*].EFFECT" > runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt ``` @@ -63,11 +63,11 @@ Analyze the snpEff test run and check for high impact variants that can be manua ```{r} # load in snpEff summary file -snpEff.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "") +snpEff.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/snpEff_genes.txt", skip = 1, header = T, comment.char = "") colnames(snpEff.tab)[1] <- "GeneName" # load in list of betalain and flavonoid pathway genes -color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T) +color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T) snpEff.tab <- left_join(snpEff.tab, color_pathways, by = c("GeneId" = "Gene_id")) # check color pathway genes for high impact variants, moderate and in theory also modifier might also be relevant @@ -88,14 +88,14 @@ Subset a more detailed table of betalain genes and their respective positions in ```{r} # load in list of betalain and flavonoid pathway genes -color_pathways <- read.csv("data/manual_sheets/color_pathway_genes.csv", header = T) +color_pathways <- read.csv("runs/manual_sheets/color_pathway_genes.csv", header = T) betalain_chr16 <- color_pathways %>% filter(Gene_id == "AHp022773" | Gene_id == "AHp023148" | Gene_id == "AHp023147") # add BvMYB1like gene betalain_chr16[2,] <- c("BvMYB1like", "Betalain", "AHp022773") -write.table(betalain_chr16, - file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt", +write.table(betalain_chr16, + file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt", quote = F) # set up function for reading in a gtf file @@ -123,18 +123,18 @@ read.gtf <- function(file){ } # read in annotation -annotation.gtf <- read.gtf("polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf") +annotation.gtf <- read.gtf("runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf") # subset betalain genes betalain_chr16.gtf <- annotation.gtf %>% filter(gene_id %in% betalain_chr16$Gene_id) -saveRDS(betalain_chr16.gtf, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") +saveRDS(betalain_chr16.gtf, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") #################### generate BED file of relevant positions -betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") +betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") # snpeff by default uses the 5000 positions before and after a gene # create a bed file that can be used to subset the vcf file into relevant variants betalain_chr16.bed <- betalain_chr16.gtf %>% @@ -145,16 +145,16 @@ betalain_chr16.bed <- betalain_chr16.gtf %>% select(chrom, chromStart, chromEnd) %>% unique() -write_tsv(betalain_chr16.bed, file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed") +write_tsv(betalain_chr16.bed, file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed") ``` Subset the vcf file and extract the allele frequencies using vcftools ```{bash} # to extract the format field -vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16 +vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --extract-FORMAT-info AD --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16 # also subset the vcf file to include only the variants around the betalain genes -vcftools --gzvcf data/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16 +vcftools --gzvcf runs/BSA/wgs/vcf/gatk_filter_maxmissing05_biallelic.vcf.gz --bed runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.bed --indv AM_00331_gf --indv AM_00331_rf --indv AM_00332_gl --indv AM_00332_rl --recode --recode-INFO-all --out runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16 ``` @@ -162,10 +162,10 @@ Plot the annotated variants in betalain genes. In general, it could be interesti ```{r} # read in betalain gene list on chr 16 -betalain_chr16 <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt") +betalain_chr16 <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.txt") # read in variant count of extracted SNPs -allele_depth.tab <- read.table(file = "data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T) +allele_depth.tab <- read.table(file = "runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.AD.FORMAT", header = T) allele_depth.tab <- allele_depth.tab %>% mutate(customid = paste0("16_", POS)) # allele depth denotes first the reference allele and then the alternative allele, only those reads which were involved in allele calling @@ -176,7 +176,7 @@ allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_gl", sep = allele_depth.tab <- separate(data = allele_depth.tab, col = "AM_00332_rl", sep = ",", into = c("AM00332_rl_ref", "AM00332_rl_alt")) # read in snpsift output and subset for betalain genes -snpsift.tab <- read.table("data/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T) +snpsift.tab <- read.table("runs/annotation_analysis/snpEff/bsa_sterility_color/output.snpeff.chr16.snpsift.txt", header = T) snpsift.tab <- snpsift.tab %>% filter(ANN....GENEID %in% betalain_chr16$Gene_id) %>% mutate(customid = paste0("16_", POS)) %>% # add customid column to snpsift table to enable merging of the two tables @@ -184,7 +184,7 @@ snpsift.tab <- snpsift.tab %>% # read in betalain gene gtf on chr 16 -betalain_chr16.gtf <- readRDS("data/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") +betalain_chr16.gtf <- readRDS("runs/annotation_analysis/snpEff/bsa_sterility_color/analysis/betalain_chr16.gtf") # join the snpsift table with the allele depth table joined.df <- left_join(snpsift.tab, allele_depth.tab, by = c("CHROM", "POS", "customid")) @@ -201,7 +201,7 @@ Set up plotting functions: filter_variants <- function(variants, gene, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt){ # prepare data by only keeping the relevant gene variants and samples - dat <- variants %>% + dat <- variants %>% filter(ANN....GENEID == gene) %>% select(CHROM, POS, ANN....EFFECT, bulk1_ref, bulk1_alt, bulk2_ref, bulk2_alt) %>% filter(bulk1_alt != 0 & bulk2_alt != 0) @@ -226,13 +226,13 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ filter(transcript_id == trans_id, type == "CDS") filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),] - filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", - "synonymous_variant", + filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", + "synonymous_variant", "missense_variant", "stop_gained")) min_pos <- min(annotation.filtered$start) max_pos <- max(annotation.filtered$end) - + # plot the gene with variants p2 <- ggplot() + geom_range(data = annotation.filtered, @@ -253,7 +253,7 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ x = "Position Scaffold 16 (bp)") + theme_classic() + #scale_fill_brewer(palette = "RdBl", direction = -1) + # think about color palette to use - scale_fill_viridis_d(direction = -1, + scale_fill_viridis_d(direction = -1, labels = c("Intron variant", "Synonymous variant", "Missense variant", "Stop gained")) + theme(text = element_text(size = 21), #legend.position = "none", @@ -277,23 +277,23 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ out_plot <- plot_grid(p2, legend, nrow = 1, rel_widths = c(0.75, 0.25)) - + # # rearrange data: # dat1 <- filtered_variants[,-(4:5)] # dat2 <- filtered_variants[,-(6:7)] # dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT")) # dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT")) - # + # # # plot relative allele frequency # p1 <- ggplot() + # geom_col(data = dat1.melt, # aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 20) + # xlim(c(min_pos, max_pos)) + - # labs(y = "", - # fill = "red_bulk", + # labs(y = "", + # fill = "red_bulk", # x = "") + # theme_classic() + - # scale_fill_brewer(palette = "Set1", + # scale_fill_brewer(palette = "Set1", # labels = c("Reference allele", "Alternative allele"), # direction = -1, # guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible @@ -307,12 +307,12 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ # axis.title.x = element_blank(), # legend.title = element_text(color = "transparent"), # legend.text = element_text(color = "transparent")) - # + # # bars <- map(unique(dat2.melt$POS) # , ~geom_col(position = "stack", # width = 20 # , data = dat2.melt %>% filter(POS == .x))) - # + # # p3 <- ggplot(data = dat2.melt, # aes(x=POS, # y=as.numeric(value), @@ -322,8 +322,8 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ # labs(fill = "green_bulk", # x = "Position on Scaffold 16") + # theme_classic() + - # scale_fill_brewer(palette = "Set1", - # labels = c("Reference allele", "Alternative allele"), + # scale_fill_brewer(palette = "Set1", + # labels = c("Reference allele", "Alternative allele"), # direction = -1, # guide = guide_legend(override.aes = list(alpha = 0))) + # theme(text = element_text(size = 21), @@ -331,9 +331,9 @@ plot_bulk_comparison <- function(annotation, trans_id, filtered_variants){ # legend.position = "none", # legend.title = element_text(color = "transparent"), # legend.text = element_text(color = "transparent")) - - - #allplots <- p1 + p2 + p3 + + + + #allplots <- p1 + p2 + p3 + #plot_layout(ncol = 1) return(out_plot) } @@ -346,7 +346,7 @@ Plot for CYP76AD2: ```{r} # plot for one gene # filter all homozygous reference variants -dat <- joined.df %>% +dat <- joined.df %>% filter(ANN....GENEID == "AHp023148") %>% filter(AM00332_gl_alt != 0) @@ -355,7 +355,7 @@ dat <- joined.df %>% # filter to only include specific gene -dat.filtered <- filter_variants(variants = joined.df, +dat.filtered <- filter_variants(variants = joined.df, gene = "AHp023148", bulk1_ref = "AM00332_gl_ref", bulk1_alt = "AM00332_gl_alt", @@ -373,8 +373,8 @@ AM00332_CYP76AD <- plot_bulk_comparison(annotation = betalain_chr16.gtf, AM00332_CYP76AD -ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff.png", - width = 14, +ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff.png", + width = 14, height = 6) ``` @@ -387,19 +387,19 @@ plot_all_genes <- function(gene_id, transcript_id, bulk){ output <- list() for (i in 1:length(transcript_id)){ # filter all homozygous reference variants - dat <- joined.df %>% + dat <- joined.df %>% filter(ANN....GENEID == gene_id[i]) %>% filter(AM00332_gl_alt != 0) # filter to only include specific gene if (bulk == "AM00332"){ - dat.filtered <- filter_variants(variants = joined.df, + dat.filtered <- filter_variants(variants = joined.df, gene = gene_id[i], bulk1_ref = "AM00332_gl_ref", bulk1_alt = "AM00332_gl_alt", bulk2_ref = "AM00332_rl_ref", bulk2_alt = "AM00332_rl_alt") } else if (bulk == "AM00331"){ - dat.filtered <- filter_variants(variants = joined.df, + dat.filtered <- filter_variants(variants = joined.df, gene = gene_id[i], bulk1_ref = "AM00331_gf_ref", bulk1_alt = "AM00331_gf_alt", @@ -425,9 +425,9 @@ bulk_AM00332_plot_list <- plot_all_genes(gene_id = gene_id, bulk = "AM00332") for (i in 1:length(bulk_AM00332_plot_list)){ - ggsave(filename = paste0("plots/BSA/AM00332_color_loss_", gene_id[i], ".png"), + ggsave(filename = paste0("runs/plots/BSA/AM00332_color_loss_", gene_id[i], ".png"), plot = bulk_AM00332_plot_list[[i]], - width = 14, + width = 14, height = 6) } @@ -437,9 +437,9 @@ bulk_AM00331_plot_list <- plot_all_genes(gene_id = gene_id, bulk = "AM00331") for (i in 1:length(bulk_AM00331_plot_list)){ - ggsave(filename = paste0("plots/BSA/AM00331_color_loss_", gene_id[i], ".png"), + ggsave(filename = paste0("runs/plots/BSA/AM00331_color_loss_", gene_id[i], ".png"), plot = bulk_AM00331_plot_list[[i]], - width = 14, + width = 14, height = 6) } ``` @@ -456,8 +456,8 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){ filter(transcript_id == trans_id, type == "CDS") filtered_variants <- filtered_variants[filtered_variants$POS >= min(annotation.filtered$start) & filtered_variants$POS <= max(annotation.filtered$end),] - filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", - "synonymous_variant", + filtered_variants$ANN....EFFECT <- factor(filtered_variants$ANN....EFFECT, levels = c("intron_variant", + "synonymous_variant", "missense_variant", "stop_gained")) # plot the gene with variants @@ -486,26 +486,26 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){ axis.line.x = element_blank(), axis.text.x = element_blank(), legend.position = "none") # this increases the legend margin - # margin has to be increased so that other legends are not cut off, + # margin has to be increased so that other legends are not cut off, # since the first legend seems to determine the margins - + # rearrange data: dat1 <- filtered_variants[,-(4:5)] dat2 <- filtered_variants[,-(6:7)] dat1.melt <- melt(dat1, id.vars = c("CHROM", "POS", "ANN....EFFECT")) dat2.melt <- melt(dat2, id.vars = c("CHROM", "POS", "ANN....EFFECT")) - + # plot relative allele frequency p1 <- ggplot() + geom_col(data = dat1.melt, aes(x = POS, y = as.numeric(value), fill = variable), position = "stack", width = 2) + xlim(c(min(annotation.filtered %>% select(start)), max(annotation.filtered %>% select(end)))) + - labs(y = "Red bulk\n allele depth", - fill = "red_bulk", + labs(y = "Red bulk\n allele depth", + fill = "red_bulk", x = "") + theme_classic() + - scale_fill_brewer(palette = "Set1", + scale_fill_brewer(palette = "Set1", labels = c("Reference allele", "Alternative allele"), direction = -1, guide = guide_legend(override.aes = list(alpha = 0))) + # make legend invisible @@ -524,12 +524,12 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){ axis.title.x = element_blank(), legend.title = element_text(color = "transparent"), legend.text = element_text(color = "transparent")) - + bars <- map(unique(dat2.melt$POS) , ~geom_col(position = "stack", width = 2 , data = dat2.melt %>% filter(POS == .x))) - + p3 <- ggplot(data = dat2.melt, aes(x=POS, y=as.numeric(value), @@ -541,7 +541,7 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){ fill = "", y = "Green bulk\n allele depth") + theme_classic() + - scale_fill_brewer(palette = "Set1", + scale_fill_brewer(palette = "Set1", labels = c("Reference allele", "Alternative allele"), #guide = guide_legend(override.aes = list(alpha = 0)), direction = -1) + @@ -564,13 +564,13 @@ plot_bulk_comparison_zoom <- function(annotation, trans_id, filtered_variants){ p3 <- p3 + theme(legend.position = "none") # combine the three plots allplots <- plot_grid(p1, p2, p3, - ncol = 1, + ncol = 1, align = "v", rel_heights = c(0.3, 0.25, 0.45)) - - + + # combine other plots with legend - allplots <- plot_grid(legend, allplots, + allplots <- plot_grid(legend, allplots, ncol = 1, #align = "v", rel_heights = c(0.2,0.8)) @@ -582,7 +582,7 @@ AM00332_CYP76AD_zoom <- plot_bulk_comparison_zoom(annotation = betalain_chr16.gt filtered_variants = dat.filtered) AM00332_CYP76AD_zoom -ggsave(filename = "plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png") +ggsave(filename = "runs/plots/CYP76AD_AHp023148_bsa_snpeff_zoom.png") ``` @@ -620,16 +620,10 @@ grid_with_BSA <- plot_grid(cowplot_leaf, complete_grid, #grid_with_BSA # save plot -ggsave(filename = "plots/BSA_with_grid.png", +ggsave(filename = "runs/plots/BSA_with_grid.png", plot = grid_with_BSA, width = 25, height = 20, bg = "white", dpi = 500) ``` - - - - - - diff --git a/workflows/genome_polishing/helper_script.R b/workflows/genome_polishing/helper_script.R index 2a93648..1571dce 100644 --- a/workflows/genome_polishing/helper_script.R +++ b/workflows/genome_polishing/helper_script.R @@ -1,20 +1,20 @@ # set working directory -setwd("/home/tom/Documents/projects/Ahyp_v2_2_publication/") +setwd("/home/tom/Documents/ARC_projects/betalain_regulation_amaranth/") # read in list of all headers with the correct order -headers <- read.table("data/NextPolish/input/out.headers.txt") +headers <- read.table("runs/NextPolish/input/out.headers.txt") headers <- gsub(">","",headers$V1) headers <- gsub("quiver_","quiver",headers) # read in prefiltered fasta index -prefilter <- read.table("data/NextPolish/input/out.prefiltered.renamed.txt.fai") +prefilter <- read.table("runs/NextPolish/input/out.prefiltered.renamed.txt.fai") -# use the -# every sequence that is in +# use the +# every sequence that is in no_seq <- headers[!headers %in% prefilter[,1]] no_seq <- sub("",">",no_seq) -write.table(no_seq, file="data/NextPolish/processed/header_without_sequence.fa", +write.table(no_seq, file="runs/NextPolish/processed/header_without_sequence.fa", quote=F, row.names = F, col.names = F) diff --git a/workflows/genome_polishing/process_nextpolish_output.sh b/workflows/genome_polishing/process_nextpolish_output.sh index 65ab458..d24e306 100644 --- a/workflows/genome_polishing/process_nextpolish_output.sh +++ b/workflows/genome_polishing/process_nextpolish_output.sh @@ -4,8 +4,8 @@ # (see master_thesis/code/process_nextpolish_output.sh for more information about the input file preparation) # Setup -NPOUT=data/NextPolish/output/ -NPPROCESSED=data/NextPolish/processed/ +NPOUT=runs/NextPolish/output/ +NPPROCESSED=runs/NextPolish/processed/ mkdir -p "$NPPROCESSED" @@ -15,10 +15,10 @@ mkdir -p "$NPPROCESSED" cut -f1,2 -d'_' "$NPOUT"genome.nextpolish.fa > "$NPPROCESSED"genome.nextpolish.renamed.fa # index the prefiltered fasta file for use in R -samtools faidx data/NextPolish/input/out.prefiltered.renamed.txt +samtools faidx runs/NextPolish/input/out.prefiltered.renamed.txt # filter the prefiltered file for everything that is not in Nextpolish file: -/home/tom/Documents/tools/bbmap/filterbyname.sh in=data/NextPolish/input/out.prefiltered.renamed.txt \ +/home/tom/Documents/tools/bbmap/filterbyname.sh in=runs/NextPolish/input/out.prefiltered.renamed.txt \ names="$NPPROCESSED"genome.nextpolish.renamed.fa \ out="$NPPROCESSED"prefilter_not_in_Nextpolish.fa @@ -34,7 +34,7 @@ cat "$NPPROCESSED"genome.nextpolish.renamed.fa \ LC_ALL=C awk -v RS=">" -v FS="\n" -v ORS="\n" -v OFS="" '$0 {$1=">"$1"\n"; print}' "$NPPROCESSED"combined.fa > "$NPPROCESSED"combined.linear.fa # rename header file by removing trailing underscore character of Contigs: -sed 's/quiver_/quiver/' data/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt +sed 's/quiver_/quiver/' runs/NextPolish/input/out.headers.txt > "$NPPROCESSED"out.header.renamed.txt # order file: ORDER="$NPPROCESSED"out.header.renamed.txt @@ -60,5 +60,5 @@ rm "$NPPROCESSED"out* rm "$NPPROCESSED"prefilter* # copy to final output directory -mkdir -p polished_reference_genome/polished_genome_annotation/assembly/ -cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta polished_reference_genome/polished_genome_annotation/assembly/ +mkdir -p runs/polished_reference_genome/polished_genome_annotation/assembly/ +cp "$NPPROCESSED"Ahypochondriacus_2.2_polished.fasta runs/polished_reference_genome/polished_genome_annotation/assembly/ diff --git a/workflows/genome_polishing/remove_ambiguous_bases.sh b/workflows/genome_polishing/remove_ambiguous_bases.sh index c810a8a..1cb8a10 100644 --- a/workflows/genome_polishing/remove_ambiguous_bases.sh +++ b/workflows/genome_polishing/remove_ambiguous_bases.sh @@ -6,8 +6,8 @@ # N specific entries can in a last step be removed. To reconstruct, the saved order can be used to integrate the N chromosomes again (which record the number of Ns removed) # Change these two parameters -INPUT=reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa -OUTDIR=data/NextPolish/input/ +INPUT=studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.nospace.underscore.fa +OUTDIR=runs/NextPolish/input/ mkdir -p $OUTDIR @@ -37,9 +37,8 @@ grep ">" "$OUTDIR"tmp2.txt > "$OUTDIR"out.headers.txt echo "headers saved" # remove headers without sequence (Can be caused by stretch of Ns at the start of a Scaffold (see Scaffold 10)) -sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator '[ATGC]' > "$OUTDIR"data/NextPolish/input/Ahypochondriacus_split.fasta +sed -r 'N; /(>)[^\n]*\n\1/ s/[^\n]*//; P; D' "$OUTDIR"tmp2.txt | grep . | grep -i -B 1 --no-group-separator '[ATGC]' > "$OUTDIR"Ahypochondriacus_split.fasta # remove and rename temporary files rm "$OUTDIR"tmp.txt mv "$OUTDIR"tmp2.txt "$OUTDIR"out.prefiltered.txt - diff --git a/workflows/genome_polishing/run_nextpolish.sh b/workflows/genome_polishing/run_nextpolish.sh index cbf0579..a086c0b 100644 --- a/workflows/genome_polishing/run_nextpolish.sh +++ b/workflows/genome_polishing/run_nextpolish.sh @@ -19,15 +19,15 @@ module load samtools mkdir -p /scratch/twinkle1/nextpolish/ # set output directory for saving the polished genome to: -OUTDIR=data/NextPolish/output/ +OUTDIR=runs/NextPolish/output/ ### Prepare input files: # remove all reads containing ambiguous bases from the input using bbduk /home/twinkle1/tools/bbmap/bbduk.sh maxns=0 \ - in=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \ - in2=/projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \ + in=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \ + in2=/studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz \ out=/scratch/twinkle1/SRR2106212_1.cleaned.fq \ out2=/scratch/twinkle1/SRR2106212_2.cleaned.fq \ -Xmx16g @@ -49,7 +49,7 @@ round=2 threads=20 read1=/scratch/twinkle1/SRR2106212_1.cleaned.repair.fq read2=/scratch/twinkle1/SRR2106212_2.cleaned.repair.fq -input=/home/twinkle1/master_thesis/data/NextPolish/input/Ahypochondriacus_split.fasta +input=/runs/NextPolish/input/Ahypochondriacus_split.fasta for ((i=1; i<=${round};i++)); do diff --git a/workflows/genome_polishing/unpackSRA.sh b/workflows/genome_polishing/unpackSRA.sh index 7467f47..6cdf885 100644 --- a/workflows/genome_polishing/unpackSRA.sh +++ b/workflows/genome_polishing/unpackSRA.sh @@ -14,29 +14,29 @@ # download file, show progress, increase default max size so that the download starts # tools/sratoolkit.2.11.2-centos_linux64/bin/prefetch -p -O /projects/ag-stetter/twinkle/lightfoot_WGS_short_reads/ --max-size 30G SRR2106212 -QCOUT=data/NextPolish/QC +QCOUT=runs/NextPolish/QC # set working directory -cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/raw_data/lightfoot_WGS_short_reads/ +cd /studies/additional_data/lightfoot_WGS_short_reads/ # before running the fastq-dump command, switch off "Enable Remote Access" by running sratoolskit/bin/vdb-config -i # split into fastq files /home/twinkle1/tools/sratoolkit.2.11.2-centos_linux64/bin/fastq-dump --split-3 --verbose SRR2106212.sra # set working directory -cd /projects/ag-stetter/twinkle/projects/Ahyp_v2_2_publication/ +cd /projects/ag-stetter/twinkle/ARC_projects/betalain_regulation_amaranth # gzip the resulting fastq files # Even though there is an option to gzip it directly using the fastq-dump command, the option is deprecated and should no longer be used -gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq -gzip raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq +gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq +gzip /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq # remove sra file afterwards -rm raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra +rm /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212.sra # quality control: module load fastqc/0.11.9 -fastqc -o raw_data/lightfoot_WGS_short_reads/QC/ -t 8 \ - raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \ - raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz +fastqc -o /studies/additional_data/lightfoot_WGS_short_reads/QC/ -t 8 \ + /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz \ + /studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz -- GitLab