diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..300d7fa0bdb18676993d18c9505173e20a6bd944
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,8 @@
+assays/isoform_sequencing/dataset/reads/375-4.merged.bam filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/375-5.merged.bam filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/375-6.merged.bam filter=lfs diff=lfs merge=lfs -text
+studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/119_1_CCS.bam filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/375-1.merged.bam filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/375-2.merged.bam filter=lfs diff=lfs merge=lfs -text
+assays/isoform_sequencing/dataset/reads/375-3.merged.bam filter=lfs diff=lfs merge=lfs -text
diff --git a/assays/BSA_RNAseq/isa.assay.xlsx b/assays/BSA_RNAseq/isa.assay.xlsx
index 399f4b8a40fb6354edb648cda747f4dc1d558145..72130cca8afe4887989ea2d9f789b515d63a7805 100644
Binary files a/assays/BSA_RNAseq/isa.assay.xlsx and b/assays/BSA_RNAseq/isa.assay.xlsx differ
diff --git a/assays/BSA_WGS/isa.assay.xlsx b/assays/BSA_WGS/isa.assay.xlsx
index 2e357f7219a77f02119427594574cd6d4d116af0..b8f9e15747d3a0d79169f1da3a75cdc94e088f7d 100644
Binary files a/assays/BSA_WGS/isa.assay.xlsx and b/assays/BSA_WGS/isa.assay.xlsx differ
diff --git a/assays/betalain_LCMS_quantification/isa.assay.xlsx b/assays/betalain_LCMS_quantification/isa.assay.xlsx
index d361dd4b2ffae902a2be61a221938294149b6bfa..3dfc4ee2c6bf0974b8500e80be0f48442074323a 100644
Binary files a/assays/betalain_LCMS_quantification/isa.assay.xlsx and b/assays/betalain_LCMS_quantification/isa.assay.xlsx differ
diff --git a/assays/betalain_photometric_quantification/isa.assay.xlsx b/assays/betalain_photometric_quantification/isa.assay.xlsx
index 68931c6c470e924b1bd598f466bf6f20196dc4dc..b5fbed0656e6357187d48c5f5fc97b85b02b1fea 100644
Binary files a/assays/betalain_photometric_quantification/isa.assay.xlsx and b/assays/betalain_photometric_quantification/isa.assay.xlsx differ
diff --git a/assays/isoform_sequencing/dataset/reads/119_1_CCS.bam b/assays/isoform_sequencing/dataset/reads/119_1_CCS.bam
new file mode 100755
index 0000000000000000000000000000000000000000..3b93c6bd5b6ac5201600c087b9d65fd71bd5eb48
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/119_1_CCS.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b53789374be790e77f6ac1f8a43cd871b87ca567361091ef89a2439b1ba53fb2
+size 1293612729
diff --git a/assays/isoform_sequencing/dataset/reads/375-1.merged.bam b/assays/isoform_sequencing/dataset/reads/375-1.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..d96dfe1b0e1939f8874e9b46331f0cfea1bcde40
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-1.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82dffeb3ea29774537fad008dcb5aed4f5aa93baab9e33b870213f4267a14bb7
+size 615150400
diff --git a/assays/isoform_sequencing/dataset/reads/375-2.merged.bam b/assays/isoform_sequencing/dataset/reads/375-2.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..0a989f356fe3ddb08a8c170daba8cbee142c9506
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-2.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:638b8a627d813363468b7ba99f91cedb25dfce0cbc796e102aedc5f87c011d97
+size 778034718
diff --git a/assays/isoform_sequencing/dataset/reads/375-3.merged.bam b/assays/isoform_sequencing/dataset/reads/375-3.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..d7d31240f11e944f8bcebe5b994259c95a5e17c3
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-3.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62ea280d73147d216548b43a8a04dcf0070e18484e6895abeaa4f53ef61b13e8
+size 558073691
diff --git a/assays/isoform_sequencing/dataset/reads/375-4.merged.bam b/assays/isoform_sequencing/dataset/reads/375-4.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..645e7ea876ee60960f7fe001732274129b770418
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-4.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a14cb9747f7e67e5b2823b4389cfee294f30f43afd389262e76f755b09fb713a
+size 344068849
diff --git a/assays/isoform_sequencing/dataset/reads/375-5.merged.bam b/assays/isoform_sequencing/dataset/reads/375-5.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..6b86c2bf1ad96a1125bff5154dbdbecefcc884cb
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-5.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1e2fd5a7ca5bb8fdf53dd65b02d41dcb8dc5b6bbcd6e1de3a42b196d0913f35
+size 485186923
diff --git a/assays/isoform_sequencing/dataset/reads/375-6.merged.bam b/assays/isoform_sequencing/dataset/reads/375-6.merged.bam
new file mode 100644
index 0000000000000000000000000000000000000000..1046ee535d36fa5fd95d9665ea83cbe388a71ca3
--- /dev/null
+++ b/assays/isoform_sequencing/dataset/reads/375-6.merged.bam
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd25e104b61822019898b069ca57a23735bb193fef63ec3094803ca10b3fa68c
+size 253500036
diff --git a/assays/isoform_sequencing/isa.assay.xlsx b/assays/isoform_sequencing/isa.assay.xlsx
index ae346d235c02e0ea6c0a10e07149fcc41e5f20f7..6b7f3ef71f38b78db0fd0838b361fb8eb7e3b36b 100644
Binary files a/assays/isoform_sequencing/isa.assay.xlsx and b/assays/isoform_sequencing/isa.assay.xlsx differ
diff --git a/isa.investigation.xlsx b/isa.investigation.xlsx
index 586f555874be0956c7b05f5da13c824073eaa385..8dd45fa7fbd03a346ca95ac8c9f8cb7c0a5c0b35 100644
Binary files a/isa.investigation.xlsx and b/isa.investigation.xlsx differ
diff --git a/studies/BSA_parent_betalain_quant/isa.study.xlsx b/studies/BSA_parent_betalain_quant/isa.study.xlsx
index 486411ac1c8c88a7585c68191536289931207512..ff0a662db33b8f0884c36657625524382087e5f4 100644
Binary files a/studies/BSA_parent_betalain_quant/isa.study.xlsx and b/studies/BSA_parent_betalain_quant/isa.study.xlsx differ
diff --git a/studies/Bulk_segregant_analysis/isa.study.xlsx b/studies/Bulk_segregant_analysis/isa.study.xlsx
index b7b89a26b6cd86616539825c87df590f547b6e4e..fc71a2dec298cd1363acf5427b9d443042145e1e 100644
Binary files a/studies/Bulk_segregant_analysis/isa.study.xlsx and b/studies/Bulk_segregant_analysis/isa.study.xlsx differ
diff --git a/studies/additional_data/isa.study.xlsx b/studies/additional_data/isa.study.xlsx
index 566ca09ccb44bcd0d23eb8c1fe9b6e8b6b8de26d..1257ad2d6cab9e38f7644077deaa31902f89123c 100644
Binary files a/studies/additional_data/isa.study.xlsx and b/studies/additional_data/isa.study.xlsx differ
diff --git a/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa b/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa
new file mode 100644
index 0000000000000000000000000000000000000000..492065652a687bb40a32e926c786f34ffbb8243d
--- /dev/null
+++ b/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93583f53b0f4cc263e31e60df8ad2c08d3fc63c9d28754cb63729d84465081af
+size 410638442
diff --git a/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa.fai b/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa.fai
new file mode 100644
index 0000000000000000000000000000000000000000..97c2b7c3de9bd408c0d14599b35ce185f8abaa16
--- /dev/null
+++ b/studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa.fai
@@ -0,0 +1,908 @@
+Scaffold_10	22670516	13	60	61
+Scaffold_11	22280117	23048384	60	61
+Scaffold_12	22052327	45699850	60	61
+Scaffold_13	20679869	68119729	60	61
+Scaffold_14	20190685	89144276	60	61
+Scaffold_15	17522127	109671486	60	61
+Scaffold_16	16951160	127485662	60	61
+Scaffold_1	38124660	144719354	60	61
+Scaffold_2	35657244	183479437	60	61
+Scaffold_3	30204323	219730981	60	61
+Scaffold_4	28349311	250438722	60	61
+Scaffold_5	25672467	279260534	60	61
+Scaffold_6	24628041	305360888	60	61
+Scaffold_7	24364990	330399409	60	61
+Scaffold_8	23766980	355170495	60	61
+Scaffold_9	22691259	379333604	60	61
+Contig177_quiver_	1092290	402403070	60	61
+Contig536_quiver_	146947	403513584	60	61
+Contig747_quiver_	124984	403663000	60	61
+Contig366_quiver_	47732	403790087	60	61
+Contig260_quiver_	42336	403838634	60	61
+Contig722_quiver_	33993	403881695	60	61
+Contig197_quiver_	33645	403916274	60	61
+Contig115_quiver_	33068	403950499	60	61
+Contig208_quiver_	32090	403984138	60	61
+Contig519_quiver_	31873	404016782	60	61
+Contig566_quiver_	31835	404049206	60	61
+Contig723_quiver_	30881	404081591	60	61
+Contig727_quiver_	29350	404113006	60	61
+Contig312_quiver_	29079	404142865	60	61
+Contig70_quiver_	28210	404172447	60	61
+Contig279_quiver_	28065	404201147	60	61
+Contig169_quiver_	27805	404229699	60	61
+Contig466_quiver_	26991	404257987	60	61
+Contig605_quiver_	26859	404285447	60	61
+Contig709_quiver_	26773	404312773	60	61
+Contig522_quiver_	26662	404340012	60	61
+Contig227_quiver_	26461	404367138	60	61
+Contig628_quiver_	26403	404394060	60	61
+Contig86_quiver_	26099	404420922	60	61
+Contig748_quiver_	25848	404447475	60	61
+Contig89_quiver_	25648	404473772	60	61
+Contig289_quiver_	25117	404499867	60	61
+Contig689_quiver_	25079	404525422	60	61
+Contig859_quiver_	25067	404550938	60	61
+Contig603_quiver_	24882	404576442	60	61
+Contig493_quiver_	24433	404601758	60	61
+Contig900_quiver_	24095	404626618	60	61
+Contig358_quiver_	23685	404651134	60	61
+Contig590_quiver_	23389	404675233	60	61
+Contig897_quiver_	23285	404699031	60	61
+Contig607_quiver_	22971	404722724	60	61
+Contig297_quiver_	22702	404746097	60	61
+Contig486_quiver_	22614	404769197	60	61
+Contig153_quiver_	22391	404792207	60	61
+Contig43_quiver_	22275	404814990	60	61
+Contig836_quiver_	22221	404837656	60	61
+Contig98_quiver_	21908	404860266	60	61
+Contig738_quiver_	21900	404882559	60	61
+Contig44_quiver_	21840	404904842	60	61
+Contig839_quiver_	21773	404927065	60	61
+Contig171_quiver_	21664	404949220	60	61
+Contig683_quiver_	21654	404971265	60	61
+Contig829_quiver_	21408	404993299	60	61
+Contig414_quiver_	21258	405015083	60	61
+Contig420_quiver_	21195	405036715	60	61
+Contig62_quiver_	20867	405058282	60	61
+Contig716_quiver_	20807	405079516	60	61
+Contig380_quiver_	20550	405100689	60	61
+Contig660_quiver_	20476	405121601	60	61
+Contig636_quiver_	20341	405142438	60	61
+Contig392_quiver_	20248	405163138	60	61
+Contig511_quiver_	20120	405183743	60	61
+Contig796_quiver_	20095	405204218	60	61
+Contig734_quiver_	20078	405224667	60	61
+Contig731_quiver_	20055	405245099	60	61
+Contig717_quiver_	19854	405265508	60	61
+Contig698_quiver_	19744	405285712	60	61
+Contig881_quiver_	19605	405305805	60	61
+Contig706_quiver_	19576	405325756	60	61
+Contig598_quiver_	19424	405345678	60	61
+Contig644_quiver_	19382	405365445	60	61
+Contig810_quiver_	19292	405385170	60	61
+Contig489_quiver_	19185	405404803	60	61
+Contig47_quiver_	18870	405424326	60	61
+Contig168_quiver_	18782	405443530	60	61
+Contig174_quiver_	18767	405462645	60	61
+Contig535_quiver_	18605	405481744	60	61
+Contig377_quiver_	18599	405500679	60	61
+Contig445_quiver_	18556	405519607	60	61
+Contig194_quiver_	18476	405538492	60	61
+Contig183_quiver_	18405	405557295	60	61
+Contig48_quiver_	18327	405576025	60	61
+Contig190_quiver_	18041	405594677	60	61
+Contig161_quiver_	18026	405613038	60	61
+Contig432_quiver_	17693	405631384	60	61
+Contig140_quiver_	17678	405649391	60	61
+Contig68_quiver_	17594	405667382	60	61
+Contig534_quiver_	17355	405685289	60	61
+Contig643_quiver_	17128	405702953	60	61
+Contig858_quiver_	17049	405720386	60	61
+Contig789_quiver_	17014	405737739	60	61
+Contig347_quiver_	16909	405755056	60	61
+Contig826_quiver_	16890	405772266	60	61
+Contig654_quiver_	16827	405789457	60	61
+Contig348_quiver_	16708	405806584	60	61
+Contig848_quiver_	16704	405823590	60	61
+Contig550_quiver_	16652	405840592	60	61
+Contig685_quiver_	16619	405857541	60	61
+Contig436_quiver_	16606	405874456	60	61
+Contig863_quiver_	16518	405891358	60	61
+Contig718_quiver_	16450	405908171	60	61
+Contig560_quiver_	16426	405924915	60	61
+Contig373_quiver_	16252	405941634	60	61
+Contig199_quiver_	16166	405958176	60	61
+Contig546_quiver_	16118	405974631	60	61
+Contig774_quiver_	16111	405991037	60	61
+Contig60_quiver_	16102	406007435	60	61
+Contig143_quiver_	16086	406023825	60	61
+Contig250_quiver_	16084	406040199	60	61
+Contig764_quiver_	16044	406056571	60	61
+Contig622_quiver_	16022	406072902	60	61
+Contig268_quiver_	15987	406089211	60	61
+Contig699_quiver_	15933	406105484	60	61
+Contig235_quiver_	15851	406121702	60	61
+Contig695_quiver_	15803	406137837	60	61
+Contig806_quiver_	15713	406153923	60	61
+Contig447_quiver_	15703	406169917	60	61
+Contig697_quiver_	15630	406185901	60	61
+Contig17_quiver_	15627	406201810	60	61
+Contig562_quiver_	15413	406217717	60	61
+Contig111_quiver_	15403	406233406	60	61
+Contig189_quiver_	15398	406249085	60	61
+Contig264_quiver_	15366	406264759	60	61
+Contig554_quiver_	15352	406280401	60	61
+Contig132_quiver_	15342	406296028	60	61
+Contig118_quiver_	15318	406311645	60	61
+Contig338_quiver_	15310	406327238	60	61
+Contig135_quiver_	15292	406342823	60	61
+Contig862_quiver_	15265	406358389	60	61
+Contig246_quiver_	15248	406373928	60	61
+Contig71_quiver_	15152	406389449	60	61
+Contig750_quiver_	15085	406404873	60	61
+Contig370_quiver_	14967	406420229	60	61
+Contig363_quiver_	14899	406435465	60	61
+Contig506_quiver_	14868	406450632	60	61
+Contig50_quiver_	14797	406465766	60	61
+Contig538_quiver_	14772	406480829	60	61
+Contig884_quiver_	14749	406495867	60	61
+Contig304_quiver_	14597	406510881	60	61
+Contig668_quiver_	14590	406525741	60	61
+Contig580_quiver_	14555	406540594	60	61
+Contig773_quiver_	14446	406555411	60	61
+Contig282_quiver_	14371	406570117	60	61
+Contig228_quiver_	14266	406584747	60	61
+Contig674_quiver_	14249	406599270	60	61
+Contig505_quiver_	14210	406613776	60	61
+Contig651_quiver_	14182	406628242	60	61
+Contig544_quiver_	14164	406642680	60	61
+Contig93_quiver_	14121	406657099	60	61
+Contig27_quiver_	14081	406671474	60	61
+Contig596_quiver_	13967	406685809	60	61
+Contig393_quiver_	13912	406700028	60	61
+Contig405_quiver_	13879	406714191	60	61
+Contig254_quiver_	13811	406728321	60	61
+Contig207_quiver_	13794	406742382	60	61
+Contig658_quiver_	13769	406756425	60	61
+Contig355_quiver_	13753	406770443	60	61
+Contig203_quiver_	13722	406784445	60	61
+Contig233_quiver_	13719	406798415	60	61
+Contig623_quiver_	13698	406812382	60	61
+Contig277_quiver_	13696	406826328	60	61
+Contig75_quiver_	13694	406840271	60	61
+Contig179_quiver_	13674	406854213	60	61
+Contig638_quiver_	13657	406868134	60	61
+Contig577_quiver_	13634	406882038	60	61
+Contig465_quiver_	13633	406895919	60	61
+Contig127_quiver_	13618	406909799	60	61
+Contig299_quiver_	13570	406923663	60	61
+Contig345_quiver_	13504	406937479	60	61
+Contig154_quiver_	13463	406951228	60	61
+Contig469_quiver_	13462	406964935	60	61
+Contig604_quiver_	13409	406978641	60	61
+Contig144_quiver_	13380	406992293	60	61
+Contig123_quiver_	13329	407005915	60	61
+Contig126_quiver_	13299	407019486	60	61
+Contig164_quiver_	13290	407033026	60	61
+Contig125_quiver_	13211	407046557	60	61
+Contig872_quiver_	13177	407060008	60	61
+Contig889_quiver_	13146	407073424	60	61
+Contig105_quiver_	13130	407086809	60	61
+Contig901_quiver_	13046	407100177	60	61
+Contig286_quiver_	12947	407113460	60	61
+Contig715_quiver_	12902	407126642	60	61
+Contig186_quiver_	12877	407139779	60	61
+Contig14_quiver_	12873	407152889	60	61
+Contig96_quiver_	12859	407165995	60	61
+Contig524_quiver_	12816	407179088	60	61
+Contig418_quiver_	12657	407192137	60	61
+Contig431_quiver_	12636	407205024	60	61
+Contig479_quiver_	12627	407217890	60	61
+Contig855_quiver_	12574	407230747	60	61
+Contig390_quiver_	12382	407243550	60	61
+Contig619_quiver_	12319	407256158	60	61
+Contig215_quiver_	12215	407268702	60	61
+Contig473_quiver_	12211	407281140	60	61
+Contig453_quiver_	12178	407293574	60	61
+Contig57_quiver_	12174	407305973	60	61
+Contig512_quiver_	12135	407318369	60	61
+Contig729_quiver_	12102	407330726	60	61
+Contig586_quiver_	12083	407343049	60	61
+Contig430_quiver_	11974	407355353	60	61
+Contig647_quiver_	11966	407367546	60	61
+Contig641_quiver_	11958	407379731	60	61
+Contig421_quiver_	11855	407391908	60	61
+Contig531_quiver_	11852	407403980	60	61
+Contig621_quiver_	11846	407416049	60	61
+Contig324_quiver_	11840	407428112	60	61
+Contig608_quiver_	11840	407440169	60	61
+Contig903_quiver_	11839	407452226	60	61
+Contig733_quiver_	11826	407464282	60	61
+Contig831_quiver_	11816	407476325	60	61
+Contig438_quiver_	11802	407488357	60	61
+Contig46_quiver_	11679	407500374	60	61
+Contig865_quiver_	11640	407512267	60	61
+Contig666_quiver_	11591	407524120	60	61
+Contig273_quiver_	11549	407535924	60	61
+Contig620_quiver_	11511	407547685	60	61
+Contig516_quiver_	11439	407559407	60	61
+Contig128_quiver_	11427	407571056	60	61
+Contig165_quiver_	11388	407582693	60	61
+Contig193_quiver_	11359	407594290	60	61
+Contig156_quiver_	11336	407605858	60	61
+Contig595_quiver_	11316	407617402	60	61
+Contig813_quiver_	11270	407628926	60	61
+Contig851_quiver_	11269	407640403	60	61
+Contig724_quiver_	11264	407651879	60	61
+Contig755_quiver_	11252	407663350	60	61
+Contig216_quiver_	11232	407674809	60	61
+Contig838_quiver_	11232	407686248	60	61
+Contig321_quiver_	11214	407697687	60	61
+Contig853_quiver_	11126	407709107	60	61
+Contig602_quiver_	11110	407720438	60	61
+Contig515_quiver_	11085	407731753	60	61
+Contig802_quiver_	11026	407743042	60	61
+Contig818_quiver_	10988	407754271	60	61
+Contig458_quiver_	10976	407765462	60	61
+Contig488_quiver_	10811	407776640	60	61
+Contig530_quiver_	10762	407787651	60	61
+Contig794_quiver_	10737	407798612	60	61
+Contig201_quiver_	10670	407809547	60	61
+Contig383_quiver_	10652	407820414	60	61
+Contig499_quiver_	10629	407831263	60	61
+Contig226_quiver_	10488	407842089	60	61
+Contig116_quiver_	10417	407852771	60	61
+Contig803_quiver_	10387	407863381	60	61
+Contig592_quiver_	10385	407873961	60	61
+Contig461_quiver_	10378	407884539	60	61
+Contig91_quiver_	10354	407895108	60	61
+Contig185_quiver_	10330	407905654	60	61
+Contig138_quiver_	10318	407916176	60	61
+Contig833_quiver_	10279	407926685	60	61
+Contig873_quiver_	10246	407937155	60	61
+Contig823_quiver_	10244	407947591	60	61
+Contig399_quiver_	10195	407958025	60	61
+Contig763_quiver_	10189	407968409	60	61
+Contig79_quiver_	10185	407978786	60	61
+Contig327_quiver_	10183	407989160	60	61
+Contig133_quiver_	10157	407999532	60	61
+Contig99_quiver_	10121	408009877	60	61
+Contig533_quiver_	10121	408020186	60	61
+Contig714_quiver_	10087	408030495	60	61
+Contig601_quiver_	10062	408040770	60	61
+Contig882_quiver_	10042	408051019	60	61
+Contig308_quiver_	9995	408061248	60	61
+Contig868_quiver_	9995	408071429	60	61
+Contig579_quiver_	9949	408081610	60	61
+Contig434_quiver_	9925	408091744	60	61
+Contig340_quiver_	9889	408101854	60	61
+Contig180_quiver_	9873	408111927	60	61
+Contig649_quiver_	9834	408121984	60	61
+Contig319_quiver_	9800	408132001	60	61
+Contig244_quiver_	9786	408141984	60	61
+Contig39_quiver_	9723	408151952	60	61
+Contig166_quiver_	9711	408161857	60	61
+Contig266_quiver_	9690	408171749	60	61
+Contig874_quiver_	9669	408181620	60	61
+Contig694_quiver_	9570	408191470	60	61
+Contig703_quiver_	9565	408201219	60	61
+Contig287_quiver_	9537	408210963	60	61
+Contig589_quiver_	9487	408220678	60	61
+Contig25_quiver_	9456	408230342	60	61
+Contig777_quiver_	9449	408239975	60	61
+Contig51_quiver_	9311	408249600	60	61
+Contig753_quiver_	9286	408259086	60	61
+Contig449_quiver_	9282	408268546	60	61
+Contig633_quiver_	9256	408278002	60	61
+Contig58_quiver_	9227	408287431	60	61
+Contig661_quiver_	9221	408296831	60	61
+Contig109_quiver_	9032	408306225	60	61
+Contig609_quiver_	9000	408315427	60	61
+Contig798_quiver_	8995	408324596	60	61
+Contig251_quiver_	8954	408333760	60	61
+Contig490_quiver_	8949	408342883	60	61
+Contig781_quiver_	8893	408352001	60	61
+Contig137_quiver_	8867	408361062	60	61
+Contig270_quiver_	8856	408370096	60	61
+Contig378_quiver_	8830	408379119	60	61
+Contig242_quiver_	8804	408388116	60	61
+Contig342_quiver_	8773	408397086	60	61
+Contig597_quiver_	8716	408406025	60	61
+Contig394_quiver_	8712	408414906	60	61
+Contig525_quiver_	8697	408423783	60	61
+Contig545_quiver_	8692	408432644	60	61
+Contig225_quiver_	8689	408441500	60	61
+Contig102_quiver_	8675	408450353	60	61
+Contig103_quiver_	8663	408459192	60	61
+Contig229_quiver_	8647	408468019	60	61
+Contig497_quiver_	8642	408476830	60	61
+Contig556_quiver_	8639	408485636	60	61
+Contig31_quiver_	8556	408494437	60	61
+Contig551_quiver_	8520	408503155	60	61
+Contig106_quiver_	8492	408511836	60	61
+Contig510_quiver_	8453	408520489	60	61
+Contig637_quiver_	8430	408529102	60	61
+Contig55_quiver_	8381	408537691	60	61
+Contig627_quiver_	8363	408546231	60	61
+Contig663_quiver_	8327	408554753	60	61
+Contig907_quiver_	8288	408563238	60	61
+Contig902_quiver_	8281	408571684	60	61
+Contig735_quiver_	8127	408580123	60	61
+Contig424_quiver_	8110	408588405	60	61
+Contig56_quiver_	8082	408596669	60	61
+Contig688_quiver_	8076	408604905	60	61
+Contig330_quiver_	8017	408613135	60	61
+Contig883_quiver_	7952	408621305	60	61
+Contig761_quiver_	7936	408629409	60	61
+Contig344_quiver_	7934	408637497	60	61
+Contig239_quiver_	7904	408645583	60	61
+Contig73_quiver_	7868	408653637	60	61
+Contig104_quiver_	7855	408661656	60	61
+Contig455_quiver_	7850	408669661	60	61
+Contig290_quiver_	7764	408677661	60	61
+Contig364_quiver_	7732	408685574	60	61
+Contig248_quiver_	7681	408693454	60	61
+Contig69_quiver_	7669	408701282	60	61
+Contig669_quiver_	7637	408709098	60	61
+Contig584_quiver_	7594	408716882	60	61
+Contig134_quiver_	7591	408724622	60	61
+Contig204_quiver_	7585	408732359	60	61
+Contig336_quiver_	7525	408740090	60	61
+Contig879_quiver_	7519	408747760	60	61
+Contig114_quiver_	7516	408755424	60	61
+Contig632_quiver_	7458	408763085	60	61
+Contig113_quiver_	7415	408770687	60	61
+Contig196_quiver_	7415	408778245	60	61
+Contig291_quiver_	7395	408785803	60	61
+Contig332_quiver_	7394	408793341	60	61
+Contig843_quiver_	7357	408800878	60	61
+Contig904_quiver_	7305	408808377	60	61
+Contig398_quiver_	7302	408815823	60	61
+Contig38_quiver_	7248	408823265	60	61
+Contig261_quiver_	7227	408830653	60	61
+Contig571_quiver_	7197	408838020	60	61
+Contig160_quiver_	7195	408845356	60	61
+Contig24_quiver_	7191	408852689	60	61
+Contig122_quiver_	7175	408860019	60	61
+Contig148_quiver_	7167	408867333	60	61
+Contig252_quiver_	7136	408874639	60	61
+Contig785_quiver_	7123	408881913	60	61
+Contig611_quiver_	7073	408889174	60	61
+Contig30_quiver_	7060	408896383	60	61
+Contig812_quiver_	7053	408903580	60	61
+Contig807_quiver_	7034	408910770	60	61
+Contig110_quiver_	7016	408917941	60	61
+Contig679_quiver_	7003	408925093	60	61
+Contig439_quiver_	6972	408932232	60	61
+Contig610_quiver_	6952	408939340	60	61
+Contig613_quiver_	6886	408946427	60	61
+Contig147_quiver_	6852	408953447	60	61
+Contig306_quiver_	6833	408960433	60	61
+Contig840_quiver_	6816	408967399	60	61
+Contig365_quiver_	6801	408974348	60	61
+Contig667_quiver_	6756	408981282	60	61
+Contig422_quiver_	6688	408988170	60	61
+Contig322_quiver_	6639	408994989	60	61
+Contig454_quiver_	6583	409001758	60	61
+Contig192_quiver_	6567	409008470	60	61
+Contig296_quiver_	6489	409015166	60	61
+Contig357_quiver_	6465	409021783	60	61
+Contig456_quiver_	6462	409028375	60	61
+Contig687_quiver_	6454	409034964	60	61
+Contig804_quiver_	6445	409041545	60	61
+Contig626_quiver_	6431	409048117	60	61
+Contig567_quiver_	6430	409054675	60	61
+Contig310_quiver_	6413	409061232	60	61
+Contig36_quiver_	6404	409067770	60	61
+Contig292_quiver_	6394	409074300	60	61
+Contig200_quiver_	6384	409080820	60	61
+Contig100_quiver_	6344	409087330	60	61
+Contig741_quiver_	6342	409093799	60	61
+Contig837_quiver_	6337	409100266	60	61
+Contig648_quiver_	6329	409106728	60	61
+Contig521_quiver_	6301	409113182	60	61
+Contig575_quiver_	6294	409119608	60	61
+Contig83_quiver_	6277	409126025	60	61
+Contig587_quiver_	6245	409132426	60	61
+Contig825_quiver_	6243	409138795	60	61
+Contig583_quiver_	6234	409145162	60	61
+Contig708_quiver_	6220	409151519	60	61
+Contig832_quiver_	6215	409157862	60	61
+Contig484_quiver_	6198	409164200	60	61
+Contig759_quiver_	6146	409170521	60	61
+Contig395_quiver_	6145	409176789	60	61
+Contig537_quiver_	6145	409183056	60	61
+Contig730_quiver_	6131	409189323	60	61
+Contig811_quiver_	6130	409195576	60	61
+Contig404_quiver_	6100	409201828	60	61
+Contig387_quiver_	6096	409208049	60	61
+Contig117_quiver_	6086	409214266	60	61
+Contig684_quiver_	6070	409220473	60	61
+Contig876_quiver_	6031	409226664	60	61
+Contig258_quiver_	5973	409232815	60	61
+Contig675_quiver_	5916	409238907	60	61
+Contig284_quiver_	5907	409244941	60	61
+Contig37_quiver_	5842	409250965	60	61
+Contig614_quiver_	5812	409256924	60	61
+Contig151_quiver_	5770	409262852	60	61
+Contig681_quiver_	5738	409268738	60	61
+Contig130_quiver_	5715	409274591	60	61
+Contig407_quiver_	5710	409280421	60	61
+Contig463_quiver_	5675	409286246	60	61
+Contig563_quiver_	5662	409292035	60	61
+Contig49_quiver_	5657	409297810	60	61
+Contig412_quiver_	5653	409303581	60	61
+Contig313_quiver_	5621	409309348	60	61
+Contig496_quiver_	5609	409315082	60	61
+Contig303_quiver_	5606	409320804	60	61
+Contig665_quiver_	5555	409326523	60	61
+Contig720_quiver_	5553	409332190	60	61
+Contig846_quiver_	5541	409337855	60	61
+Contig21_quiver_	5516	409343507	60	61
+Contig23_quiver_	5492	409349133	60	61
+Contig80_quiver_	5492	409354735	60	61
+Contig175_quiver_	5474	409360338	60	61
+Contig502_quiver_	5429	409365923	60	61
+Contig819_quiver_	5425	409371462	60	61
+Contig45_quiver_	5424	409376996	60	61
+Contig63_quiver_	5408	409382529	60	61
+Contig181_quiver_	5372	409388047	60	61
+Contig328_quiver_	5348	409393528	60	61
+Contig555_quiver_	5330	409398985	60	61
+Contig769_quiver_	5329	409404423	60	61
+Contig384_quiver_	5325	409409860	60	61
+Contig367_quiver_	5317	409415293	60	61
+Contig346_quiver_	5314	409420718	60	61
+Contig485_quiver_	5308	409426140	60	61
+Contig809_quiver_	5262	409431556	60	61
+Contig564_quiver_	5260	409436925	60	61
+Contig369_quiver_	5251	409442292	60	61
+Contig513_quiver_	5229	409447650	60	61
+Contig379_quiver_	5226	409452986	60	61
+Contig314_quiver_	5223	409458319	60	61
+Contig792_quiver_	5208	409463649	60	61
+Contig905_quiver_	5197	409468963	60	61
+Contig481_quiver_	5170	409474266	60	61
+Contig646_quiver_	5139	409479542	60	61
+Contig76_quiver_	5134	409484785	60	61
+Contig72_quiver_	5098	409490023	60	61
+Contig656_quiver_	5071	409495225	60	61
+Contig676_quiver_	5063	409500400	60	61
+Contig331_quiver_	5023	409505567	60	61
+Contig222_quiver_	5017	409510693	60	61
+Contig139_quiver_	4978	409515813	60	61
+Contig202_quiver_	4978	409520893	60	61
+Contig503_quiver_	4942	409525973	60	61
+Contig220_quiver_	4935	409531017	60	61
+Contig568_quiver_	4927	409536054	60	61
+Contig163_quiver_	4925	409541083	60	61
+Contig397_quiver_	4916	409546110	60	61
+Contig428_quiver_	4916	409551127	60	61
+Contig311_quiver_	4908	409556144	60	61
+Contig766_quiver_	4898	409561153	60	61
+Contig20_quiver_	4895	409566151	60	61
+Contig316_quiver_	4891	409571147	60	61
+Contig468_quiver_	4812	409576139	60	61
+Contig176_quiver_	4807	409581051	60	61
+Contig782_quiver_	4794	409585958	60	61
+Contig498_quiver_	4785	409590851	60	61
+Contig618_quiver_	4773	409595735	60	61
+Contig527_quiver_	4755	409600607	60	61
+Contig3_quiver_	4742	409605459	60	61
+Contig281_quiver_	4737	409610300	60	61
+Contig617_quiver_	4734	409615135	60	61
+Contig423_quiver_	4718	409619967	60	61
+Contig448_quiver_	4716	409624783	60	61
+Contig307_quiver_	4712	409629597	60	61
+Contig305_quiver_	4703	409634407	60	61
+Contig528_quiver_	4687	409639208	60	61
+Contig864_quiver_	4685	409643993	60	61
+Contig74_quiver_	4653	409648775	60	61
+Contig817_quiver_	4638	409653525	60	61
+Contig262_quiver_	4631	409658260	60	61
+Contig743_quiver_	4598	409662988	60	61
+Contig339_quiver_	4597	409667682	60	61
+Contig816_quiver_	4547	409672375	60	61
+Contig834_quiver_	4546	409677017	60	61
+Contig606_quiver_	4537	409681658	60	61
+Contig491_quiver_	4500	409686290	60	61
+Contig178_quiver_	4491	409690884	60	61
+Contig543_quiver_	4465	409695469	60	61
+Contig188_quiver_	4461	409700028	60	61
+Contig353_quiver_	4460	409704583	60	61
+Contig457_quiver_	4437	409709137	60	61
+Contig713_quiver_	4424	409713667	60	61
+Contig625_quiver_	4420	409718184	60	61
+Contig121_quiver_	4407	409722697	60	61
+Contig403_quiver_	4392	409727197	60	61
+Contig520_quiver_	4387	409731682	60	61
+Contig209_quiver_	4370	409736162	60	61
+Contig767_quiver_	4367	409740624	60	61
+Contig361_quiver_	4364	409745083	60	61
+Contig371_quiver_	4362	409749539	60	61
+Contig557_quiver_	4353	409753993	60	61
+Contig224_quiver_	4332	409758438	60	61
+Contig142_quiver_	4321	409762862	60	61
+Contig732_quiver_	4309	409767275	60	61
+Contig591_quiver_	4301	409771675	60	61
+Contig10_quiver_	4296	409776066	60	61
+Contig218_quiver_	4292	409780453	60	61
+Contig517_quiver_	4276	409784836	60	61
+Contig514_quiver_	4242	409789203	60	61
+Contig34_quiver_	4240	409793534	60	61
+Contig26_quiver_	4167	409797863	60	61
+Contig131_quiver_	4154	409802119	60	61
+Contig150_quiver_	4136	409806362	60	61
+Contig315_quiver_	4135	409810586	60	61
+Contig451_quiver_	4126	409814809	60	61
+Contig362_quiver_	4115	409819023	60	61
+Contig265_quiver_	4049	409823226	60	61
+Contig408_quiver_	4041	409827362	60	61
+Contig354_quiver_	4031	409831490	60	61
+Contig475_quiver_	3979	409835608	60	61
+Contig464_quiver_	3945	409839673	60	61
+Contig482_quiver_	3917	409843703	60	61
+Contig411_quiver_	3900	409847705	60	61
+Contig899_quiver_	3879	409851689	60	61
+Contig742_quiver_	3866	409855652	60	61
+Contig326_quiver_	3864	409859602	60	61
+Contig827_quiver_	3858	409863550	60	61
+Contig409_quiver_	3856	409867492	60	61
+Contig294_quiver_	3847	409871432	60	61
+Contig799_quiver_	3845	409875363	60	61
+Contig744_quiver_	3821	409879292	60	61
+Contig841_quiver_	3820	409883196	60	61
+Contig875_quiver_	3799	409887099	60	61
+Contig642_quiver_	3766	409890981	60	61
+Contig680_quiver_	3750	409894829	60	61
+Contig728_quiver_	3750	409898661	60	61
+Contig671_quiver_	3726	409902493	60	61
+Contig518_quiver_	3723	409906301	60	61
+Contig187_quiver_	3706	409910106	60	61
+Contig472_quiver_	3676	409913893	60	61
+Contig866_quiver_	3667	409917650	60	61
+Contig271_quiver_	3645	409921398	60	61
+Contig19_quiver_	3601	409925122	60	61
+Contig149_quiver_	3582	409928803	60	61
+Contig217_quiver_	3570	409932464	60	61
+Contig368_quiver_	3565	409936113	60	61
+Contig887_quiver_	3563	409939757	60	61
+Contig97_quiver_	3532	409943398	60	61
+Contig191_quiver_	3529	409947008	60	61
+Contig549_quiver_	3502	409950615	60	61
+Contig847_quiver_	3501	409954195	60	61
+Contig94_quiver_	3500	409957773	60	61
+Contig240_quiver_	3490	409961351	60	61
+Contig476_quiver_	3459	409964919	60	61
+Contig711_quiver_	3452	409968455	60	61
+Contig443_quiver_	3432	409971984	60	61
+Contig721_quiver_	3428	409975493	60	61
+Contig221_quiver_	3424	409978998	60	61
+Contig101_quiver_	3402	409982499	60	61
+Contig470_quiver_	3402	409985977	60	61
+Contig754_quiver_	3389	409989455	60	61
+Contig725_quiver_	3382	409992920	60	61
+Contig898_quiver_	3364	409996378	60	61
+Contig778_quiver_	3363	409999818	60	61
+Contig612_quiver_	3356	410003257	60	61
+Contig624_quiver_	3355	410006688	60	61
+Contig285_quiver_	3351	410010118	60	61
+Contig860_quiver_	3337	410013544	60	61
+Contig124_quiver_	3323	410016956	60	61
+Contig775_quiver_	3309	410020354	60	61
+Contig892_quiver_	3296	410023738	60	61
+Contig350_quiver_	3269	410027108	60	61
+Contig585_quiver_	3242	410030451	60	61
+Contig740_quiver_	3242	410033767	60	61
+Contig323_quiver_	3197	410037083	60	61
+Contig61_quiver_	3171	410040352	60	61
+Contig772_quiver_	3168	410043595	60	61
+Contig783_quiver_	3141	410046835	60	61
+Contig693_quiver_	3114	410050048	60	61
+Contig795_quiver_	3112	410053233	60	61
+Contig278_quiver_	3108	410056416	60	61
+Contig814_quiver_	3096	410059595	60	61
+Contig293_quiver_	3083	410062762	60	61
+Contig309_quiver_	3078	410065916	60	61
+Contig223_quiver_	3058	410069065	60	61
+Contig425_quiver_	3050	410072193	60	61
+Contig870_quiver_	3042	410075313	60	61
+Contig501_quiver_	3041	410078425	60	61
+Contig845_quiver_	3018	410081536	60	61
+Contig786_quiver_	3011	410084624	60	61
+Contig588_quiver_	3010	410087705	60	61
+Contig639_quiver_	3002	410090785	60	61
+Contig645_quiver_	2994	410093857	60	61
+Contig157_quiver_	2973	410096920	60	61
+Contig507_quiver_	2972	410099962	60	61
+Contig40_quiver_	2967	410103002	60	61
+Contig539_quiver_	2959	410106038	60	61
+Contig653_quiver_	2947	410109066	60	61
+Contig376_quiver_	2943	410112082	60	61
+Contig523_quiver_	2932	410115094	60	61
+Contig787_quiver_	2924	410118094	60	61
+Contig301_quiver_	2895	410121086	60	61
+Contig302_quiver_	2888	410124049	60	61
+Contig896_quiver_	2883	410127005	60	61
+Contig705_quiver_	2863	410129956	60	61
+Contig283_quiver_	2849	410132886	60	61
+Contig702_quiver_	2783	410135802	60	61
+Contig276_quiver_	2778	410138651	60	61
+Contig770_quiver_	2773	410141495	60	61
+Contig388_quiver_	2772	410144334	60	61
+Contig474_quiver_	2763	410147172	60	61
+Contig631_quiver_	2760	410150001	60	61
+Contig791_quiver_	2748	410152826	60	61
+Contig255_quiver_	2734	410155639	60	61
+Contig92_quiver_	2721	410158437	60	61
+Contig84_quiver_	2714	410161222	60	61
+Contig389_quiver_	2704	410164001	60	61
+Contig650_quiver_	2699	410166770	60	61
+Contig600_quiver_	2695	410169533	60	61
+Contig375_quiver_	2670	410172292	60	61
+Contig253_quiver_	2668	410175026	60	61
+Contig788_quiver_	2667	410177758	60	61
+Contig854_quiver_	2662	410180489	60	61
+Contig878_quiver_	2648	410183215	60	61
+Contig274_quiver_	2636	410185927	60	61
+Contig210_quiver_	2624	410188626	60	61
+Contig446_quiver_	2598	410191313	60	61
+Contig634_quiver_	2576	410193974	60	61
+Contig760_quiver_	2574	410196612	60	61
+Contig333_quiver_	2564	410199248	60	61
+Contig630_quiver_	2557	410201874	60	61
+Contig381_quiver_	2523	410204493	60	61
+Contig337_quiver_	2517	410207078	60	61
+Contig483_quiver_	2517	410209656	60	61
+Contig219_quiver_	2495	410212234	60	61
+Contig359_quiver_	2490	410214790	60	61
+Contig739_quiver_	2490	410217341	60	61
+Contig391_quiver_	2478	410219892	60	61
+Contig867_quiver_	2477	410222431	60	61
+Contig2_quiver_	2474	410224967	60	61
+Contig415_quiver_	2473	410227502	60	61
+Contig417_quiver_	2459	410230036	60	61
+Contig159_quiver_	2445	410232555	60	61
+Contig288_quiver_	2431	410235060	60	61
+Contig652_quiver_	2401	410237551	60	61
+Contig95_quiver_	2399	410240011	60	61
+Contig170_quiver_	2398	410242469	60	61
+Contig28_quiver_	2361	410244925	60	61
+Contig452_quiver_	2352	410247345	60	61
+Contig87_quiver_	2348	410249755	60	61
+Contig793_quiver_	2344	410252162	60	61
+Contig356_quiver_	2341	410254565	60	61
+Contig815_quiver_	2341	410256965	60	61
+Contig784_quiver_	2329	410259365	60	61
+Contig67_quiver_	2328	410261751	60	61
+Contig894_quiver_	2323	410264137	60	61
+Contig885_quiver_	2318	410266518	60	61
+Contig145_quiver_	2314	410268894	60	61
+Contig352_quiver_	2308	410271266	60	61
+Contig259_quiver_	2298	410273632	60	61
+Contig659_quiver_	2297	410275988	60	61
+Contig33_quiver_	2296	410278342	60	61
+Contig267_quiver_	2293	410280696	60	61
+Contig726_quiver_	2256	410283047	60	61
+Contig82_quiver_	2252	410285359	60	61
+Contig692_quiver_	2241	410287668	60	61
+Contig64_quiver_	2240	410289965	60	61
+Contig136_quiver_	2235	410292262	60	61
+Contig406_quiver_	2192	410294554	60	61
+Contig241_quiver_	2191	410296802	60	61
+Contig704_quiver_	2190	410299049	60	61
+Contig429_quiver_	2177	410301295	60	61
+Contig245_quiver_	2167	410303528	60	61
+Contig459_quiver_	2124	410305751	60	61
+Contig701_quiver_	2115	410307930	60	61
+Contig372_quiver_	2101	410310100	60	61
+Contig553_quiver_	2097	410312256	60	61
+Contig561_quiver_	2097	410314407	60	61
+Contig460_quiver_	2094	410316558	60	61
+Contig736_quiver_	2091	410318706	60	61
+Contig745_quiver_	2089	410320851	60	61
+Contig65_quiver_	2076	410322993	60	61
+Contig852_quiver_	2069	410325123	60	61
+Contig90_quiver_	2064	410327245	60	61
+Contig861_quiver_	2063	410329363	60	61
+Contig808_quiver_	2056	410331480	60	61
+Contig234_quiver_	2055	410333590	60	61
+Contig574_quiver_	2051	410335699	60	61
+Contig664_quiver_	2047	410337804	60	61
+Contig295_quiver_	2046	410339905	60	61
+Contig77_quiver_	2039	410342004	60	61
+Contig341_quiver_	2032	410344096	60	61
+Contig547_quiver_	2027	410346181	60	61
+Contig558_quiver_	2011	410348261	60	61
+Contig629_quiver_	1981	410350325	60	61
+Contig880_quiver_	1975	410352359	60	61
+Contig771_quiver_	1969	410354386	60	61
+Contig696_quiver_	1959	410356407	60	61
+Contig495_quiver_	1957	410358418	60	61
+Contig78_quiver_	1952	410360426	60	61
+Contig552_quiver_	1945	410362430	60	61
+Contig146_quiver_	1941	410364427	60	61
+Contig396_quiver_	1941	410366420	60	61
+Contig158_quiver_	1939	410368413	60	61
+Contig280_quiver_	1935	410370404	60	61
+Contig824_quiver_	1934	410372391	60	61
+Contig529_quiver_	1933	410374377	60	61
+Contig257_quiver_	1930	410376362	60	61
+Contig682_quiver_	1926	410378344	60	61
+Contig141_quiver_	1925	410380322	60	61
+Contig243_quiver_	1921	410382299	60	61
+Contig895_quiver_	1919	410384272	60	61
+Contig427_quiver_	1910	410386242	60	61
+Contig542_quiver_	1906	410388203	60	61
+Contig570_quiver_	1904	410390160	60	61
+Contig906_quiver_	1904	410392115	60	61
+Contig877_quiver_	1891	410394070	60	61
+Contig820_quiver_	1887	410396012	60	61
+Contig842_quiver_	1886	410397950	60	61
+Contig382_quiver_	1885	410399887	60	61
+Contig657_quiver_	1882	410401823	60	61
+Contig441_quiver_	1875	410403756	60	61
+Contig886_quiver_	1860	410405682	60	61
+Contig737_quiver_	1858	410407592	60	61
+Contig581_quiver_	1854	410409500	60	61
+Contig751_quiver_	1834	410411404	60	61
+Contig779_quiver_	1818	410413288	60	61
+Contig236_quiver_	1817	410415156	60	61
+Contig678_quiver_	1814	410417023	60	61
+Contig385_quiver_	1810	410418887	60	61
+Contig59_quiver_	1805	410420746	60	61
+Contig559_quiver_	1796	410422601	60	61
+Contig891_quiver_	1786	410424446	60	61
+Contig9_quiver_	1773	410426279	60	61
+Contig88_quiver_	1764	410428100	60	61
+Contig830_quiver_	1762	410429913	60	61
+Contig400_quiver_	1749	410431724	60	61
+Contig578_quiver_	1744	410433522	60	61
+Contig888_quiver_	1743	410435315	60	61
+Contig374_quiver_	1738	410437107	60	61
+Contig504_quiver_	1738	410438893	60	61
+Contig801_quiver_	1726	410440679	60	61
+Contig66_quiver_	1725	410442452	60	61
+Contig640_quiver_	1715	410444225	60	61
+Contig532_quiver_	1714	410445988	60	61
+Contig351_quiver_	1711	410447750	60	61
+Contig410_quiver_	1704	410449509	60	61
+Contig719_quiver_	1695	410451261	60	61
+Contig467_quiver_	1694	410453004	60	61
+Contig572_quiver_	1689	410454746	60	61
+Contig494_quiver_	1680	410456483	60	61
+Contig195_quiver_	1670	410458210	60	61
+Contig237_quiver_	1665	410459927	60	61
+Contig231_quiver_	1656	410461639	60	61
+Contig155_quiver_	1631	410463342	60	61
+Contig569_quiver_	1627	410465020	60	61
+Contig320_quiver_	1623	410466694	60	61
+Contig416_quiver_	1621	410468364	60	61
+Contig442_quiver_	1621	410470032	60	61
+Contig790_quiver_	1621	410471700	60	61
+Contig81_quiver_	1607	410473367	60	61
+Contig444_quiver_	1607	410475020	60	61
+Contig573_quiver_	1606	410476673	60	61
+Contig120_quiver_	1599	410478325	60	61
+Contig849_quiver_	1596	410479970	60	61
+Contig478_quiver_	1595	410481612	60	61
+Contig167_quiver_	1582	410483253	60	61
+Contig582_quiver_	1579	410484881	60	61
+Contig413_quiver_	1569	410486506	60	61
+Contig746_quiver_	1567	410488121	60	61
+Contig52_quiver_	1566	410489733	60	61
+Contig437_quiver_	1561	410491345	60	61
+Contig541_quiver_	1561	410492952	60	61
+Contig462_quiver_	1557	410494559	60	61
+Contig749_quiver_	1552	410496161	60	61
+Contig594_quiver_	1544	410497758	60	61
+Contig401_quiver_	1534	410499347	60	61
+Contig756_quiver_	1533	410500926	60	61
+Contig272_quiver_	1530	410502504	60	61
+Contig7_quiver_	1529	410504077	60	61
+Contig18_quiver_	1515	410505650	60	61
+Contig212_quiver_	1513	410507210	60	61
+Contig780_quiver_	1513	410508768	60	61
+Contig173_quiver_	1512	410510326	60	61
+Contig275_quiver_	1496	410511883	60	61
+Contig426_quiver_	1491	410513423	60	61
+Contig107_quiver_	1478	410514958	60	61
+Contig593_quiver_	1475	410516480	60	61
+Contig440_quiver_	1467	410517999	60	61
+Contig686_quiver_	1466	410519510	60	61
+Contig828_quiver_	1465	410521020	60	61
+Contig797_quiver_	1464	410522529	60	61
+Contig677_quiver_	1461	410524037	60	61
+Contig435_quiver_	1458	410525542	60	61
+Contig821_quiver_	1443	410527044	60	61
+Contig300_quiver_	1437	410528531	60	61
+Contig655_quiver_	1424	410530011	60	61
+Contig119_quiver_	1417	410531478	60	61
+Contig576_quiver_	1416	410532938	60	61
+Contig857_quiver_	1411	410534397	60	61
+Contig419_quiver_	1406	410535851	60	61
+Contig776_quiver_	1404	410537300	60	61
+Contig615_quiver_	1397	410538747	60	61
+Contig172_quiver_	1393	410540187	60	61
+Contig433_quiver_	1385	410541623	60	61
+Contig343_quiver_	1382	410543051	60	61
+Contig492_quiver_	1374	410544476	60	61
+Contig890_quiver_	1370	410545892	60	61
+Contig471_quiver_	1369	410547304	60	61
+Contig182_quiver_	1366	410548715	60	61
+Contig238_quiver_	1359	410550123	60	61
+Contig707_quiver_	1358	410551524	60	61
+Contig835_quiver_	1348	410552924	60	61
+Contig822_quiver_	1339	410554314	60	61
+Contig768_quiver_	1337	410555695	60	61
+Contig856_quiver_	1330	410557074	60	61
+Contig53_quiver_	1324	410558445	60	61
+Contig263_quiver_	1323	410559811	60	61
+Contig844_quiver_	1322	410561176	60	61
+Contig269_quiver_	1318	410562540	60	61
+Contig205_quiver_	1316	410563899	60	61
+Contig765_quiver_	1311	410565256	60	61
+Contig710_quiver_	1304	410566608	60	61
+Contig112_quiver_	1300	410567953	60	61
+Contig249_quiver_	1296	410569294	60	61
+Contig129_quiver_	1295	410570631	60	61
+Contig247_quiver_	1295	410571967	60	61
+Contig480_quiver_	1294	410573303	60	61
+Contig500_quiver_	1277	410574638	60	61
+Contig334_quiver_	1269	410575956	60	61
+Contig508_quiver_	1268	410577266	60	61
+Contig565_quiver_	1262	410578575	60	61
+Contig509_quiver_	1261	410579878	60	61
+Contig691_quiver_	1259	410581180	60	61
+Contig758_quiver_	1256	410582479	60	61
+Contig184_quiver_	1252	410583775	60	61
+Contig805_quiver_	1243	410585067	60	61
+Contig329_quiver_	1240	410586350	60	61
+Contig548_quiver_	1231	410587630	60	61
+Contig450_quiver_	1229	410588901	60	61
+Contig757_quiver_	1220	410590170	60	61
+Contig752_quiver_	1213	410591430	60	61
+Contig41_quiver_	1210	410592682	60	61
+Contig335_quiver_	1203	410593932	60	61
+Contig206_quiver_	1199	410595175	60	61
+Contig230_quiver_	1196	410596413	60	61
+Contig325_quiver_	1187	410597648	60	61
+Contig670_quiver_	1182	410598874	60	61
+Contig232_quiver_	1176	410600095	60	61
+Contig318_quiver_	1173	410601310	60	61
+Contig673_quiver_	1163	410602522	60	61
+Contig800_quiver_	1161	410603724	60	61
+Contig540_quiver_	1158	410604924	60	61
+Contig317_quiver_	1156	410606121	60	61
+Contig850_quiver_	1153	410607316	60	61
+Contig386_quiver_	1152	410608508	60	61
+Contig214_quiver_	1143	410609699	60	61
+Contig402_quiver_	1136	410610881	60	61
+Contig298_quiver_	1127	410612055	60	61
+Contig198_quiver_	1126	410613220	60	61
+Contig85_quiver_	1123	410614383	60	61
+Contig616_quiver_	1115	410615544	60	61
+Contig42_quiver_	1110	410616696	60	61
+Contig869_quiver_	1109	410617844	60	61
+Contig662_quiver_	1099	410618991	60	61
+Contig211_quiver_	1080	410620128	60	61
+Contig108_quiver_	1077	410621245	60	61
+Contig635_quiver_	1072	410622359	60	61
+Contig893_quiver_	1066	410623468	60	61
+Contig349_quiver_	1064	410624571	60	61
+Contig871_quiver_	1043	410625672	60	61
+Contig256_quiver_	1040	410626752	60	61
+Contig700_quiver_	1040	410627829	60	61
+Contig690_quiver_	981	410628906	60	61
+Contig162_quiver_	970	410629923	60	61
+Contig672_quiver_	964	410630929	60	61
+Contig526_quiver_	872	410631929	60	61
+Contig712_quiver_	869	410632835	60	61
+Contig213_quiver_	818	410633738	60	61
+Contig599_quiver_	815	410634589	60	61
+Contig762_quiver_	741	410635437	60	61
+Contig360_quiver_	727	410636210	60	61
+Contig487_quiver_	640	410636969	60	61
+Contig477_quiver_	639	410637639	60	61
+Contig152_quiver_	131	410638308	60	61
diff --git a/studies/isoform_sequencing/isa.study.xlsx b/studies/isoform_sequencing/isa.study.xlsx
index 48af51fa63c14650219e5e214e849d894b1120dd..8eb16406627b088cc52f0610b3be96421ca7c2d1 100644
Binary files a/studies/isoform_sequencing/isa.study.xlsx and b/studies/isoform_sequencing/isa.study.xlsx differ
diff --git a/workflows/BSA/readme.txt b/workflows/BSA/readme.txt
index 1c81ccdc62112abefd1eb757dfce03d9db65600c..a7750ca09f24c08d3cf55d2b0f935a1bfa3e3a5f 100644
--- a/workflows/BSA/readme.txt
+++ b/workflows/BSA/readme.txt
@@ -7,39 +7,39 @@ Code for bulk segregant analysis, haplotype analysis and quantification of flowe
 
 RNAseq:
 
-- code/BSA/RNAseq/adapter_trimming.sh
+- workflows/BSA/RNAseq/adapter_trimming.sh
 trimming of adapter sequences using Trimmomatic
 
-- code/BSA/RNAseq/index_STAR.sh
+- workflows/BSA/RNAseq/index_STAR.sh
 index the genome with STAR using the genome annotation v2.2
 
-- code/BSA/RNAseq/run_STAR.sh
+- workflows/BSA/RNAseq/run_STAR.sh
 map reads to the genome using STAR
 
-- code/BSA/RNAseq/QC.sh
+- workflows/BSA/RNAseq/QC.sh
 quality control of adapter trimming and read mapping
 
-- code/BSA/RNAseq/run_kallisto.sh
+- workflows/BSA/RNAseq/run_kallisto.sh
 quantification of gene expression using kallisto
 
 WGS:
 
-- code/BSA/WGS/map_reads.sh
+- workflows/BSA/WGS/map_reads.sh
 map WGS reads to the genome
 
-- code/BSA/WGS/combined_filter.sh
+- workflows/BSA/WGS/combined_filter.sh
 call and filter variants
 
 Analysis:
 
-- code/BSA/read_count_analysis.Rmd
+- workflows/BSA/read_count_analysis.Rmd
 Analysis of gene expression data in pooled flower tissue
 
-- code/BSA/bsa_and_plotting.R
+- workflows/BSA/bsa_and_plotting.R
 Conduct bulk segregant analysis
 
-- code/BSA/snpEff_analysis.Rmd
+- workflows/BSA/snpEff_analysis.Rmd
 Analyse BSA variants using snpEff
 
-- code/BSA/read_count_analysis_from_bam.Rmd
+- workflows/BSA/read_count_analysis_from_bam.Rmd
 Investigate support for AhCYP76AD2 variants by RNA-seq data
diff --git a/workflows/annotation_analysis/R2R3_analysis_reannotation.Rmd b/workflows/annotation_analysis/R2R3_analysis_reannotation.Rmd
index a35b46aa656b5298a7c755b2a10131a4ddc0fb11..2d73d201de112f39962676d6f211b59bb92dd7a7 100644
--- a/workflows/annotation_analysis/R2R3_analysis_reannotation.Rmd
+++ b/workflows/annotation_analysis/R2R3_analysis_reannotation.Rmd
@@ -27,9 +27,9 @@ This script takes the braker2 output and runs an HMMscan using the MYB DNA-bindi
 ## HMMscan using the MYB DNA-binding domain:
 
 ```{bash}
-mkdir -p data/annotation_analysis/myb_annotation/hmmscan
+mkdir -p runs/annotation_analysis/myb_annotation/hmmscan
 
-hmmscan --domtblout data/annotation_analysis/myb_annotation/hmmscan/out.txt data/annotation_analysis/myb_annotation/myb_profile/Myb_DNA-binding.hmm polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta
+hmmscan --domtblout runs/annotation_analysis/myb_annotation/hmmscan/out.txt runs/annotation_analysis/myb_annotation/myb_profile/Myb_DNA-binding.hmm runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta
 ```
 
 
@@ -40,7 +40,7 @@ Takes domtblout output of the HMMscan using the MYB DNA-binding domain HMM profi
 
 ```{r}
 # Read hmmscan output file into variable data (domtblout format)
-data <- read.table('data/annotation_analysis/myb_annotation/hmmscan/out.txt', row.names = NULL, fill = T)
+data <- read.table('runs/annotation_analysis/myb_annotation/hmmscan/out.txt', row.names = NULL, fill = T)
 ## Add column including the length of alignment
 data <- data %>%
   mutate(alignment_length = V17-V16)
@@ -144,7 +144,7 @@ Extracting only the R2R3 mybs (with two adjacent MYB domains).
 filtered_mybs <- filtered_mybs[filtered_mybs$Freq > 1,]
 
 # write gene names to file
-write.csv(filtered_mybs, file = "data/annotation_analysis/myb_annotation/myb_names.txt",
+write.csv(filtered_mybs, file = "runs/annotation_analysis/myb_annotation/myb_names.txt",
             quote = F,
             row.names = F)
 ```
@@ -179,12 +179,12 @@ read.gtf <- function(file){
 }
 
 # read annotation and filter for MYB genes
-annotation <- read.gtf(file = "polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
+annotation <- read.gtf(file = "runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
 annotation <- annotation %>%
   filter(transcript_id %in% filtered_mybs$query_name)
 
 # read in MYB subgroup and function assignment
-myb_function <- read_csv(file = "data/manual_sheets/MYB_with_subgroups_and_function.csv")
+myb_function <- read_csv(file = "studies/additional_data/resources/MYB_with_subgroups_and_function.csv")
 myb_function <- myb_function %>%
   mutate(Function = replace_na(Function, "-"))
 
@@ -232,7 +232,7 @@ myb_stats[62,1] <- "AmMYB3"
 myb_stats[63,1] <- "AmMYB4"
 
 # write output file
-write.csv(myb_stats, file = "data/annotation_analysis/myb_annotation/myb_stats.csv",
+write.csv(myb_stats, file = "runs/annotation_analysis/myb_annotation/myb_stats.csv",
             quote = T,
             row.names = F)
 ```
@@ -242,7 +242,7 @@ Analyse basic statistics:
 
 ```{r}
 # read in data
-myb_stats <- read.csv(file = "data/annotation_analysis/myb_annotation/myb_stats.csv")
+myb_stats <- read.csv(file = "runs/annotation_analysis/myb_annotation/myb_stats.csv")
 
 # exon count of R2R3 MYBs
 myb_stats %>%
@@ -257,7 +257,7 @@ myb_stats %>%
 Extract myb protein fasta based on the identified MYB names.
 
 ```{bash}
-seqkit faidx -l data/annotation_analysis/myb_annotation/myb_names.txt polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta > data/annotation_analysis/myb_annotation/AH_myb.fasta
+seqkit faidx -l runs/annotation_analysis/myb_annotation/myb_names.txt runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta > runs/annotation_analysis/myb_annotation/AH_myb.fasta
 ```
 
 ## Alignment and Phylogeny
@@ -270,10 +270,10 @@ Supplements of: Stracke, R., Holtgräwe, D., Schneider, J., Pucker, B., Rosleff
 
 ```{bash}
 # align MYBs using clustalOmega
-cat data/annotation_analysis/myb_annotation/AH_myb.fasta data/annotation_analysis/myb_annotation/bvulgaris_athaliana_myb.fasta | /home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=- --outfile=data/annotation_analysis/myb_annotation/all_myb.aln --force
+cat runs/annotation_analysis/myb_annotation/AH_myb.fasta runs/annotation_analysis/myb_annotation/bvulgaris_athaliana_myb.fasta | /home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=- --outfile=runs/annotation_analysis/myb_annotation/all_myb.aln --force
 
 # create phylogeny from alignment using clustalw2
-/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=data/annotation_analysis/myb_annotation/all_myb.aln -outfile=data/annotation_analysis/myb_annotation/all_myb.phb
+/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=runs/annotation_analysis/myb_annotation/all_myb.aln -outfile=runs/annotation_analysis/myb_annotation/all_myb.phb
 ```
 
 ## Plot phylogeny
@@ -282,7 +282,7 @@ I will try to use the same functions as I did for my bachelor thesis.
 
 ```{r}
 # read in the bootstrapped phylogeny
-tree <- read.raxml("data/annotation_analysis/myb_annotation/all_myb.phb")
+tree <- read.raxml("runs/annotation_analysis/myb_annotation/all_myb.phb")
 
 # create tree object
 t <- ggtree(tree, branch.length = 'none', layout='circular', size=0.2)
@@ -401,12 +401,12 @@ t_species +
   plot_clade2(588,'AtMYB5', "Development, flavonoid biosynthesis") +
   theme(plot.margin = margin(1,2,1,1, "cm"))
 
-ggsave(filename = "plots/MYB_phylogenetic_tree.png",
+ggsave(filename = "runs/MYB_phylogenetic_tree.png",
        width = 14,
        height = 14,
        dpi = 600)
 
-ggsave(filename = "plots/MYB_phylogenetic_tree.pdf",
+ggsave(filename = "runs/MYB_phylogenetic_tree.pdf",
        device = "pdf",
        width = 12,
        height = 12,
@@ -419,17 +419,17 @@ ggsave(filename = "plots/MYB_phylogenetic_tree.pdf",
 Create and plot alignment of MYBs of S6 and BvMYB1-like clades.
 
 ```{bash}
-mkdir plots/AmMYB2_figure/
+mkdir runs/plots/AmMYB2_figure/
 
 # align using clustalomega
-/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --in=data/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.fasta --outfile=data/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.aln
+/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --in=runs/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.fasta --outfile=runs/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.aln
 ```
 
 Import and plot alignment:
 
 ```{r}
 # read in alignment
-S6_align <- readAAMultipleAlignment("data/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.aln",
+S6_align <- readAAMultipleAlignment("runs/annotation_analysis/myb_annotation/S6_betalain_myb_alignment/manual_S6.aln",
                                     format = "fasta")
 S6_align@unmasked@ranges@NAMES <- c("AtMYB75 (PAP1)", "AtMYB90 (PAP2)", "AtMYB113", "AtMYB114", "AhMYB2.1", "AhMYB2.2", "BvMYB1", "Bv_ralf",
                                     "AhMYB3.1","AhMYB4.1")
@@ -447,7 +447,7 @@ ggmsa(S6_align,
         plot.margin = unit(c(1,1,1,1), "cm"))
 
 # save the alignment
-ggsave(filename = "plots/AmMYB2_figure/S6_betalain_myb_align.png",
+ggsave(filename = "runs/plots/AmMYB2_figure/S6_betalain_myb_align.png",
        width = 9,
        height = 6,
        dpi = 320,
@@ -466,7 +466,7 @@ ggmsa(S6_align,
         plot.margin = unit(c(0,0.5,0,0.5), "cm"))
 
 # save the alignment
-ggsave(filename = "plots/AmMYB2_figure/S6_betalain_myb_S6.png",
+ggsave(filename = "runs/plots/AmMYB2_figure/S6_betalain_myb_S6.png",
        width = 9,
        height = 2.5,
        dpi = 400,
@@ -518,7 +518,7 @@ residue_raster <- ggplot(data = df_melt) +
 residue_raster
 
 
-ggsave(filename = "plots/AmMYB2_figure/residue_raster.png",
+ggsave(filename = "runs/plots/AmMYB2_figure/residue_raster.png",
        width = 9,
        height = 1.5,
        dpi = 400)
diff --git a/workflows/annotation_analysis/betalain_and_flavonoid_identification.sh b/workflows/annotation_analysis/betalain_and_flavonoid_identification.sh
index d11067692f19afdb04f2d6d738790d484a9aed17..b4b878aab818aff5a777df5e6dbe33b34128eacc 100644
--- a/workflows/annotation_analysis/betalain_and_flavonoid_identification.sh
+++ b/workflows/annotation_analysis/betalain_and_flavonoid_identification.sh
@@ -5,19 +5,19 @@
 # identification of betalains by blast
 # create blast database of annotated protein sequences
 # source of betalain pathway protein sequences is described in manuscript
-mkdir -p data/annotation_analysis/betalains/DB
-mkdir -p data/annotation_analysis/flavonoids
+mkdir -p runs/annotation_analysis/betalains/DB
+mkdir -p runs/annotation_analysis/flavonoids
 
-makeblastdb -in polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
-	-out data/annotation_analysis/betalains/DB/blast_db \
-	-logfile data/annotation_analysis/betalains/blast_db.log \
+makeblastdb -in runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
+	-out runs/annotation_analysis/betalains/DB/blast_db \
+	-logfile runs/annotation_analysis/betalains/blast_db.log \
 	-dbtype prot
 
 # run protein blast of described betalain pathway genes against blast database
-blastp -query data/annotation_analysis/betalains/pathway.fasta \
-	-db data/annotation_analysis/betalains/DB/blast_db \
+blastp -query runs/annotation_analysis/betalains/pathway.fasta \
+	-db runs/annotation_analysis/betalains/DB/blast_db \
 	-outfmt 7 \
-	-out data/annotation_analysis/betalains/pathway_against_protein.out \
+	-out runs/annotation_analysis/betalains/pathway_against_protein.out \
 	-qcov_hsp_perc 80
 
 
@@ -25,36 +25,36 @@ blastp -query data/annotation_analysis/betalains/pathway.fasta \
 # run KIPEs:
 python /home/tom/Documents/tools/KIPEs/KIPEs3.py --baits /home/tom/Documents/tools/KIPEs/flavonoid_baits/ \
 	--positions /home/tom/Documents/tools/KIPEs/flavonoid_residues/ \
-	--out data/annotation_analysis/flavonoids/ \
-	--subject polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
+	--out runs/annotation_analysis/flavonoids/ \
+	--subject runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
 	--seqtype pep \
 	--cpus 6
 
 # create blast db
-mkdir data/annotation_analysis/flavonoids/blast_db
-makeblastdb -in polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta -out data/annotation_analysis/flavonoids/blast_db/db -dbtype nucl
+mkdir runs/annotation_analysis/flavonoids/blast_db
+makeblastdb -in runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta -out runs/annotation_analysis/flavonoids/blast_db/db -dbtype nucl
 
 # blast searches for unidentified candidate genes using KIPEs bait sequences
-tblastn -query data/annotation_analysis/flavonoids/KIPEs/blast_query/ANS.fasta -db data/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > data/annotation_analysis/flavonoids/ANS_blast.out
-tblastn -query data/annotation_analysis/flavonoids/KIPEs/blast_query/ANR.fasta -db data/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > data/annotation_analysis/flavonoids/ANR_blast.out
-tblastn -query data/annotation_analysis/flavonoids/KIPEs/blast_query/F3-5-H.fasta -db data/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > data/annotation_analysis/flavonoids/F3-5-H_blast.out
-tblastn -query data/annotation_analysis/flavonoids/KIPEs/blast_query/FNS1.fasta -db data/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > data/annotation_analysis/flavonoids/FNS1_blast.out
-tblastn -query data/annotation_analysis/flavonoids/KIPEs/blast_query/CHI2.fasta -db data/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > data/annotation_analysis/flavonoids/CHI2_blast.out
+tblastn -query runs/annotation_analysis/flavonoids/KIPEs/blast_query/ANS.fasta -db runs/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > runs/annotation_analysis/flavonoids/ANS_blast.out
+tblastn -query runs/annotation_analysis/flavonoids/KIPEs/blast_query/ANR.fasta -db runs/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > runs/annotation_analysis/flavonoids/ANR_blast.out
+tblastn -query runs/annotation_analysis/flavonoids/KIPEs/blast_query/F3-5-H.fasta -db runs/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > runs/annotation_analysis/flavonoids/F3-5-H_blast.out
+tblastn -query runs/annotation_analysis/flavonoids/KIPEs/blast_query/FNS1.fasta -db runs/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > runs/annotation_analysis/flavonoids/FNS1_blast.out
+tblastn -query runs/annotation_analysis/flavonoids/KIPEs/blast_query/CHI2.fasta -db runs/annotation_analysis/flavonoids/blast_db/db -outfmt 7 > runs/annotation_analysis/flavonoids/CHI2_blast.out
 
 # exonerate protein alignments
 # prepare fasta file of KIPEs bait sequences
-sed 's/%_//' data/annotation_analysis/flavonoids/KIPEs/blast_query/ANS.fasta > data/annotation_analysis/flavonoids/ANS_fixed.fasta
+sed 's/%_//' runs/annotation_analysis/flavonoids/KIPEs/blast_query/ANS.fasta > runs/annotation_analysis/flavonoids/ANS_fixed.fasta
 # run exonerate
-exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query data/annotation_analysis/flavonoids/ANS_fixed.fasta --target polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > data/annotation_analysis/flavonoids/ANS_exonerate.gff
+exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query runs/annotation_analysis/flavonoids/ANS_fixed.fasta --target runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > runs/annotation_analysis/flavonoids/ANS_exonerate.gff
 
-sed 's/%_//' data/annotation_analysis/flavonoids/KIPEs/blast_query/ANR.fasta > data/annotation_analysis/flavonoids/ANR_fixed.fasta
-exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query data/annotation_analysis/flavonoids/ANR_fixed.fasta --target polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > data/annotation_analysis/flavonoids/ANR_exonerate.gff
+sed 's/%_//' runs/annotation_analysis/flavonoids/KIPEs/blast_query/ANR.fasta > runs/annotation_analysis/flavonoids/ANR_fixed.fasta
+exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query runs/annotation_analysis/flavonoids/ANR_fixed.fasta --target runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > runs/annotation_analysis/flavonoids/ANR_exonerate.gff
 
-sed 's/%_//' data/annotation_analysis/flavonoids/KIPEs/blast_query/F3-5-H.fasta > data/annotation_analysis/flavonoids/F3-5-H_fixed.fasta
-exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query data/annotation_analysis/flavonoids/F3-5-H_fixed.fasta --target polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > data/annotation_analysis/flavonoids/F3-5-H_exonerate.gff
+sed 's/%_//' runs/annotation_analysis/flavonoids/KIPEs/blast_query/F3-5-H.fasta > runs/annotation_analysis/flavonoids/F3-5-H_fixed.fasta
+exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query runs/annotation_analysis/flavonoids/F3-5-H_fixed.fasta --target runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > runs/annotation_analysis/flavonoids/F3-5-H_exonerate.gff
 
-sed 's/%_//' data/annotation_analysis/flavonoids/KIPEs/blast_query/FNS1.fasta > data/annotation_analysis/flavonoids/FNS1_fixed.fasta
-exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query data/annotation_analysis/flavonoids/FNS1_fixed.fasta --target polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > data/annotation_analysis/flavonoids/FNS1_exonerate.gff
+sed 's/%_//' runs/annotation_analysis/flavonoids/KIPEs/blast_query/FNS1.fasta > runs/annotation_analysis/flavonoids/FNS1_fixed.fasta
+exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query runs/annotation_analysis/flavonoids/FNS1_fixed.fasta --target runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > runs/annotation_analysis/flavonoids/FNS1_exonerate.gff
 
-sed 's/%_//' data/annotation_analysis/flavonoids/KIPEs/blast_query/CHI2.fasta > data/annotation_analysis/flavonoids/CHI2_fixed.fasta
-exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query data/annotation_analysis/flavonoids/CHI2_fixed.fasta --target polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > data/annotation_analysis/flavonoids/CHI2_exonerate.gff
+sed 's/%_//' runs/annotation_analysis/flavonoids/KIPEs/blast_query/CHI2.fasta > runs/annotation_analysis/flavonoids/CHI2_fixed.fasta
+exonerate --model protein2genome --percent 55 --showvulgar no --showalignment no --showtargetgff yes --query runs/annotation_analysis/flavonoids/CHI2_fixed.fasta --target runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta > runs/annotation_analysis/flavonoids/CHI2_exonerate.gff
diff --git a/workflows/annotation_analysis/betalain_phylogenetic_analysis.Rmd b/workflows/annotation_analysis/betalain_phylogenetic_analysis.Rmd
index 881844d0c47f5ddc341cfd0a4eb59415c56f1d01..51422229a209bf205a21214ffc92e3b32753d360 100644
--- a/workflows/annotation_analysis/betalain_phylogenetic_analysis.Rmd
+++ b/workflows/annotation_analysis/betalain_phylogenetic_analysis.Rmd
@@ -18,22 +18,22 @@ knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2/")
 Perform phylogenetic analysis of betalain pathway genes. Searched for protein fasta sequences of known betalain pathway genes. Aligned the fasta sequences using ClustalOmega with default settings. Created a neighbour joining tree with 1000 bootstrap replicates using ClustalW. Used seaview to correctly root the phylogeny. Perform alignment and tree construction:
 
 ```{bash}
-mkdir -p data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD
-mkdir -p data/annotation_analysis/betalains/phylogenetic_analysis/DODA
+mkdir -p runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD
+mkdir -p runs/annotation_analysis/betalains/phylogenetic_analysis/DODA
 
 # CYP76AD
 ### perform alignment
-/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.fasta --outfile=data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.aln --force
+/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.fasta --outfile=runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.aln --force
 
 ### create phylogeny, 1000 bootstrap replicates
-/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.aln -outfile=data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.phb
+/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.aln -outfile=runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD.phb
 
 # DODA
 ### perform alignment
-/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=data/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.fasta --outfile=data/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.aln --force
+/home/tom/Documents/tools/clustalo-1.2.4-Ubuntu-x86_64 --threads=6 --in=runs/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.fasta --outfile=runs/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.aln --force
 
 ### create phylogeny, 1000 bootstrap replicates
-/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=data/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.aln -outfile=data/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.phb
+/home/tom/Documents/tools/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -bootstrap=1000 -infile=runs/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.aln -outfile=runs/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA.phb
 ```
 
 ## Alignment plotting
@@ -42,7 +42,7 @@ After rerooting to the outgroup using seaview:
 
 ```{r}
 # read in the CYP76AD1 tree
-tree.CYP <- read.tree(file = "data/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD_rooted.phb")
+tree.CYP <- read.tree(file = "runs/annotation_analysis/betalains/phylogenetic_analysis/CYP76AD/CYP76AD_rooted.phb")
 
 # adjust names
 tree.CYP$tip.label <- gsub("\\_.*", "", tree.CYP$tip.label)
@@ -56,12 +56,12 @@ ggtree(tree.CYP, layout = "rectangular") +
   xlim_tree(0.4)
 
 # save tree
-ggsave(filename = "plots/CYP76AD_rooted.png", width = 10)
-ggsave(filename = "plots/CYP76AD_rooted.pdf", width = 10)
+ggsave(filename = "runs/plots/CYP76AD_rooted.png", width = 10)
+ggsave(filename = "runs/plots/CYP76AD_rooted.pdf", width = 10)
 
 
 # read in the DODA tree
-tree.DODA <- read.tree(file = "data/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA_rooted.phb")
+tree.DODA <- read.tree(file = "runs/annotation_analysis/betalains/phylogenetic_analysis/DODA/DODA_rooted.phb")
 
 # adjust names
 tree.DODA$tip.label <- gsub("\\_.*", "", tree.DODA$tip.label)
@@ -80,13 +80,6 @@ ggtree(tree.DODA, layout = "rectangular") +
   geom_tiplab(align = T) +
   xlim_tree(0.37)
 
-ggsave(filename = "plots/DODA_rooted.png", width = 10)
-ggsave(filename = "plots/DODA_rooted.pdf", width = 10)
+ggsave(filename = "runs/plots/DODA_rooted.png", width = 10)
+ggsave(filename = "runs/plots/DODA_rooted.pdf", width = 10)
 ```
-
-
-
-
-
-
-
diff --git a/workflows/annotation_analysis/circos_plotting.Rmd b/workflows/annotation_analysis/circos_plotting.Rmd
index d52f50b9bedb962396ad23a018d227b030ade41b..139d65437037909574de680d089885517ee5aa86 100644
--- a/workflows/annotation_analysis/circos_plotting.Rmd
+++ b/workflows/annotation_analysis/circos_plotting.Rmd
@@ -50,10 +50,10 @@ Granges_from_gtf <- function(gtf){
             seqnames = unique(chr), # all sequences should be on the same chromosome
             gene_strand = unique(strand))
   # use the gene_structures object to create the genomic ranges object
-  gene_ranges <- GRanges(seqnames = gene_structures$seqnames, 
-                         ranges = IRanges(start=gene_structures$gene_start, 
+  gene_ranges <- GRanges(seqnames = gene_structures$seqnames,
+                         ranges = IRanges(start=gene_structures$gene_start,
                                           end=gene_structures$gene_end,
-                                          names = gene_structures$transcript_id), 
+                                          names = gene_structures$transcript_id),
                          strand = gene_structures$gene_strand)
   return(gene_ranges)
 }
@@ -68,7 +68,7 @@ Load in data and perform necessary transformations.
 ```{r}
 # for the circlize package, bed-like dataframes are required
 # load in the indexed genome and create a bed-like format from the genome
-genome.bed <- read.table("polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta.fai")
+genome.bed <- read.table("runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta.fai")
 genome.bed <- genome.bed %>%
   head(16) %>%
   summarize(chr = V1,
@@ -81,7 +81,7 @@ genome.bed$chr <- as.factor(genome.bed$chr)
 genome.bed$chr <- reorder(genome.bed$chr, order)
 
 # load in genome annotation
-gene_annotation <- read.gtf("polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
+gene_annotation <- read.gtf("runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf")
 gene_bed <- gene_annotation %>%
   filter(type=="CDS") %>%
   summarize(chr = chr,
@@ -89,14 +89,14 @@ gene_bed <- gene_annotation %>%
             end = end)
 
 # load in repetitive element annotation
-rep_annotation <- read.table("data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.out.gff")
+rep_annotation <- read.table("runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.out.gff")
 rep_bed <- rep_annotation %>%
   summarize(chr = V1,
             start = V4,
             end = V5)
 
 # prepare MYB transcription factor and color pathway gene annotation in genome
-#myb_genes <- read.csv("data/manual_sheets/MYB_with_subgroups.csv")
+#myb_genes <- read.csv("runs/data/manual_sheets/MYB_with_subgroups.csv")
 #myb_annotation <- gene_annotation %>%
 #  filter(transcript_id %in% myb_genes$query_name,
 #         type == "transcript") %>%
@@ -111,11 +111,11 @@ rep_bed <- rep_annotation %>%
 # color scheme for chromosomes:
 genome.bed$clr <- colorRampPalette(c("#FFFFFF", "#71196E"))(16)
 
-png(filename = "plots/circos.png", width = 4800, height = 4800, res = 1200) # changed this line
+png(filename = "runs/plots/circos.png", width = 4800, height = 4800, res = 1200) # changed this line
 circos.clear()
 circos.par("start.degree" = 90)
-circos.genomicInitialize(data=genome.bed, 
-                         tickLabelsStartFromZero = F, 
+circos.genomicInitialize(data=genome.bed,
+                         tickLabelsStartFromZero = F,
                          axis.labels.cex = 0.3,
                          labels.cex = 0.5)
 
@@ -129,7 +129,7 @@ circos.genomicInitialize(data=genome.bed,
 #}, track.height = 0.10, bg.border = NA)
 
 # gene density
-circos.genomicDensity(gene_bed, 
+circos.genomicDensity(gene_bed,
                       track.height=0.15,
                       window.size = 1000000,
                       col="dodgerblue3")
@@ -148,6 +148,3 @@ dev.off()
 
 
 ```
-
-
-
diff --git a/workflows/annotation_analysis/readme.txt b/workflows/annotation_analysis/readme.txt
index ab1f357eb170404b986c307f4da1e8098f65d0e4..33ca1cf7d559697701db7ec179f7077835878d6b 100644
--- a/workflows/annotation_analysis/readme.txt
+++ b/workflows/annotation_analysis/readme.txt
@@ -4,14 +4,14 @@ Identify betalain and flavonoid pathway genes, as well as MYB transcription fact
 
 ### Script order:
 
-- code/annotation_analysis/circos_plotting.Rmd
+- workflows/annotation_analysis/circos_plotting.Rmd
 Generate circos plot based on gene and repetitive element annotation
 
-- code/annotation_analysis/betalain_and_flavonoid_identification.sh
+- workflows/annotation_analysis/betalain_and_flavonoid_identification.sh
 Identify candidate betalain genes by BLAST, candidate flavonoid genes by KIPEs
 
-- code/annotation_analysis/betalain_phylogenetic_analysis.Rmd
+- workflows/annotation_analysis/betalain_phylogenetic_analysis.Rmd
 Phylogenetic analysis of betalain pathway genes
 
-- code/annotation_analysis/R2R3_analysis_reannotation.Rmd
+- workflows/annotation_analysis/R2R3_analysis_reannotation.Rmd
 Identify MYB transcription factor genes by HMMscan, analyse putative function by phylogenetic assessment
diff --git a/workflows/braker2/braker2_prot.sh b/workflows/braker2/braker2_prot.sh
index 1ee42414ffc235c337b7c0b6682ae97d880df99f..d4baf4bb60312127f89598f958125e4cc37d766f 100644
--- a/workflows/braker2/braker2_prot.sh
+++ b/workflows/braker2/braker2_prot.sh
@@ -34,15 +34,15 @@ conda activate /opt/rrzk/software/conda-envs/braker2
 
 module load samtools/1.13
 
-mkdir -p data/braker2/polished_prot
+mkdir -p runs/braker2/polished_prot
 
 # run braker in RNAseq and protein mode:
 
 braker.pl --AUGUSTUS_CONFIG_PATH=/home/twinkle1/tools/config/ \
 	--epmode \
-	--prot_seq=data/braker2/input/protein_db.fasta \
-	--genome=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
+	--prot_seq=runs/braker2/input/protein_db.fasta \
+	--genome=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
 	--softmasking \
 	--species=polished_prot \
 	--cores=8 \
-	--workingdir=data/braker2/polished_prot
+	--workingdir=runs/braker2/polished_prot
diff --git a/workflows/braker2/braker2_prot_rna.sh b/workflows/braker2/braker2_prot_rna.sh
index ca0de631d378fbf3d694addff56804f7ad35bec4..1f185921f832b24a544c866a5d2001d7de99189c 100644
--- a/workflows/braker2/braker2_prot_rna.sh
+++ b/workflows/braker2/braker2_prot_rna.sh
@@ -16,28 +16,28 @@ conda activate /opt/rrzk/software/conda-envs/braker2
 module load samtools
 
 # it is recommended to merge the separate bam files beforehand, as specifying many files can cause issues with braker
-samtools merge --threads 7 data/braker2/STAR_mappings/clouse_reads_merged.bam \
-	data/braker2/STAR_mappings/SRR_0_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_1_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_2_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_3_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_4_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_5_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_6_Aligned.sortedByCoord.out.bam \
-	data/braker2/STAR_mappings/SRR_7_Aligned.sortedByCoord.out.bam
+samtools merge --threads 7 runs/braker2/STAR_mappings/clouse_reads_merged.bam \
+	runs/braker2/STAR_mappings/SRR_0_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_1_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_2_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_3_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_4_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_5_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_6_Aligned.sortedByCoord.out.bam \
+	runs/braker2/STAR_mappings/SRR_7_Aligned.sortedByCoord.out.bam
 
 # it is unnecessary, however, to filter the bam files beforehand
 # see (https://github.com/Gaius-Augustus/BRAKER/issues/241)
 
 # run braker in RNAseq and protein mode:
-mkdir -p data/braker2/polished_prot_rna
+mkdir -p runs/braker2/polished_prot_rna
 
 braker.pl --AUGUSTUS_CONFIG_PATH=/home/twinkle1/tools/config/ \
 	--etpmode \
-	--prot_seq=data/braker2/input/protein_db.fasta \
-	--genome=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
+	--prot_seq=runs/braker2/input/protein_db.fasta \
+	--genome=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
 	--softmasking \
 	--species=polished_prot_rna \
-	--bam=data/braker2/STAR_mappings/clouse_reads_merged.bam \
+	--bam=runs/braker2/STAR_mappings/clouse_reads_merged.bam \
 	--cores=8 \
-	--workingdir=data/braker2/polished_prot_rna
+	--workingdir=runs/braker2/polished_prot_rna
diff --git a/workflows/braker2/index_STAR.sh b/workflows/braker2/index_STAR.sh
index 6eb95f9002aa4084551caed185c57d0fd6b30603..72d3b17e4db980c39aa57635b4305ca70ffe1226 100644
--- a/workflows/braker2/index_STAR.sh
+++ b/workflows/braker2/index_STAR.sh
@@ -19,12 +19,12 @@ module load star/2.7.8a
 # as SJDB file, use the newly generated braker2 protein gtf file
 
 
-mkdir -p data/braker2/STAR_index/
+mkdir -p runs/braker2/STAR_index/
 
 STAR --runThreadN 8 \
 	--runMode genomeGenerate \
-	--genomeDir data/braker2/STAR_index/ \
+	--genomeDir runs/braker2/STAR_index/ \
 	--sjdbOverhang 89 \
 	--genomeSAindexNbases 13 \
-	--genomeFastaFiles polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
-	--sjdbGTFfile data/braker2/prot_run/braker.gtf
+	--genomeFastaFiles runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
+	--sjdbGTFfile runs/braker2/prot_run/braker.gtf
diff --git a/workflows/braker2/readme.txt b/workflows/braker2/readme.txt
index 7dee01eb4eba54b1cab58a45205063e4ea31da60..2fee4f38871722543425324a67253d9758c2e7e4 100644
--- a/workflows/braker2/readme.txt
+++ b/workflows/braker2/readme.txt
@@ -1,17 +1,17 @@
 ## Computational annotation
 
 Create computational genome annotation using BRAKER2.
- 
+
 ### Script order:
 
-- code/braker2/braker2_prot.sh
+- workflows/braker2/braker2_prot.sh
 initial run of BRAKER2 using only the protein database as evidence
 
-- code/braker2/index_STAR.sh
+- workflows/braker2/index_STAR.sh
 index the polished reference genome for use with STAR
 
-- code/braker2/run_STAR.sh
+- workflows/braker2/run_STAR.sh
 map all short reads from Clouse et al. to the polished reference genome for use with BRAKER2
 
-- code/braker2/braker2_prot_rna.sh
+- workflows/braker2/braker2_prot_rna.sh
 run BRAKER2 with the mapped RNA-seq reads as well as the protein database as input
diff --git a/workflows/braker2/run_STAR.sh b/workflows/braker2/run_STAR.sh
index c552c8f4089e6bc54b2299b31a8f1e7308b0f210..495a219bcf50afaa5f0a1da90873ba5ea4664b86 100644
--- a/workflows/braker2/run_STAR.sh
+++ b/workflows/braker2/run_STAR.sh
@@ -32,7 +32,7 @@
 module load star/2.7.8a
 
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/Clouse_short_reads
+SOURCE_DIR=studies/additional_data/Clouse_short_reads
 FILES=("$SOURCE_DIR"/SRR*_1.fastq)
 
 # only for testing puposes:
@@ -40,11 +40,11 @@ FILES=("$SOURCE_DIR"/SRR*_1.fastq)
 #echo "${FILES[0]/_1.fastq.gz/_2.fastq.gz}"
 
 # run STAR after genome index creation
-mkdir -p data/braker2/STAR_mappings/
+mkdir -p runs/braker2/STAR_mappings/
 
 STAR --runThreadN 8 \
 	--runMode alignReads \
 	--outSAMtype BAM SortedByCoordinate \
-	--genomeDir data/braker2/STAR_index \
-	--outFileNamePrefix data/braker2/STAR_mappings/SRR_"${SLURM_ARRAY_TASK_ID}"_ \
+	--genomeDir runs/braker2/STAR_index \
+	--outFileNamePrefix runs/braker2/STAR_mappings/SRR_"${SLURM_ARRAY_TASK_ID}"_ \
 	--readFilesIn "${FILES["${SLURM_ARRAY_TASK_ID}"]}" "${FILES["${SLURM_ARRAY_TASK_ID}"]/_1.fastq/_2.fastq}"
diff --git a/workflows/functional_annotation/analyse_functional_annotation.Rmd b/workflows/functional_annotation/analyse_functional_annotation.Rmd
index 41c76f578a0b7c01a6d7474b50ef547192f0df52..f4fb8c7303a8be8f99dc974ab68159b917bd33e9 100644
--- a/workflows/functional_annotation/analyse_functional_annotation.Rmd
+++ b/workflows/functional_annotation/analyse_functional_annotation.Rmd
@@ -15,14 +15,14 @@ knitr::opts_knit$set(root.dir = "/home/tom/Documents/projects/Ahyp_v2_2_publicat
 Prepare input files:
 
 ```{bash}
-mkdir data/functional_annotation/analysis
+mkdir runs/functional_annotation/analysis
 
 # prepare interproscan annotations (Pfam, PANTHER and CDD)
-awk '{if (($4 == "Pfam") || ($4 == "PANTHER") || ($4 == "CDD")) {print $1}}' data/functional_annotation/interproscan/Ahypochondriacus_2.2_polished_corrected.prot.fasta.tsv | sort | uniq > data/functional_annotation/analysis/interpro_genes.txt
+awk '{if (($4 == "Pfam") || ($4 == "PANTHER") || ($4 == "CDD")) {print $1}}' runs/functional_annotation/interproscan/Ahypochondriacus_2.2_polished_corrected.prot.fasta.tsv | sort | uniq > runs/functional_annotation/analysis/interpro_genes.txt
 # prepare eggnog annotation
-tail -n +6 data/functional_annotation/eggnog_mapper/MM_8wexw920.emapper.annotations.tsv | head -n -3 | awk '{print $1}' > data/functional_annotation/analysis/eggnog_genes.txt
+tail -n +6 runs/functional_annotation/eggnog_mapper/MM_8wexw920.emapper.annotations.tsv | head -n -3 | awk '{print $1}' > runs/functional_annotation/analysis/eggnog_genes.txt
 # prepare mercator annotations
-grep ">" data/functional_annotation/mercator_v4/Ahyp2.fa | grep -v "not classified" | sed 's/ .*//' | sed 's/>//' > data/functional_annotation/analysis/mercator_genes.txt
+grep ">" runs/functional_annotation/mercator_v4/Ahyp2.fa | grep -v "not classified" | sed 's/ .*//' | sed 's/>//' > runs/functional_annotation/analysis/mercator_genes.txt
 ```
 
 Load in gene names and analyse overlap:
@@ -40,11 +40,11 @@ annotation_reader <- function(file, source){
 }
 
 # load in genes
-ips_genes <- annotation_reader(file = "data/functional_annotation/analysis/interpro_genes.txt",
+ips_genes <- annotation_reader(file = "runs/functional_annotation/analysis/interpro_genes.txt",
                                source = "Interproscan")
-eggnog_genes <- annotation_reader(file = "data/functional_annotation/analysis/eggnog_genes.txt",
+eggnog_genes <- annotation_reader(file = "runs/functional_annotation/analysis/eggnog_genes.txt",
                                source = "eggNOG")
-mercator_genes <- annotation_reader(file = "data/functional_annotation/analysis/mercator_genes.txt",
+mercator_genes <- annotation_reader(file = "runs/functional_annotation/analysis/mercator_genes.txt",
                                source = "Mercator")
 
 # create input list
@@ -60,7 +60,7 @@ p1 <- ggVennDiagram(gene_list,
   labs(fill = "Number of genes")
 p1
 
-ggsave(filename = "plots/functional_annotation_venn.png",
+ggsave(filename = "runs/plots/functional_annotation_venn.png",
        height = 5,
        width = 7,
        bg = "white",
@@ -72,7 +72,7 @@ getVennOverlap <- function(lsvenn = list(A = sort(sample(LETTERS, 15)),
                                      C = sort(sample(LETTERS, 15)),
                                      D = sort(sample(LETTERS, 15)))
                            ) {
-  
+
   ItemsList <- gplots::venn(lsvenn, show.plot = FALSE)
   print(lengths(attributes(ItemsList)$intersections))
   #return(attributes(ItemsList)$intersections)
@@ -80,4 +80,3 @@ getVennOverlap <- function(lsvenn = list(A = sort(sample(LETTERS, 15)),
 
 sum(getVennOverlap(lsvenn = gene_list))
 ```
-
diff --git a/workflows/functional_annotation/readme.txt b/workflows/functional_annotation/readme.txt
index 2c9dbeb6d6020a54eab763608a3bcda0c6d39cd2..9a494233bfc4f967c32470a3932b138f1e628494 100644
--- a/workflows/functional_annotation/readme.txt
+++ b/workflows/functional_annotation/readme.txt
@@ -4,10 +4,10 @@ Functional annotation and analysis
 
 ### Script order:
 
-- code/functional_annotation/run_interproscan.sh
+- workflows/functional_annotation/run_interproscan.sh
 Run Interproscan on protein sequences for functional annotation
 
 Functional annotation using eggNOG-mapper and Mercator was done using online submission and not run locally.
 
--code/functional_annotation/analyse_functional_annotation.Rmd
+-workflows/functional_annotation/analyse_functional_annotation.Rmd
 Analyse the number of annotated genes etc. for all three functional annotation programs
diff --git a/workflows/functional_annotation/run_interproscan.sh b/workflows/functional_annotation/run_interproscan.sh
index 1c858364662d21143f32ff28b467a65736d15590..730e669a82c498f8a8ac5fb2a867000d892a7cd9 100644
--- a/workflows/functional_annotation/run_interproscan.sh
+++ b/workflows/functional_annotation/run_interproscan.sh
@@ -23,9 +23,9 @@ module load openjdk/11.0.2
 # interpro directory with executable shell script
 INTERPRODIR=/scratch/twinkle1/interproscan/interproscan-5.56-89.0/
 # output directory for the annotation run
-INTERPROOUT=data/functional_annotation/interproscan/
+INTERPROOUT=runs/functional_annotation/interproscan/
 # input amino acid fasta file
-INTERPROIN=polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta
+INTERPROIN=runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta
 
 # main
 mkdir "$INTERPROOUT"
diff --git a/workflows/gene_expression_quantification/isoseq_expression_support.Rmd b/workflows/gene_expression_quantification/isoseq_expression_support.Rmd
index 955383247301777b226847cfdeb7c7c446a93efd..3a85cb003f747fb4852125b5bac468672ed3e386 100644
--- a/workflows/gene_expression_quantification/isoseq_expression_support.Rmd
+++ b/workflows/gene_expression_quantification/isoseq_expression_support.Rmd
@@ -46,12 +46,12 @@ read.gtf <- function(file){
 Bash script to run gffcompare for the isoseq to reference annotation comparison.
 
 ```{bash}
-mkdir data/gene_expression_quantification/isoseq/
+mkdir runs/gene_expression_quantification/isoseq/
 
 # run gffcompare
-/home/tom/Documents/tools/gffcompare/gffcompare -r data/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.gtf polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
+/home/tom/Documents/tools/gffcompare/gffcompare -r runs/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.gtf runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.gtf
 # move to output directory
-mv gffcmp.* data/gene_expression_quantification/isoseq/
+mv gffcmp.* runs/gene_expression_quantification/isoseq/
 ```
 
 
@@ -60,7 +60,7 @@ It might be easier to use the output of gffcompare instead of sqanti:
 
 ```{r}
 # read in tracking file
-gff_tracking <- read.table(file = "data/gene_expression_quantification/isoseq/gffcmp.tracking")
+gff_tracking <- read.table(file = "runs/gene_expression_quantification/isoseq/gffcmp.tracking")
 gff_tracking$V4 <- as.factor(gff_tracking$V4)
 colnames(gff_tracking) <- c("locus", "xlocus", "isoseq_transcript", "code", "reference_transcript")
 
@@ -81,5 +81,3 @@ gff_tracking$reference_transcript_short <- matrix(unlist(strsplit(x = gff_tracki
 # all transcripts there?
 length(unique(gff_tracking$reference_transcript_short)) #28074
 ```
-
-
diff --git a/workflows/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd b/workflows/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd
index a384ececc24257d47eef82f4c64b3a84574c02a6..a24d15ff4a717ae3ad6efe3bcb4fa0e462a6880a 100644
--- a/workflows/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd
+++ b/workflows/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd
@@ -20,7 +20,7 @@ After gene expression quantification using kallisto (of fastq files against the
 ##### Setup, loading of data, preprocessing
 
 # vector of input directories
-indirs <- list.dirs(path = "data/gene_expression_quantification/kallisto_quant")
+indirs <- list.dirs(path = "runs/gene_expression_quantification/kallisto_quant")
 indirs <- indirs[-1] # remove base directory
 indirs <- indirs[grep("index", indirs, invert = T)] # remove index directory
 
@@ -40,7 +40,7 @@ tpm.df <- data.frame(abundances[[1]]$target_id,
                      abundances[[6]]$tpm,
                      abundances[[7]]$tpm,
                      abundances[[8]]$tpm)
-colnames(tpm.df) <- c("GeneID", gsub("data/gene_expression_quantification/kallisto_quant/", "", indirs))
+colnames(tpm.df) <- c("GeneID", gsub("runs/gene_expression_quantification/kallisto_quant/", "", indirs))
 colnames(tpm.df) <- c("GeneID", "Cotyledones", "Flower", "Leaf", "Mature seed", "Root", "Stem", "Water-stressed", "Developing seed")
 
 # melt dataframe for plotting into long format:
@@ -48,16 +48,16 @@ melted.df <- melt(tpm.df, id.vars = "GeneID")
 colnames(melted.df) <- c("GeneID", "tissue", "tpm")
 
 write_csv(tpm.df,
-          file = "data/gene_expression_quantification/kallisto_quant/all_tissue_expression.csv")
+          file = "runs/gene_expression_quantification/kallisto_quant/all_tissue_expression.csv")
 write_csv(melted.df,
-          file = "data/gene_expression_quantification/kallisto_quant/all_tissue_expression_long.csv")
+          file = "runs/gene_expression_quantification/kallisto_quant/all_tissue_expression_long.csv")
 ```
 
 Create dataframe of the betalain genes and the myb genes which can be later used for plotting the gene expression levels for the respective genes.
 
 ```{r}
 # load object with names of all betalain and flavonoid genes
-pathway_genes <- read.csv(file = "data/manual_sheets/color_pathway_genes.csv", header=T)
+pathway_genes <- read.csv(file = "runs/manual_sheets/color_pathway_genes.csv", header=T)
 
 betalain.genes <- pathway_genes %>%
   filter(Pathway == "Betalain")
@@ -66,8 +66,8 @@ flavonoid.genes <- pathway_genes %>%
   filter(Pathway == "Flavonoid")
 
 # read in the myb genes and subset the betalain mybs
-myb_genes <- read.csv(file = "data/manual_sheets/MYB_with_subgroups.csv", header=T)
-#myb_genes <- read_csv(file = "data/annotation_analysis/myb_annotation/myb_stats.csv")
+myb_genes <- read.csv(file = "runs/manual_sheets/MYB_with_subgroups.csv", header=T)
+#myb_genes <- read_csv(file = "runs/annotation_analysis/myb_annotation/myb_stats.csv")
 colnames(myb_genes) <- c("Transcript_id", "Freq", "Gene_id", "Subgroup")
 
 ```
@@ -81,7 +81,7 @@ betalain.df <- melted.df %>%
   mutate(joining = gsub("\\..*", "", GeneID))
 
 # join to obtain pathway gene names
-betalain_plotting <- left_join(x = betalain.df, y = betalain.genes, by = c("joining" = "Gene_id")) 
+betalain_plotting <- left_join(x = betalain.df, y = betalain.genes, by = c("joining" = "Gene_id"))
 betalain_plotting <- betalain_plotting %>%
   select(GeneID, tissue, tpm, Gene) %>%
   mutate(label = paste0(Gene, " (", GeneID, ")"))
@@ -122,7 +122,7 @@ ggplot(data = betalain_plotting) +
         axis.title.x = element_blank(),
         axis.title.y = element_blank(),
         axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
-ggsave(filename = "plots/gene_expression_quantification/betalain_expression.png", width = 10, height = 7,
+ggsave(filename = "runs/plots/gene_expression_quantification/betalain_expression.png", width = 10, height = 7,
        dpi = 400)
 
 
@@ -150,10 +150,10 @@ label[93] <- "AhMYB2.1 (BvMYB1-like)"
 label[94] <- "AhMYB2.2 (BvMYB1-like)"
 
 # create matrices for the pheatmap function
-myb.mat_label <- matrix(data = round(myb.df$tpm, digits = 2), 
+myb.mat_label <- matrix(data = round(myb.df$tpm, digits = 2),
                   ncol = 8,
                   byrow = F)
-myb.mat <- matrix(data = log10(myb.df$tpm+1), 
+myb.mat <- matrix(data = log10(myb.df$tpm+1),
                   ncol = 8,
                   byrow = F)
 rownames(myb.mat) <- label
@@ -180,7 +180,7 @@ pheatmap(mat = myb.mat,
          cluster_rows = T,
          cluster_cols = F,
          angle_col = 45,
-         filename = "plots/gene_expression_quantification/myb_expression.png",
+         filename = "runs/plots/gene_expression_quantification/myb_expression.png",
          width = 10,
          height = 15,
          display_numbers = myb.mat_label,
@@ -202,7 +202,7 @@ flavonoid.df <- melted.df %>%
   mutate(joining = gsub("\\..*", "", GeneID))
 
 # join to obtain pathway gene names
-flavonoid_plotting <- left_join(x = flavonoid.df, y = flavonoid.genes, by = c("joining" = "Gene_id")) 
+flavonoid_plotting <- left_join(x = flavonoid.df, y = flavonoid.genes, by = c("joining" = "Gene_id"))
 flavonoid_plotting <- flavonoid_plotting %>%
   select(GeneID, tissue, tpm, Gene) %>%
   mutate(label = paste0(Gene, " (", GeneID, ")"))
@@ -257,15 +257,6 @@ ggplot(data = flavonoid_plotting) +
         axis.title.x = element_blank(),
         axis.title.y = element_blank(),
         axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
-ggsave(filename = "plots/gene_expression_quantification/flavonoid_pathway_expression.png", width = 10, height = 10)
-#ggsave(filename = "plots/gene_expression_quantification/flavonoid_pathway_expression.png", width = 10, height = 12)
+ggsave(filename = "runs/plots/gene_expression_quantification/flavonoid_pathway_expression.png", width = 10, height = 10)
+#ggsave(filename = "runs/plots/gene_expression_quantification/flavonoid_pathway_expression.png", width = 10, height = 12)
 ```
-
-
-
-
-
-
-
-
-
diff --git a/workflows/gene_expression_quantification/readme.txt b/workflows/gene_expression_quantification/readme.txt
index 6ecc87b7a9c252b71236101605bd45344dd2c58c..fbe97a73684b28b5d4ba01492cd5164da2f59e7d 100644
--- a/workflows/gene_expression_quantification/readme.txt
+++ b/workflows/gene_expression_quantification/readme.txt
@@ -3,11 +3,11 @@
 Quantify gene expression levels using short-read RNA-seq data from different tissues and the long-read sequencing data.
 
 ### Script order:
-- code/gene_expression_quantification/run_kallisto.sh
+- workflows/gene_expression_quantification/run_kallisto.sh
 Run Kallisto to quantify gene expression using short-read RNA-seq data
 
-- code/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd
+- workflows/gene_expression_quantification/plot_expression_quantification_kallisto.Rmd
 Generate plots from assessed gene expression levels
 
-- code/gene_expression_quantification/isoseq_expression_support.Rmd
+- workflows/gene_expression_quantification/isoseq_expression_support.Rmd
 Assess support of Iso-Seq transcripts for annotated genes
diff --git a/workflows/gene_expression_quantification/run_kallisto.sh b/workflows/gene_expression_quantification/run_kallisto.sh
index dd5393b365190368a9e04e374ea28bfb1e07440b..f853f9095d819e066827b48cf9855f96154255f4 100644
--- a/workflows/gene_expression_quantification/run_kallisto.sh
+++ b/workflows/gene_expression_quantification/run_kallisto.sh
@@ -15,7 +15,7 @@
 
 
 # index the transcriptome
-KALINDEX=data/gene_expression_quantification/kallisto_quant/index
+KALINDEX=runs/gene_expression_quantification/kallisto_quant/index
 mkdir -p $KALINDEX
 
 /home/tom/Documents/tools/kallisto/kallisto index -i "$KALINDEX"/index polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.cds.fasta
@@ -23,9 +23,9 @@ mkdir -p $KALINDEX
 
 ##### Perform quantification
 # create array of read fastq files (R1 only):
-SOURCE_DIR=raw_data/Clouse_short_reads/
+SOURCE_DIR=studies/additional_data/Clouse_short_reads/
 FILES=("$SOURCE_DIR"SRR*_1.fastq.gz)
-OUTDIR=data/gene_expression_quantification/kallisto_quant/
+OUTDIR=runs/gene_expression_quantification/kallisto_quant/
 TISSUE_NAMES=("young_seed" "stem" "flower" "leaf" "root" "water_stressed" "cotyledones" "mature_seed")
 
 # kallisto after indexing
diff --git a/workflows/genome_polishing/readme.txt b/workflows/genome_polishing/readme.txt
index e5aef947118edbe27449e45a5dd00f4313ba79fa..8b1a98c092675a41e418bf82a869d4f2b73724ca 100644
--- a/workflows/genome_polishing/readme.txt
+++ b/workflows/genome_polishing/readme.txt
@@ -4,16 +4,16 @@ Polish the previously published reference genome of A. hypochondriacus.
 
 ### Script order:
 
-- code/genome_polishing/remove_ambiguous_bases.sh
+- workflows/genome_polishing/remove_ambiguous_bases.sh
 prepare the reference genome v2.1 for genome polishing by removing all ambiguous bases
 
-- code/genome_polishing/unpackSRA.sh 
+- workflows/genome_polishing/unpackSRA.sh
 unpack the WGS short read SRA file from the Lightfoot genome assembly
 
-- code/genome_polishing/run_nextpolish.sh
+- workflows/genome_polishing/run_nextpolish.sh
 repare input files and run NextPolish to polish the reference assembly.
 
-- code/genome_polishing/process_nextpolish_output.sh
+- workflows/genome_polishing/process_nextpolish_output.sh
 return all ambigious bases into the polished reference genome and restore the same chromosome order
 
 
diff --git a/workflows/isoseq_assembly/combined_isoseq3_pipeline.sh b/workflows/isoseq_assembly/combined_isoseq3_pipeline.sh
index e64fc14a3873024264c134d8312959fe7b36e018..370cfb91da27f51bef47ae0af13bcb05188ee36a 100644
--- a/workflows/isoseq_assembly/combined_isoseq3_pipeline.sh
+++ b/workflows/isoseq_assembly/combined_isoseq3_pipeline.sh
@@ -14,7 +14,7 @@ conda activate isoseq3
 
 module load samtools/1.13
 
-ISOSEQOUT=raw_data/isoseq_raw_reads/processed/
+ISOSEQOUT=assays/isoform_sequencing/dataset/processed/
 mkdir -p $ISOSEQOUT
 
 # flnc reads were created using the following command, removing artificial concatemers and filtering out all genes without Poly-A tail
diff --git a/workflows/isoseq_assembly/combined_mapping_and_collapse.sh b/workflows/isoseq_assembly/combined_mapping_and_collapse.sh
index 0ad606a18e7e727dd68b0c006eb8215d428df667..ac1100a7a24a1a1d330bdc6ed48ea77c8c45a4cc 100644
--- a/workflows/isoseq_assembly/combined_mapping_and_collapse.sh
+++ b/workflows/isoseq_assembly/combined_mapping_and_collapse.sh
@@ -13,16 +13,16 @@ source $CONDA_PREFIX/etc/profile.d/conda.sh
 conda activate isoseq
 
 # create output directory
-MAPPINGOUT=data/isoseq/mapping_and_collapse/
+MAPPINGOUT=runs/isoseq/mapping_and_collapse/
 mkdir -p "$MAPPINGOUT"
 
 
 ### MAPPING
 
-REFERENCE=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 
 # Align sequences to reference genome
-INPUT=raw_data/isoseq_raw_reads/processed/combined.merged_clustered.hq.fasta
+INPUT=assays/isoform_sequencing/dataset/processed/combined.merged_clustered.hq.fasta
 OUTPUTMM2="$MAPPINGOUT"combined_aln.sam
 
 minimap2 -t 8 -ax splice:hq -uf --secondary=no -a $REFERENCE $INPUT -o $OUTPUTMM2
@@ -55,7 +55,7 @@ collapse_isoforms_by_sam.py --input $INPUT -s $INPUTSORTED -o $OUTPUTCOLLAPSE -c
 # First obtain associated count information
 
 INPUTABUNDANCE="$MAPPINGOUT"combined.collapsed
-CLUSTERREPORT=raw_data/isoseq_raw_reads/processed/combined.merged_clustered.cluster_report.csv
+CLUSTERREPORT=assays/isoform_sequencing/dataset/processed/combined.merged_clustered.cluster_report.csv
 
 get_abundance_post_collapse.py $INPUTABUNDANCE $CLUSTERREPORT
 
diff --git a/workflows/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh b/workflows/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh
index 64563ac26068b3ef6cbc6fccb7e1c6a480d2f52f..8faabb26d5660bef3fa9d2f8c81d861a795f914d 100644
--- a/workflows/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh
+++ b/workflows/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh
@@ -13,16 +13,16 @@ source $CONDA_PREFIX/etc/profile.d/conda.sh
 conda activate isoseq
 
 # create output directory
-MAPPINGOUT=data/isoseq/mapping_and_collapse_old_genome/
+MAPPINGOUT=runs/isoseq/mapping_and_collapse_old_genome/
 mkdir -p "$MAPPINGOUT"
 
 
 ### MAPPING
 # map reads onto the old reference genome
-REFERENCE=/home/tom/Documents/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa
+REFERENCE=studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa
 
 # Align sequences to reference genome
-INPUT=raw_data/isoseq_raw_reads/processed/combined.merged_clustered.hq.fasta
+INPUT=assays/isoform_sequencing/dataset/processed/combined.merged_clustered.hq.fasta
 OUTPUTMM2="$MAPPINGOUT"combined_aln.sam
 
 minimap2 -t 8 -ax splice:hq -uf --secondary=no -a $REFERENCE $INPUT -o $OUTPUTMM2
@@ -56,7 +56,7 @@ collapse_isoforms_by_sam.py --input $INPUT -s $INPUTSORTED -o $OUTPUTCOLLAPSE -c
 # First obtain associated count information
 
 INPUTABUNDANCE="$MAPPINGOUT"combined.collapsed
-CLUSTERREPORT=raw_data/isoseq_raw_reads/processed/combined.merged_clustered.cluster_report.csv
+CLUSTERREPORT=assays/isoform_sequencing/dataset/processed/combined.merged_clustered.cluster_report.csv
 
 get_abundance_post_collapse.py $INPUTABUNDANCE $CLUSTERREPORT
 
diff --git a/workflows/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd b/workflows/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd
index 8dcddd830ecbe4d1e6d346c10c244107aa05690d..e35684ba29d63e83ca47fcc119a18ee7cb8f9d8d 100644
--- a/workflows/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd
+++ b/workflows/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd
@@ -21,17 +21,17 @@ Compare the effect of genome polishing on the coding sequence prediction by CPC2
 Run CPC2 CDS prediction on both sequence sets:
 
 ```{bash}
-mkdir data/isoseq/comparison_genome_polishing
+mkdir runs/isoseq/comparison_genome_polishing
 
 # unpolished genome
-/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i data/isoseq/sqanti/output_old_genome/combined.collapsed.min_fl_2.filtered.underscore_corrected.fasta --ORF -o data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2
+/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i runs/isoseq/sqanti/output_old_genome/combined.collapsed.min_fl_2.filtered.underscore_corrected.fasta --ORF -o runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2
 
-/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i data/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.rep.fa --ORF -o data/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2
+/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i runs/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.rep.fa --ORF -o runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2
 
 # polished genome
-/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i data/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta --ORF -o data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2
+/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i runs/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta --ORF -o runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2
 
-/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i data/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.rep.fa --ORF -o data/isoseq/comparison_genome_polishing/polished_isoseq_cpc2
+/hom/tom/Documents/tools/CPC2_standalone-1.0.1/bin/CPC2.py -i runs/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.rep.fa --ORF -o runs/isoseq/comparison_genome_polishing/polished_isoseq_cpc2
 ```
 
 
@@ -39,8 +39,8 @@ Compare the CDS prediction results from the two files:
 
 ```{r}
 # load in cpc2 files for the unpolished genome
-unpolished_cpc2 <- read.table("data/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.txt")
-unpolished_sqanti_cpc2 <- read.table("data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.txt")
+unpolished_cpc2 <- read.table("runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.txt")
+unpolished_sqanti_cpc2 <- read.table("runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.txt")
 
 # prepare for merge
 unpolished_cpc2 <- unpolished_cpc2 %>%
@@ -83,8 +83,8 @@ table(interaction(as.factor(merged_unpolished$label), as.factor(merged_unpolishe
 
 
 ############# comparison with polished genome
-polished_cpc2 <- read.table("data/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.txt")
-polished_sqanti_cpc2 <- read.table("data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.txt")
+polished_cpc2 <- read.table("runs/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.txt")
+polished_sqanti_cpc2 <- read.table("runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.txt")
 
 # prepare for merge
 polished_cpc2 <- polished_cpc2 %>%
@@ -140,77 +140,68 @@ create_bed_from_cpc2 <- function(cpc2_output){
 # write cpc2 output as bed file
 
 ### isoseq unpolished
-isoseq_unpolished_bed <- create_bed_from_cpc2("data/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.txt") 
-write_tsv(isoseq_unpolished_bed, 
-          file = "data/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.bed",
+isoseq_unpolished_bed <- create_bed_from_cpc2("runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.txt")
+write_tsv(isoseq_unpolished_bed,
+          file = "runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.bed",
           col_names = F)
 
 ### isoseq unpolished
-isoseq_unpolished_sqanti_bed <- create_bed_from_cpc2("data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.txt") 
-write_tsv(isoseq_unpolished_sqanti_bed, 
-          file = "data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.bed",
+isoseq_unpolished_sqanti_bed <- create_bed_from_cpc2("runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.txt")
+write_tsv(isoseq_unpolished_sqanti_bed,
+          file = "runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.bed",
           col_names = F)
 
 ### isoseq polished
-isoseq_polished_bed <- create_bed_from_cpc2("data/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.txt") 
-write_tsv(isoseq_polished_bed, 
-          file = "data/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.bed",
+isoseq_polished_bed <- create_bed_from_cpc2("runs/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.txt")
+write_tsv(isoseq_polished_bed,
+          file = "runs/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.bed",
           col_names = F)
 
 ### isoseq sqanti polished
-isoseq_polished_sqanti_bed <- create_bed_from_cpc2("data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.txt") 
-write_tsv(isoseq_polished_sqanti_bed, 
-          file = "data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.bed",
+isoseq_polished_sqanti_bed <- create_bed_from_cpc2("runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.txt")
+write_tsv(isoseq_polished_sqanti_bed,
+          file = "runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.bed",
           col_names = F)
 ```
 
 Extract predicted protein sequence and run busco:
 
 ```{bash}
-mkdir -p data/annotation_analysis/busco
+mkdir -p runs/annotation_analysis/busco
 
 ### ISOSEQ UNPOLISHED
 # Extract the coding sequence from the fasta file
-tools/bedtools getfasta -fi data/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.rep.fa -fo data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.fasta -bed data/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.bed
+tools/bedtools getfasta -fi runs/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.rep.fa -fo runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.fasta -bed runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cpc2.bed
 # translate into protein sequence
-seqkit translate data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.fasta > data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.faa
+seqkit translate runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.fasta > runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.faa
 # to prepare for busco, trim the fasta header
-sed 's/|.*//' data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.faa > data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds_fixed.faa
+sed 's/|.*//' runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds.faa > runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds_fixed.faa
 # run busco
-busco -m protein -i data/isoseq/comparison_genome_polishing/unpolished_isoseq_cds_fixed.faa -o unpolished_isoseq_cds -l embryophyta_odb10 --out_path data/annotation_analysis/busco/ --download_path data/busco/datasets/ -c 7 -f
+busco -m protein -i runs/isoseq/comparison_genome_polishing/unpolished_isoseq_cds_fixed.faa -o unpolished_isoseq_cds -l embryophyta_odb10 --out_path runs/annotation_analysis/busco/ --download_path runs/busco/datasets/ -c 7 -f
 
 ### ISOSEQ SQANTI UNPOLISHED
 # Extract the coding sequence from the fasta file
-tools/bedtools getfasta -fi data/isoseq/sqanti/output_old_genome/combined.collapsed.min_fl_2.filtered.underscore_corrected.fasta -fo data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.fasta -bed data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.bed
+tools/bedtools getfasta -fi runs/isoseq/sqanti/output_old_genome/combined.collapsed.min_fl_2.filtered.underscore_corrected.fasta -fo runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.fasta -bed runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cpc2.bed
 # translate into protein sequence
-seqkit translate data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.fasta > data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.faa
+seqkit translate runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.fasta > runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.faa
 # run busco
-busco -m protein -i data/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.faa -o unpolished_isoseq_sqanti_cds -l embryophyta_odb10 --out_path data/annotation_analysis/busco/ --download_path data/busco/datasets/ -c 7 -f
+busco -m protein -i runs/isoseq/comparison_genome_polishing/unpolished_isoseq_sqanti_cds.faa -o unpolished_isoseq_sqanti_cds -l embryophyta_odb10 --out_path runs/annotation_analysis/busco/ --download_path runs/busco/datasets/ -c 7 -f
 
 ### ISOSEQ POLISHED
 # Extract the coding sequence from the fasta file
-tools/bedtools getfasta -fi data/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.rep.fa -fo data/isoseq/comparison_genome_polishing/polished_isoseq_cds.fasta -bed data/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.bed
+tools/bedtools getfasta -fi runs/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.rep.fa -fo runs/isoseq/comparison_genome_polishing/polished_isoseq_cds.fasta -bed runs/isoseq/comparison_genome_polishing/polished_isoseq_cpc2.bed
 # translate into protein sequence
-seqkit translate data/isoseq/comparison_genome_polishing/polished_isoseq_cds.fasta > data/isoseq/comparison_genome_polishing/polished_isoseq_cds.faa
+seqkit translate runs/isoseq/comparison_genome_polishing/polished_isoseq_cds.fasta > runs/isoseq/comparison_genome_polishing/polished_isoseq_cds.faa
 # to prepare for busco, trim the fasta header
-sed 's/|.*//' data/isoseq/comparison_genome_polishing/polished_isoseq_cds.faa > data/isoseq/comparison_genome_polishing/polished_isoseq_cds_fixed.faa
+sed 's/|.*//' runs/isoseq/comparison_genome_polishing/polished_isoseq_cds.faa > runs/isoseq/comparison_genome_polishing/polished_isoseq_cds_fixed.faa
 # run busco
-busco -m protein -i data/isoseq/comparison_genome_polishing/polished_isoseq_cds_fixed.faa -o polished_isoseq_cds -l embryophyta_odb10 --out_path data/annotation_analysis/busco/ --download_path data/busco/datasets/ -c 7 -f
+busco -m protein -i runs/isoseq/comparison_genome_polishing/polished_isoseq_cds_fixed.faa -o polished_isoseq_cds -l embryophyta_odb10 --out_path runs/annotation_analysis/busco/ --download_path runs/busco/datasets/ -c 7 -f
 
 ### ISOSEQ SQANTI POLISHED
 # Extract the coding sequence from the fasta file
-tools/bedtools getfasta -fi data/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta -fo data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.fasta -bed data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.bed
+tools/bedtools getfasta -fi runs/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta -fo runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.fasta -bed runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cpc2.bed
 # translate into protein sequence
-seqkit translate data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.fasta > data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.faa
+seqkit translate runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.fasta > runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.faa
 # run busco
-busco -m protein -i data/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.faa -o polished_isoseq_sqanti_cds -l embryophyta_odb10 --out_path data/annotation_analysis/busco/ --download_path data/busco/datasets/ -c 7 -f
+busco -m protein -i runs/isoseq/comparison_genome_polishing/polished_isoseq_sqanti_cds.faa -o polished_isoseq_sqanti_cds -l embryophyta_odb10 --out_path runs/annotation_analysis/busco/ --download_path runs/busco/datasets/ -c 7 -f
 ```
-
-
-
-
-
-
-
-
-
diff --git a/workflows/isoseq_assembly/isoseq3_pipeline.sh b/workflows/isoseq_assembly/isoseq3_pipeline.sh
index e7cebe34829325bcb5bb65da9cd4cb7d0b66fd53..13ebc3b8fb6161db527bcdfd2c9096a03906e19b 100644
--- a/workflows/isoseq_assembly/isoseq3_pipeline.sh
+++ b/workflows/isoseq_assembly/isoseq3_pipeline.sh
@@ -20,13 +20,13 @@ echo "$SLURM_ARRAY_TASK_ID"
 # removal of primers and barcodes already done
 # step 4, removal of polyA tail and artificial concatemers
 
-PROCESSING=raw_data/isoseq_raw_reads/processed/
+PROCESSING=assays/isoform_sequencing/dataset/isoseq_raw_reads/processed/
 mkdir -p "PROCESSING"
 
-SOURCE_DIR=raw_data/isoseq_raw_reads/reads
+SOURCE_DIR=assays/isoform_sequencing/dataset/reads
 FILES=("$SOURCE_DIR"/*bam)
 
-PRIMER_SOURCE=raw_data/isoseq_raw_reads/primers
+PRIMER_SOURCE=assays/isoform_sequencing/dataset/primers
 PRIMER_FILES=("$PRIMER_SOURCE"/*fasta)
 
 REFINEIN="$SOURCE_DIR"/"${FILES["${SLURM_ARRAY_TASK_ID}"]}"
@@ -46,7 +46,7 @@ samtools merge "$ISOSEQOUT"combined.merged_flnc.bam /home/twinkle1/isoseq_raw_re
 
 # clustering of identical reads
 
-IN="data/isoseq3_pipeline/combined.merged_flnc.bam"
-OUT="data/isoseq3_pipeline/combined.merged_clustered.bam"
+IN="runs/isoseq3_pipeline/combined.merged_flnc.bam"
+OUT="runs/isoseq3_pipeline/combined.merged_clustered.bam"
 
 isoseq3 cluster	"$IN" "$OUT" --verbose --use-qvs
diff --git a/workflows/isoseq_assembly/readme.txt b/workflows/isoseq_assembly/readme.txt
index 85aefb6c87ae724447cee891f106d4f45d99e4d7..9a10ba652a4092dd60da5d7a56e150a08a6cf729 100644
--- a/workflows/isoseq_assembly/readme.txt
+++ b/workflows/isoseq_assembly/readme.txt
@@ -4,20 +4,20 @@ Assembly full-length transcript sequencing data.
 
 ### Script order:
 
-- code/isoseq_assembly/isoseq3_pipeline.sh
+- workflows/isoseq_assembly/isoseq3_pipeline.sh
 assemble FLNC reads from CCS files
 
-- code/isoseq_assembly/combined_isoseq3_pipeline.sh
+- workflows/isoseq_assembly/combined_isoseq3_pipeline.sh
 combine FLNC reads and cluster identical reads
 
-- code/isoseq_assembly/combined_mapping_and_collapse.sh
+- workflows/isoseq_assembly/combined_mapping_and_collapse.sh
 collapse clustered reads into unique full-length transcripts using the polished reference genome
 
-- code/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh
+- workflows/isoseq_assembly/combined_mapping_and_collapse_old_genome.sh
 collapse clustered reads into unique full-length transcripts using the unpolished reference genome
 
-- code/isoseq_assembly/run_sqanti.sh
+- workflows/isoseq_assembly/run_sqanti.sh
 run SQANTI in order to correct possible sequencing errors in the full-length transcripts using both reference genomes
 
-- code/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd
+- workflows/isoseq_assembly/comparison_isoseq_polishing_effectiveness.Rmd
 compare effect of genome polishing on error correction of full-length transcript sequences
diff --git a/workflows/isoseq_assembly/run_sqanti.sh b/workflows/isoseq_assembly/run_sqanti.sh
index 8b29756f693351f7bd9864b15c7cd2923f57ed3f..b14191664a830eba59fa9037221267c667015e4c 100644
--- a/workflows/isoseq_assembly/run_sqanti.sh
+++ b/workflows/isoseq_assembly/run_sqanti.sh
@@ -3,20 +3,20 @@
 # run sqanti correction and filtering
 # preliminary genome annotation is used to compare against, but this only effects the pdf output
 
-mkdir -p data/isoseq/sqanti/output_polished
+mkdir -p runs/isoseq/sqanti/output_polished
 
 /home/tom/Documents/tools/SQANTI3-4.2/sqanti3_qc.py \
-        data/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.gff \
-        data/braker2/polished_prot_rna/braker.gtf \
-        polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
-        -d data/isoseq/sqanti/output_polished/ \
+        runs/isoseq/mapping_and_collapse/combined.collapsed.min_fl_2.filtered.gff \
+        runs/braker2/polished_prot_rna/braker.gtf \
+        runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
+        -d runs/isoseq/sqanti/output_polished/ \
         -n 6
 
 mkdir -p data/isoseq/sqanti/output_old_genome
 
 /home/tom/Documents/tools/SQANTI3-4.2/sqanti3_qc.py \
-	data/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.gff \
-	data/braker2/polished_prot_rna/braker.gtf \
-	/home/tom/Documents/reference_genomes/Ahypochondriacus/assembly/Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa \
-	-d data/isoseq/sqanti/output_old_genome/ \
+	runs/isoseq/mapping_and_collapse_old_genome/combined.collapsed.min_fl_2.filtered.gff \
+	runs/braker2/polished_prot_rna/braker.gtf \
+	studies/additional_data/resources/reference_genomes/Ahypochondriacus/assembly//Ahypochondriacus_459_v2.0.softmasked.nospace.underscore.fa \
+	-d runs/isoseq/sqanti/output_old_genome/ \
 	-n 6
diff --git a/workflows/merge_annotation/Braker2_subsets_and_merge.Rmd b/workflows/merge_annotation/Braker2_subsets_and_merge.Rmd
index c56fa6e9f03ad064504612db1aaede31286aca52..d7005294484ab68178e936ff076696ee7144f524 100644
--- a/workflows/merge_annotation/Braker2_subsets_and_merge.Rmd
+++ b/workflows/merge_annotation/Braker2_subsets_and_merge.Rmd
@@ -54,10 +54,10 @@ Granges_from_gtf <- function(gtf){
             seqnames = unique(chr), # all sequences should be on the same chromosome
             gene_strand = unique(strand))
   # use the gene_structures object to create the genomic ranges object
-  gene_ranges <- GRanges(seqnames = gene_structures$seqnames, 
-                         ranges = IRanges(start=gene_structures$gene_start, 
+  gene_ranges <- GRanges(seqnames = gene_structures$seqnames,
+                         ranges = IRanges(start=gene_structures$gene_start,
                                           end=gene_structures$gene_end,
-                                          names = gene_structures$transcript_id), 
+                                          names = gene_structures$transcript_id),
                          strand = gene_structures$gene_strand)
   return(gene_ranges)
 }
@@ -86,21 +86,21 @@ report_both_codons <- function(gtf_object){
 Create differently supported subsets using the available script from augustus/braker2:
 
 ```{bash}
-mkdir -p data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets
+mkdir -p runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets
 
 # activate busco environment for the Augustus script
 conda activate busco
 
 # extract braker aa and cds, also generate the bad_genes.lst file
-/home/tom/Documents/tools/Augustus/scripts/getAnnoFastaFromJoingenes.py -g polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta -f data/braker2/polished_prot_rna/braker.gtf -s FILTER -o data/braker2/polished_prot_rna/braker_extracted
+/home/tom/Documents/tools/Augustus/scripts/getAnnoFastaFromJoingenes.py -g runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta -f runs/braker2/polished_prot_rna/braker.gtf -s FILTER -o runs/braker2/polished_prot_rna/braker_extracted
 
 # select differently supported subsets based on the amount of support
-tools/BRAKER/scripts/predictionAnalysis/selectSupportedSubsets.py --noSupport data/braker_analysis/external_evidence/polished_prot_rna/no_support.txt --fullSupport data/braker_analysis/external_evidence/polished_prot_rna/full_support.txt --anySupport data/braker_analysis/external_evidence/polished_prot_rna/any_support.txt data/braker2/polished_prot_rna/braker.gtf data/braker2/polished_prot_rna/hintsfile.gff
+tools/BRAKER/scripts/predictionAnalysis/selectSupportedSubsets.py --noSupport runs/braker_analysis/external_evidence/polished_prot_rna/no_support.txt --fullSupport runs/braker_analysis/external_evidence/polished_prot_rna/full_support.txt --anySupport runs/braker_analysis/external_evidence/polished_prot_rna/any_support.txt runs/braker2/polished_prot_rna/braker.gtf runs/braker2/polished_prot_rna/hintsfile.gff
 
 # remove all lines that contain "#" characters in the beginning (cause issues at later steps)
-sed '/^#/d' data/braker_analysis/external_evidence/polished_prot_rna/full_support.txt > data/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf
-sed '/^#/d' data/braker_analysis/external_evidence/polished_prot_rna/any_support.txt > data/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf
-sed '/^#/d' data/braker_analysis/external_evidence/polished_prot_rna/no_support.txt > data/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf
+sed '/^#/d' runs/braker_analysis/external_evidence/polished_prot_rna/full_support.txt > runs/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf
+sed '/^#/d' runs/braker_analysis/external_evidence/polished_prot_rna/any_support.txt > runs/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf
+sed '/^#/d' runs/braker_analysis/external_evidence/polished_prot_rna/no_support.txt > runs/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf
 ```
 
 
@@ -108,12 +108,12 @@ Generate a dataframe with the transcript ids based on the differently supported
 
 ```{r}
 # load in the support set gtfs
-full.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf")
-any.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf")
-no.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf")
+full.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf")
+any.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf")
+no.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf")
 
 # load in the genes with internal stop codons, as detected by the getAnnoFastaFromJoingenes script (see braker2_results folder readme.txt)
-badgenes <- read.table("data/braker_analysis/external_evidence/polished_prot_rna/bad_genes.lst")
+badgenes <- read.table("runs/braker_analysis/external_evidence/polished_prot_rna/bad_genes.lst")
 
 # extract the gene names with both, annotated start and annotated stop codons
 codons_any <- report_both_codons(any.support)
@@ -130,21 +130,21 @@ no.ids <- unique(no.support$transcript_id) #7443
 
 # create dataframe with the subset transcript ids and the support category
 all.ids <- c(full.ids, partial.ids, no.ids)
-support <- c(rep("full", length(full.ids)), 
-             rep("partial", length(partial.ids)), 
+support <- c(rep("full", length(full.ids)),
+             rep("partial", length(partial.ids)),
              rep("no", length(no.ids)))
 support.df <- data.frame(all.ids, support)
 
 
 # filter out internal stop codons
-support.df <- support.df[!support.df$all.ids %in% badgenes$V1,] 
+support.df <- support.df[!support.df$all.ids %in% badgenes$V1,]
 # exclude all annotated genes that do not have annotated start and stop codons
 fixed_support.df <- support.df[support.df$all.ids %in% codons,]
 
 
 ##################################
 # write created dataframe as rds object:
-saveRDS(fixed_support.df, file="data/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
+saveRDS(fixed_support.df, file="runs/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
 
 ```
 
@@ -153,12 +153,12 @@ Filter out predicted genes with internal stop codons as well as predicted genes
 
 ```{r}
 # load in the gtf files to subset
-full.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf")
-any.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf")
-no.support <- read.gtf("data/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf")
+full.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/full_support_fixed.gtf")
+any.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/any_support_fixed.gtf")
+no.support <- read.gtf("runs/braker_analysis/external_evidence/polished_prot_rna/no_support_fixed.gtf")
 
 # load in the annotation dataframe:
-support.df <- readRDS(file="data/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
+support.df <- readRDS(file="runs/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
 
 # create partial support dataframe:
 partial.support <- any.support[!any.support$transcript_id %in% full.support$transcript_id,]
@@ -177,28 +177,28 @@ no.support.filtered <- no.support[no.support$transcript_id %in% support.df$all.i
 #length(unique(no.support.filtered$transcript_id)) # 3579 remain
 
 ### Write the filtered gtf files
-write.table(full.support.filtered[,1:9], 
-          "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.gtf",
+write.table(full.support.filtered[,1:9],
+          "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.gtf",
           col.names = F, row.names = F, quote=F, sep ="\t")
-write.table(partial.support.filtered[,1:9], 
-          "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.gtf",
+write.table(partial.support.filtered[,1:9],
+          "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.gtf",
           col.names = F, row.names = F, quote=F, sep ="\t")
-write.table(no.support.filtered[,1:9], 
-          "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.gtf",
+write.table(no.support.filtered[,1:9],
+          "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.gtf",
           col.names = F, row.names = F, quote=F, sep ="\t")
 ```
 
-After filtering the gtf files, use the filtered files in order to subset the codingseq and amino acid fasta files. Load the coding sequence and amino acid fasta files and filter them using the support dataframe. 
+After filtering the gtf files, use the filtered files in order to subset the codingseq and amino acid fasta files. Load the coding sequence and amino acid fasta files and filter them using the support dataframe.
 
 ```{r}
 library(seqinr)
 
 # load in the support dataframe
-support.df <- readRDS(file="data/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
+support.df <- readRDS(file="runs/braker_analysis/external_evidence/polished_prot_rna/external_evidence.RDS")
 support.df$all.ids <- as.character(support.df$all.ids)
 
 # subset fasta files using the support dataframe
-braker_dna <- read.fasta("data/braker2/polished_prot_rna/braker_extracted.codingseq",
+braker_dna <- read.fasta("runs/braker2/polished_prot_rna/braker_extracted.codingseq",
                             seqtype = "DNA")
 
 # filter the different subsets
@@ -208,22 +208,22 @@ braker_dna.partial.filtered <- braker_dna[getName(braker_dna) %in% support.df[su
 braker_dna.no.filtered <- braker_dna[getName(braker_dna) %in% support.df[support.df$support == "no",1]] # 7014 sequences
 
 # write subsets
-write.fasta(sequences=braker_dna.filtered, 
-            names=names(braker_dna.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/braker_all.fasta")
-write.fasta(sequences=braker_dna.full.filtered, 
-            names=names(braker_dna.full.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.fasta")
-write.fasta(sequences=braker_dna.partial.filtered, 
-            names=names(braker_dna.partial.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.fasta")
-write.fasta(sequences=braker_dna.no.filtered, 
-            names=names(braker_dna.no.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.fasta")
+write.fasta(sequences=braker_dna.filtered,
+            names=names(braker_dna.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/braker_all.fasta")
+write.fasta(sequences=braker_dna.full.filtered,
+            names=names(braker_dna.full.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.fasta")
+write.fasta(sequences=braker_dna.partial.filtered,
+            names=names(braker_dna.partial.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.fasta")
+write.fasta(sequences=braker_dna.no.filtered,
+            names=names(braker_dna.no.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.fasta")
 
 ### repeat for the AA fasta files:
 # subset fasta files using the support dataframe
-braker_aa <- read.fasta("data/braker2/polished_prot_rna/braker_extracted.aa",
+braker_aa <- read.fasta("runs/braker2/polished_prot_rna/braker_extracted.aa",
                             seqtype = "AA")
 
 braker_aa.filtered <- braker_aa[getName(braker_aa) %in% support.df[,1]] # 39141 sequences
@@ -232,20 +232,16 @@ braker_aa.partial.filtered <- braker_aa[getName(braker_aa) %in% support.df[suppo
 braker_aa.no.filtered <- braker_aa[getName(braker_aa) %in% support.df[support.df$support == "no",1]] # 7014 sequences
 
 # write subsets
-write.fasta(sequences=braker_aa.filtered, 
-            names=names(braker_aa.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/braker_all.faa")
-write.fasta(sequences=braker_aa.full.filtered, 
-            names=names(braker_aa.full.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.faa")
-write.fasta(sequences=braker_aa.partial.filtered, 
-            names=names(braker_aa.partial.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.faa")
-write.fasta(sequences=braker_aa.no.filtered, 
-            names=names(braker_aa.no.filtered), 
-            file.out = "data/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.faa")
+write.fasta(sequences=braker_aa.filtered,
+            names=names(braker_aa.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/braker_all.faa")
+write.fasta(sequences=braker_aa.full.filtered,
+            names=names(braker_aa.full.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/full_support.faa")
+write.fasta(sequences=braker_aa.partial.filtered,
+            names=names(braker_aa.partial.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/partial_support.faa")
+write.fasta(sequences=braker_aa.no.filtered,
+            names=names(braker_aa.no.filtered),
+            file.out = "runs/braker_analysis/external_evidence/polished_prot_rna/fixed_subsets/no_support.faa")
 ```
-
-
-
-
diff --git a/workflows/merge_annotation/readme.txt b/workflows/merge_annotation/readme.txt
index 07756106ab59e572559a654025fa157186cd1b72..6c4599ca2b3835408bfec628bb1320d68e4cf6cf 100644
--- a/workflows/merge_annotation/readme.txt
+++ b/workflows/merge_annotation/readme.txt
@@ -4,8 +4,8 @@ Combine the computational annotation with full-length transcript sequencing data
 
 ### Script order:
 
-- code/merge_annotation/Braker2_subsets_and_merge.Rmd
+- workflows/merge_annotation/Braker2_subsets_and_merge.Rmd
 Prepare BRAKER2 input, use only predicted genes supported by external evidence for the merge
 
-- code/merge_annotation/reannotation_correction.Rmd
+- workflows/merge_annotation/reannotation_correction.Rmd
 Merge BRAKER2 and full-length transcript sequencing data using TSEBRA, deduplicate, rename genes and compare annotation completeness using BUSCO
diff --git a/workflows/merge_annotation/reannotation_correction.Rmd b/workflows/merge_annotation/reannotation_correction.Rmd
index d056ed1422893d7094c2a76959383ee851965b2c..29687114b52bc87c69d168942bc854252bb11572 100644
--- a/workflows/merge_annotation/reannotation_correction.Rmd
+++ b/workflows/merge_annotation/reannotation_correction.Rmd
@@ -56,10 +56,10 @@ Granges_from_gtf <- function(gtf){
             seqnames = unique(chr), # all sequences should be on the same chromosome
             gene_strand = unique(strand))
   # use the gene_structures object to create the genomic ranges object
-  gene_ranges <- GRanges(seqnames = gene_structures$seqnames, 
-                         ranges = IRanges(start=gene_structures$gene_start, 
+  gene_ranges <- GRanges(seqnames = gene_structures$seqnames,
+                         ranges = IRanges(start=gene_structures$gene_start,
                                           end=gene_structures$gene_end,
-                                          names = gene_structures$transcript_id), 
+                                          names = gene_structures$transcript_id),
                          strand = gene_structures$gene_strand)
   return(gene_ranges)
 }
@@ -101,7 +101,7 @@ lists.of.overlaps <- function(overlaps){
   list.out <- list()
   vec <- c()
   i <- 1
-  
+
   # while there are still selfoverlapping sequences
   while (length(selfoverlap) > 0){
     # check each overlap if it belongs to a selfoverlapping sequence
@@ -175,12 +175,12 @@ extract_attributes <- function(gtf_column, att_of_interest){
 Combine computational annotation and Isoseq transcripts into a single genome annotation. Predict ORFs in the transcript sequences using CPC2.
 
 ```{bash}
-mkdir data/isoseq/cpc/
+mkdir runs/isoseq/cpc/
 
 # Predict coding sequence in braker2 transcripts for TSEBRA
 tools/CPC2_standalone-1.0.1/bin/CPC2.py \
-	-i data/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta \
-	-o data/isoseq/cpc2/cpc2_from_sqanti \
+	-i runs/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.fasta \
+	-o runs/isoseq/cpc2/cpc2_from_sqanti \
 	--ORF
 ```
 
@@ -188,24 +188,24 @@ Convert CPC2 output into a gtf file of predicted transcript coding sequences. CP
 
 ```{r}
 # prepare a bed file from the results of cpc2
-cpc2 <- read.table("data/isoseq/cpc2/cpc2_from_sqanti.txt")
+cpc2 <- read.table("runs/isoseq/cpc2/cpc2_from_sqanti.txt")
 # filter for coding transcripts with an intact ORF
 cpc2 <- cpc2 %>%
   filter(V9 == "coding" & V6 == 1) %>%
   summarise(ID=V1, start=V7-1, end=V7+(V3*3)-1)
 # write as bed file for extraction of CDS
-write_tsv(cpc2, 
-          file="data/isoseq/cpc2/cpc2_extraction.bed",
+write_tsv(cpc2,
+          file="runs/isoseq/cpc2/cpc2_extraction.bed",
           quote = "none",
           col_names = F)
 
 # Convert cpc2 output to gtf file:
 # read in gtf file of isoseq transcript data
-isoseq.gtf <- read.gtf("data/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.gtf")
+isoseq.gtf <- read.gtf("runs/isoseq/sqanti/output_polished/combined.collapsed.min_fl_2.filtered_corrected.gtf")
 isoseq.gtf <- isoseq.gtf[isoseq.gtf$type == "exon",]
 
 # read in cpc2 output file, the bed file used for extraction does suffice
-cpc2.bed <- read.table("data/isoseq_data/cpc2/cpc2_extraction.bed")
+cpc2.bed <- read.table("runs/isoseq_runs/cpc2/cpc2_extraction.bed")
 colnames(cpc2.bed) <- c("ID", "CDS_start", "CDS_end")
 # bed format is 0 based, convert back to a 1 based format, end position does not have to converted
 cpc2.bed$CDS_start <- cpc2.bed$CDS_start+1
@@ -242,7 +242,7 @@ for (i in 1:length(ids)){
   shifted[length(shifted)+1] <- shifted[length(shifted)]+1
   # which values change by more than 1 after one shift? These values represent exon boundaries
   boundaries <- which(abs(cds_coordinates - shifted) != 1)
-  
+
   # create matrix from which to construct the start and end positions of the gtf file
   # create matrices for single exon genes also, as they are converted to vectors in a later step otherwise
   if (length(boundaries) > 0){
@@ -262,13 +262,13 @@ for (i in 1:length(ids)){
     mat[1,2] <- cds_coordinates[1]
     mat[1,1] <- cds_coordinates[length(cds_coordinates)]
   }
-  
+
   # invert matrix for minus strand in order to start with the lowest number
   # only if there are multiple exons
   if (stranded == "-" & nrow(mat) != 1){
     mat <- apply(apply(mat, 1, rev), 1, rev)
   }
-  
+
   # use the previously created gtf structure to store the CDS coordinates
   # the number of CDS rows is always <= number of exon rows
   out.subset <- gtf.subset[1:nrow(mat),]
@@ -283,33 +283,33 @@ for (i in 1:length(ids)){
 # output represents all CDS records
 
 # write gtf file
-write.table(output[,1:9], 
-            "data/isoseq/cpc2/cpc2_extracted_cds.gtf",
+write.table(output[,1:9],
+            "runs/isoseq/cpc2/cpc2_extracted_cds.gtf",
             col.names = F, row.names = F, quote=F, sep ="\t")
 ```
 
 Run TSEBRA to combine computational prediction with Iso-Seq transcripts, using gffread for deduplication:
 
 ```{bash}
-mkdir -p data/braker_analysis/TSEBRA
-mkdir -p data/reannotation_correction/computational
-mkdir -p data/reannotation_correction/manual
+mkdir -p runs/braker_analysis/TSEBRA
+mkdir -p runs/reannotation_correction/computational
+mkdir -p runs/reannotation_correction/manual
 
 # -g specifies the braker gtf, -c long read config file, -e braker hint file -l isoseq cpc2 extracted gtf file
 /home/tom/Documents/tools/TSEBRA/bin/tsebra.py \
-	-g data/braker_analysis/external_evidence/fixed_subsets/full_partial_support.gtf \
+	-g runs/braker_analysis/external_evidence/fixed_subsets/full_partial_support.gtf \
 	-c /home/tom/Documents/tools/TSEBRA/config/long_reads.cfg \
-	-e data/braker2/polished_prot_rna/hintsfile.gff \
-	-l data/isoseq/cpc2/cpc2_extracted_cds.gtf \
-	-o data/braker_analysis/TSEBRA/tsebra.gtf
+	-e runs/braker2/polished_prot_rna/hintsfile.gff \
+	-l runs/isoseq/cpc2/cpc2_extracted_cds.gtf \
+	-o runs/braker_analysis/TSEBRA/tsebra.gtf
 
 
 # To remove any duplicated entries and to cluster the predicted transcripts into loci from the tsebra gtf file I used gffread.
 # -M option merges identical entries, -K option causes stricter merge, -T outputs as gtf (did also output as gff file)
 /home/tom/Documents/tools/gffread-0.12.7/gffread -M -K -T \
-	-d data/braker_analysis/TSEBRA/duplication_info.txt \
-	data/braker_analysis/TSEBRA/tsebra.gtf > \
-	data/braker_analysis/TSEBRA/tsebra_dedup.gtf
+	-d runs/braker_analysis/TSEBRA/duplication_info.txt \
+	runs/braker_analysis/TSEBRA/tsebra.gtf > \
+	runs/braker_analysis/TSEBRA/tsebra_dedup.gtf
 ```
 
 
@@ -317,7 +317,7 @@ After the finalisation of the genome prediction using TSEBRA and the deduplicati
 
 ```{r}
 # read in the annotation
-tsebra.gtf <- read.gtf("data/braker_analysis/TSEBRA/tsebra_dedup.gtf")
+tsebra.gtf <- read.gtf("runs/braker_analysis/TSEBRA/tsebra_dedup.gtf")
 
 # create a mapping from locus the gene id, the locus is only found in rows of the "transcript" type
 transcript_locus_mapping <- tsebra.gtf %>%
@@ -332,7 +332,7 @@ tsebra.gtf <- left_join(tsebra.gtf, transcript_locus_mapping)
 # save information as RDS
 output <- tsebra.gtf %>%
   select(source, gene_id, transcript_id, locus)
-saveRDS(output, "data/reannotation_correction/computational/locus.RDS")
+saveRDS(output, "runs/reannotation_correction/computational/locus.RDS")
 
 
 # create a dataframe indicating the order of the scaffolds and contigs
@@ -355,9 +355,9 @@ tsebra.gtf.sorted <- tsebra.gtf %>%
 
 # use only one record per transcript, sort by locus and add number with the number of transcript at that locus
 # to later create the transcript names
-transcript_id <- tsebra.gtf.sorted %>% 
-  filter(type == "transcript") %>% 
-  group_by(locus) %>% 
+transcript_id <- tsebra.gtf.sorted %>%
+  filter(type == "transcript") %>%
+  group_by(locus) %>%
   mutate(occ = 1:n()) %>%
   ungroup() %>%
   select(transcript_id, occ)
@@ -366,15 +366,15 @@ tsebra.gtf.sorted <- left_join(tsebra.gtf.sorted, transcript_id, by=c("transcrip
 
 
 # write temporary RDS file:
-saveRDS(tsebra.gtf.sorted, 
-          "data/reannotation_correction/computational/tsebra_temp3.RDS")
+saveRDS(tsebra.gtf.sorted,
+          "runs/reannotation_correction/computational/tsebra_temp3.RDS")
 ```
 
 Rename the transcripts and save again as a renamed gtf file:
 
 ```{r}
 # read in RDS file
-tsebra.gtf.sorted <- readRDS("data/reannotation_correction/computational/tsebra_temp3.RDS")
+tsebra.gtf.sorted <- readRDS("runs/reannotation_correction/computational/tsebra_temp3.RDS")
 
 # rename the locus and transcript ID records to match
 # add column indicating the gene order
@@ -393,19 +393,19 @@ gene_identifier <- paste("AHp", order_filled, sep="")
 gene.id.df <- data.frame(ids, order3, order_filled, gene_identifier)
 tsebra.gtf.sorted <- left_join(tsebra.gtf.sorted, gene.id.df, by = c("locus" = "ids"))
 
-# create the transcript identifier based on gene identifier 
+# create the transcript identifier based on gene identifier
 tsebra.gtf.sorted <- tsebra.gtf.sorted %>%
   mutate(transcript_identifier = paste0(gene_identifier, ".", occ))
 
 # include new attribute column
-tsebra.gtf.sorted$new_attributes <- paste0("gene_id \"", 
-                                            tsebra.gtf.sorted$gene_identifier, 
-                                            "\"; transcript_id \"", 
-                                            tsebra.gtf.sorted$transcript_identifier, 
+tsebra.gtf.sorted$new_attributes <- paste0("gene_id \"",
+                                            tsebra.gtf.sorted$gene_identifier,
+                                            "\"; transcript_id \"",
+                                            tsebra.gtf.sorted$transcript_identifier,
                                             "\";")
 # create mapping file of old and new names:
 mapping <- tsebra.gtf.sorted %>% select(gene_id, transcript_id, locus, gene_identifier, transcript_identifier)
-saveRDS(mapping, "data/reannotation_correction/computational/mapping.RDS")
+saveRDS(mapping, "runs/reannotation_correction/computational/mapping.RDS")
 
 # create final gtf file in the correct column order:
 output <- tsebra.gtf.sorted %>%
@@ -413,14 +413,14 @@ output <- tsebra.gtf.sorted %>%
   select(chr, source, type, start, end, score, strand, phase, new_attributes)
 
 # write gtf
-write.table(output[,1:9], 
-          "data/reannotation_correction/computational/tsebra_renamed.gtf",
+write.table(output[,1:9],
+          "runs/reannotation_correction/computational/tsebra_renamed.gtf",
           col.names = F, row.names = F, quote=F, sep ="\t")
 ```
 
-Based on evidence from sequencing data, the sequence of the AmMYBl1 gene (AHp014591) was manually adjusted. The resulting file was saved as data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf. Gffread was used to extract cds and protein fasta files of the annotated genes and convert the gtf file to the gff3 format, representing the genome annotation v2.2.
+Based on evidence from sequencing data, the sequence of the AmMYBl1 gene (AHp014591) was manually adjusted. The resulting file was saved as runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf. Gffread was used to extract cds and protein fasta files of the annotated genes and convert the gtf file to the gff3 format, representing the genome annotation v2.2.
 
-More manual adjustment is required. First, the phase field in the annotation file is incorrect (likely an issue with TSEBRA). Secondly, seemingly a bug in TSEBRA causes the annotated sequence to be changed in the case that the stop codon is split up by an intron. I will save the annotation with the manually corrected AmMYBl1 as a gtf file under a different name and fix the phasing issue. The new manually corrected input file is data/reannotation_correction/manual/manually_MYBl1_corrected.gtf.
+More manual adjustment is required. First, the phase field in the annotation file is incorrect (likely an issue with TSEBRA). Secondly, seemingly a bug in TSEBRA causes the annotated sequence to be changed in the case that the stop codon is split up by an intron. I will save the annotation with the manually corrected AmMYBl1 as a gtf file under a different name and fix the phasing issue. The new manually corrected input file is runs/reannotation_correction/manual/manually_MYBl1_corrected.gtf.
 
 As a first step, the genes with wrong values in their last exons are manually adjusted. This is the list of identifiers with an incorrect last exon, the genes were corrected based on their annotated cpc2 cds predictions.
 ERROR: Proteins do not match for transcript AHp001894.1	Strand:+	Exons: 2 checked, corrected
@@ -450,7 +450,7 @@ names(annotation.cds.fasta[which((width(annotation.cds.fasta) %% 3) != 0)])
 
 
 ```{r}
-annotation.gtf <- read.gtf("data/reannotation_correction/manual/manually_MYBl1_corrected.gtf")
+annotation.gtf <- read.gtf("runs/reannotation_correction/manual/manually_MYBl1_corrected.gtf")
 # searched the above list in command line using grep -n to identify the lines which need to be corrected
 annotation.gtf[14134,4] <- 31584484
 annotation.gtf[93858,4] <- 340181
@@ -486,8 +486,8 @@ annotation.gtf[178221,5] <- 807 # minus strand, only adjust by 1
 
 
 # write corrected table
-write.table(annotation.gtf[,1:9], 
-          "data/reannotation_correction/manual/manually_genes_corrected.gtf",
+write.table(annotation.gtf[,1:9],
+          "runs/reannotation_correction/manual/manually_genes_corrected.gtf",
           col.names = F, row.names = F, quote=F, sep ="\t")
 ```
 
@@ -495,40 +495,40 @@ After manual correction of genes, fix the phase attribute.
 
 ```{bash}
 # Add a gene line and convert to gff for gffvalidator. Correct the phasing field using gffvalidator.
-/home/tom/Documents/tools/gffread-0.12.7/gffread -E data/reannotation_correction/manual/manually_genes_corrected.gtf --keep-genes > data/reannotation_correction/manual/manually_genes_corrected.gene.gff
+/home/tom/Documents/tools/gffread-0.12.7/gffread -E runs/reannotation_correction/manual/manually_genes_corrected.gtf --keep-genes > runs/reannotation_correction/manual/manually_genes_corrected.gene.gff
 
 # the genes with wrong coordinates should be fixed at this step, so that they can be assigned the correct phasing attribute
-gt gff3 -tidy -force -retainids -addids no -o data/reannotation_correction/manual/manually_genes_corrected.gene.phase.gff data/reannotation_correction/manual/manually_genes_corrected.gene.gff
+gt gff3 -tidy -force -retainids -addids no -o runs/reannotation_correction/manual/manually_genes_corrected.gene.phase.gff runs/reannotation_correction/manual/manually_genes_corrected.gene.gff
 
 # remove empty lines from the gtf file, with only "###"
-grep -v "###" data/reannotation_correction/manual/manually_genes_corrected.gene.phase.gff > data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff
+grep -v "###" runs/reannotation_correction/manual/manually_genes_corrected.gene.phase.gff > runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff
 ```
 
 Convert to other formats using gffread.
 
 ```{bash}
 # convert to gtf format
-/home/tom/Documents/tools/gffread-0.12.7/gffread -v -T --keep-genes data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff > data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf
+/home/tom/Documents/tools/gffread-0.12.7/gffread -v -T --keep-genes runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff > runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf
 
 # extract prot (-y) and cds (-x) fasta files
-/home/tom/Documents/tools/gffread-0.12.7/gffread -x data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta \
-	-y data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
-	-g polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
-	data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf
-	
+/home/tom/Documents/tools/gffread-0.12.7/gffread -x runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta \
+	-y runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
+	-g runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta \
+	runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf
+
 # convert to gff3 format
-#/home/tom/Documents/tools/gffread-0.12.7/gffread data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf > \
-#	data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff
-	
+#/home/tom/Documents/tools/gffread-0.12.7/gffread runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gtf > \
+#	runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.gff
+
 # copy to the annotation directory:
-cp data/reannotation_correction/manual/A* polished_genome_annotation/annotation/
+cp runs/reannotation_correction/manual/A* runs/polished_genome_annotation/annotation/
 ```
 
 Perform quality control of the annotated genes:
 
 ```{r}
 # load in cds fasta
-annotation.cds.fasta <- readBStringSet(filepath = "data/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta")
+annotation.cds.fasta <- readBStringSet(filepath = "runs/reannotation_correction/manual/Ahypochondriacus_2.2_polished_corrected.cds.fasta")
 
 # are there genes with length != a multiple of 3?
 which((width(annotation.cds.fasta) %% 3) != 0)
@@ -549,30 +549,28 @@ Compare annotation completeness of genome annotation v2.2 with previously publis
 # genome annotation v2.2
 # compare against the orthoDBv10 Embryophyta dataset, using 7 threads
 busco -m protein \
-  -i polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
+  -i runs/polished_genome_annotation/annotation/Ahypochondriacus_2.2_polished_corrected.prot.fasta \
   -o genome_annotation_v2.2 \
   -l embryophyta_odb10 \
-  --out_path data/annotation_analysis/busco/ \
-  --download_path data/annotation_analysis/busco/datasets/ \
+  --out_path runs/annotation_analysis/busco/ \
+  --download_path runs/annotation_analysis/busco/datasets/ \
   -c 6
-  
+
 # genome annotation v2.1
 busco -m protein \
   -i /home/tom/Documents/reference_genomes/Ahypochondriacus/annotation/Ahypochondriacus_459_v2.1.protein.fa \
   -o genome_annotation_v2.1 \
   -l embryophyta_odb10 \
-  --out_path data/annotation_analysis/busco/ \
-  --download_path data/annotation_analysis/busco/datasets/ \
+  --out_path runs/annotation_analysis/busco/ \
+  --download_path runs/annotation_analysis/busco/datasets/ \
   -c 6
-  
+
 # A. cruentus annotation
 busco -m protein \
   -i /home/tom/Documents/reference_genomes/Acruentus/annotation/Amacr_pep_20210312.tfa \
   -o Acruentus_annotation \
   -l embryophyta_odb10 \
-  --out_path data/annotation_analysis/busco/ \
-  --download_path data/annotation_analysis/busco/datasets/ \
+  --out_path runs/annotation_analysis/busco/ \
+  --download_path runs/annotation_analysis/busco/datasets/ \
   -c 6
 ```
-
-
diff --git a/workflows/other_inferences/infer_coverage_along_genome.sh b/workflows/other_inferences/infer_coverage_along_genome.sh
index f534a07f001a3ebfbb676bd2d448a4e45c08cccb..8db44b51b0be8b847020d4121ab7cb8fa517668b 100644
--- a/workflows/other_inferences/infer_coverage_along_genome.sh
+++ b/workflows/other_inferences/infer_coverage_along_genome.sh
@@ -16,10 +16,10 @@ module load samtools/1.13
 
 
 #Set input and parameters
-read1=raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz
-read2=raw_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz
-input=polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
-outdir=data/mapping_bias_inference/genome_coverage/
+read1=studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_1.fastq.gz
+read2=studies/additional_data/lightfoot_WGS_short_reads/SRR2106212/SRR2106212_2.fastq.gz
+input=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
+outdir=runs/mapping_bias_inference/genome_coverage/
 outbam=SRR2106212.sorted.bam
 outdedup=SRR2106212.sorted.dedup.bam
 outcov=SRR2106212.coverage
diff --git a/workflows/other_inferences/mapping_bias_inference_analysis.Rmd b/workflows/other_inferences/mapping_bias_inference_analysis.Rmd
index 5de4884a74dc617ac05bf6f467e3afcb015b986f..97eb942b6a09d110545361c60a301a5394fbcc6c 100644
--- a/workflows/other_inferences/mapping_bias_inference_analysis.Rmd
+++ b/workflows/other_inferences/mapping_bias_inference_analysis.Rmd
@@ -18,7 +18,7 @@ Analyse the observed bias in read mapping observed in ATAC and WG sequencing. A
 
 ```{r}
 # read in data
-coverage <- read.table(file = "data/mapping_bias_inference/genome_coverage/SRR2106212.sorted.dedup.10.depth")
+coverage <- read.table(file = "runs/mapping_bias_inference/genome_coverage/SRR2106212.sorted.dedup.10.depth")
 
 # calculate coverage in windows with fixed window size
 windowsize <- 5000
@@ -46,7 +46,7 @@ ggplot(data = window_coverage) +
   theme(text = element_text(size = 22))
 
 # save plot
-ggsave(filename = "plots/mapping_bias_inference/50k_window_read_coverage.png",
+ggsave(filename = "runs/plots/mapping_bias_inference/50k_window_read_coverage.png",
        width = 14, height = 8)
 ```
 
@@ -54,11 +54,11 @@ Investigate if genomic features correlate with the observed mapping bias. Calcul
 
 ```{bash}
 # create genome file for bedtools
-awk -F'\t' 'BEGIN {OFS = FS} {print $1,$2}' /projects/ag-stetter/reference_genomes/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta.fai > data/mapping_bias_inference/bedtools_genome.txt
+awk -F'\t' 'BEGIN {OFS = FS} {print $1,$2}' /projects/ag-stetter/reference_genomes/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta.fai > runs/mapping_bias_inference/bedtools_genome.txt
 # make windows using bedtools, windowsize 5k, include only Scaffolds
-bedtools makewindows -g data/mapping_bias_inference/bedtools_genome.txt -w 5000 | grep "Scaffold" > data/mapping_bias_inference/all_genome_windows.bed
+bedtools makewindows -g runs/mapping_bias_inference/bedtools_genome.txt -w 5000 | grep "Scaffold" > runs/mapping_bias_inference/all_genome_windows.bed
 # calculate statistics in windows
-bedtools nuc -fi /projects/ag-stetter/reference_genomes/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta -bed data/mapping_bias_inference/genome_windows.bed > data/mapping_bias_inference/window_stats.txt
+bedtools nuc -fi /projects/ag-stetter/reference_genomes/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta -bed runs/mapping_bias_inference/genome_windows.bed > runs/mapping_bias_inference/window_stats.txt
 ```
 
 
@@ -66,14 +66,14 @@ Plot Scaffold 10, with read depth and plastid genome content
 
 ```{r}
 # read in paf files from minimap2
-chloroplast.paf <- read_paf("data/mapping_bias_inference/plastid_to_genome/chloroplast_to_genome.paf")
-mito.paf <- read_paf("data/mapping_bias_inference/plastid_to_genome/mitochondrium_to_genome.paf")
+chloroplast.paf <- read_paf("runs/mapping_bias_inference/plastid_to_genome/chloroplast_to_genome.paf")
+mito.paf <- read_paf("runs/mapping_bias_inference/plastid_to_genome/mitochondrium_to_genome.paf")
 mito.df <- as.data.frame(mito.paf)
 chloro.df <- as.data.frame(chloroplast.paf)
 
 # read in mapping files for cruentus
-chloroplast.cruentus.paf <- read_paf("data/mapping_bias_inference/plastid_to_genome/chloroplast_to_genome_cruentus.paf")
-mito.cruentus.paf <- read_paf("data/mapping_bias_inference/plastid_to_genome/mitochondrium_to_genome_cruentus.paf")
+chloroplast.cruentus.paf <- read_paf("runs/mapping_bias_inference/plastid_to_genome/chloroplast_to_genome_cruentus.paf")
+mito.cruentus.paf <- read_paf("runs/mapping_bias_inference/plastid_to_genome/mitochondrium_to_genome_cruentus.paf")
 mito.cruentus.df <- as.data.frame(mito.cruentus.paf)
 chloro.cruentus.df <- as.data.frame(chloroplast.cruentus.paf)
 
@@ -110,7 +110,7 @@ ggplot(data = window_coverage) +
   theme(text = element_text(size = 22))
 
 
-ggsave(filename = "plots/mapping_bias_inference/5kb_outlier_windows_zoom.png",
+ggsave(filename = "runs/plots/mapping_bias_inference/5kb_outlier_windows_zoom.png",
        width = 10, height = 6, bg = "white")
 ```
 
@@ -123,7 +123,7 @@ mito.df %>%
   group_by(tname) %>%
   summarize(total_align_length = sum(alen)) %>%
   mutate(percent_aligned = (total_align_length/mito.df$qlen[1])*100) %>%
-  arrange(desc(percent_aligned)) 
+  arrange(desc(percent_aligned))
 
 # how much chloroplast was mapped?
 chloro.df %>%
@@ -172,8 +172,8 @@ There are structural differences in GC content between the different genomes. Wh
 
 ```{bash}
 # GC content of different genomes
-seqkit fx2tab --name --gc data/mapping_bias_inference/plastid_to_genome/Ah_chloroplast.fasta
-seqkit fx2tab --name --gc data/mapping_bias_inference/plastid_to_genome/Bv_mitochondrium.fasta
+seqkit fx2tab --name --gc runs/mapping_bias_inference/plastid_to_genome/Ah_chloroplast.fasta
+seqkit fx2tab --name --gc runs/mapping_bias_inference/plastid_to_genome/Bv_mitochondrium.fasta
 seqkit fx2tab --name --gc polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta | grep "Scaffold"
 ```
 
@@ -247,7 +247,7 @@ Compare with functional annotation:
 
 ```{r}
 # load functional annotation
-functional_annotation <- readxl::read_xlsx(path = "data/functional_annotation/eggnog_mapper/MM_8wexw920.emapper.annotations.xlsx")
+functional_annotation <- readxl::read_xlsx(path = "runs/functional_annotation/eggnog_mapper/MM_8wexw920.emapper.annotations.xlsx")
 colnames(functional_annotation) <- functional_annotation[2,]
 functional_annotation <- functional_annotation[-(1:2),]
 
@@ -258,8 +258,3 @@ mito_functions <- functional_annotation %>%
 chloro_functions <- functional_annotation %>%
   filter(query %in% chloro_overlap$transcript_id)
 ```
-
-
-
-
-
diff --git a/workflows/other_inferences/plastid_to_genome_mapping.sh b/workflows/other_inferences/plastid_to_genome_mapping.sh
index 279e7c7d1e95e65d840836c43c53adcf0858d999..b2c694e75b569ead2d428a8f7bc76d07f07ecaf6 100644
--- a/workflows/other_inferences/plastid_to_genome_mapping.sh
+++ b/workflows/other_inferences/plastid_to_genome_mapping.sh
@@ -13,12 +13,12 @@ source $CONDA_PREFIX/etc/profile.d/conda.sh
 conda activate isoseq
 
 # create output directory
-MAPPINGOUT=data/mapping_bias_inference/plastid_to_genome/
+MAPPINGOUT=runs/mapping_bias_inference/plastid_to_genome/
 mkdir -p "$MAPPINGOUT"
 
 ### MAPPING
 # map reads onto the reference genome
-REFERENCE=/projects/ag-stetter/reference_genomes/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta
+REFERENCE=runs/polished_genome_annotation/assembly/Ahypochondriacus_2.2_polished.softmasked.fasta
 
 # Align sequences to reference genome
 # sequences obtained from (Beta vulgaris, mitchondrium): https://www.ncbi.nlm.nih.gov/nuccore/BA000009.3
diff --git a/workflows/other_inferences/readme.txt b/workflows/other_inferences/readme.txt
index 9b3f718dbe1354c1915fb386295cd3c12e209776..b49fe8f5741a93d84e891444d9e737e2759ae58e 100644
--- a/workflows/other_inferences/readme.txt
+++ b/workflows/other_inferences/readme.txt
@@ -4,11 +4,11 @@ Mapping bias and plastid content investigation
 
 ### Script order:
 
-- code/other_inferences/infer_coverage_along_genome.sh
+- workflows/other_inferences/infer_coverage_along_genome.sh
 Map WGS reads against the reference genome to investigate mapping bias
 
-- code/other_inferences/plastid_to_genome_mapping.sh
+- workflows/other_inferences/plastid_to_genome_mapping.sh
 Map chloroplast and mitochondria sequences to the reference genome
 
-- code/other_inferences/mapping_bias_inference_analysis.Rmd
+- workflows/other_inferences/mapping_bias_inference_analysis.Rmd
 Plot mapping bias with annotated chloroplast and mitochondrial sequences
diff --git a/workflows/readme.txt b/workflows/readme.txt
index 665da22f3863d059cfb0b6ac3f70fd0bd3be8701..63f041c071444fc733ff4b8b46b4519ab744ac09 100644
--- a/workflows/readme.txt
+++ b/workflows/readme.txt
@@ -4,26 +4,26 @@ All code used for polishing, masking and reannotation of the A. hypochondriacus
 
 ### Order of usage:
 
-- code/genome_polishing/
+- workflows/genome_polishing/
 polish the previously published A. hypochondriacus reference genome
 
-- code/repeat_masking/
+- workflows/repeat_masking/
 to prepare computational annotation, softmask repetitive elements in the polished genome
 
-- code/braker2/
+- workflows/braker2/
 perform computational annotation of the softmasked reference genome using BRAKER2
 
-- code/isoseq_assembly/
+- workflows/isoseq_assembly/
 assembly long read transcript sequencing data and compare effects of genome polishing on reported completeness
 
-- code/merge_annotation/
+- workflows/merge_annotation/
 merge computational annotation with long-read transcript sequencing data and process output into genome annotation v2.2
 
-- code/annotation_analysis/
+- workflows/annotation_analysis/
 identify flavonoid and betalain pathway genes, as well as MYB transcription factors in the genome annotation v2.2
 
-- code/other_inferences/
+- workflows/other_inferences/
 mapping of chloroplast and mitchondrium sequences to the reference genome, analysis of mapping coverage bias
 
-- code/BSA/
+- workflows/BSA/
 bulk segregant analysis and analysis of RNA-seq data from pooled flower tissue
diff --git a/workflows/repeat_masking/analyse_repetitive_elements.Rmd b/workflows/repeat_masking/analyse_repetitive_elements.Rmd
index 149b81c0d133112fa45d751176ceba893326bb0d..40eaa9afe525e587dac3e0b3a2d8e792db1967b6 100644
--- a/workflows/repeat_masking/analyse_repetitive_elements.Rmd
+++ b/workflows/repeat_masking/analyse_repetitive_elements.Rmd
@@ -16,7 +16,7 @@ Prepare input file for processing using R:
 
 ```{bash}
 # process build summary output
-tail -n +41 data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary | head -n -907 > data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary.tsv
+tail -n +41 runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary | head -n -907 > runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary.tsv
 ```
 
 
@@ -24,12 +24,12 @@ Analyse repetitive element content and composition:
 
 ```{r}
 # read in buildSummary
-buildSummary <- read_table(file = "data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary.tsv",
+buildSummary <- read_table(file = "runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary.tsv",
                            col_names = c("repeat", "count", "length", "percent","drop"))
 buildSummary <- buildSummary[,1:4]
 
 # read in repeat classification
-new_TE_classification <- read_delim(file = "data/repeatmasking/reclassification/classified.updated.txt",
+new_TE_classification <- read_delim(file = "runs/repeatmasking/reclassification/classified.updated.txt",
                                     delim = "#",
                                     col_names = c("TE_family", "classification"))
 
@@ -49,6 +49,6 @@ summary_table %>%
   summarize(sum_total_length = sum(total_length),
             sum_percentage = sum(percentage))
 
-write.csv(summary_table, file = "data/repeatmasking/reclassification/reclassified.output.tbl",
+write.csv(summary_table, file = "runs/repeatmasking/reclassification/reclassified.output.tbl",
           quote = F, row.names = F)
 ```
diff --git a/workflows/repeat_masking/readme.txt b/workflows/repeat_masking/readme.txt
index 0fa63432df4359bd85b9c9d507b8363b471fd415..609c3be3937d01c2b7449f1346498ec293ac7c58 100644
--- a/workflows/repeat_masking/readme.txt
+++ b/workflows/repeat_masking/readme.txt
@@ -4,11 +4,11 @@ The polished reference genome is masked for repetitive elements in order to prep
 
 ### Script order:
 
-- code/repeat_masking/run_repeatmodeler.sh
+- workflows/repeat_masking/run_repeatmodeler.sh
 run Repeatmodeler to identify repetitive elements in the polished reference genome
 
-- code/repeat_masking/run_repeatmasker.sh
+- workflows/repeat_masking/run_repeatmasker.sh
 run Repeatmasker on the Repeatmodeler output to classify identified elements and mask the polished reference genome
 
-- code/repeat_masking/analyse_repetitive_elements.Rmd
+- workflows/repeat_masking/analyse_repetitive_elements.Rmd
 analyse the repetitive element composition
diff --git a/workflows/repeat_masking/run_repeatmasker.sh b/workflows/repeat_masking/run_repeatmasker.sh
index d848d5e77107ff63567b624be9cdda4c7ccfdf8e..fa9cdbade24c88f58b6bd8d13a076326608a91a5 100644
--- a/workflows/repeat_masking/run_repeatmasker.sh
+++ b/workflows/repeat_masking/run_repeatmasker.sh
@@ -17,7 +17,7 @@
 module load repeatmasker/4.1.1
 
 # create database directory
-RMOUT=data/repeatmasking/repeatmasker
+RMOUT=runs/repeatmasking/repeatmasker
 
 mkdir -p $RMOUT
 
@@ -26,12 +26,12 @@ mkdir -p $RMOUT
 # for 20 threads, the pa setting while using rmblast should be 5
 
 # Converted the polished reference genome fasta to all capital letters as preparation to repeatmasking:
-awk '/^>/ {print($0)}; /^[^>]/ {print(toupper($0))}' data/NextPolish/processed/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta \
+awk '/^>/ {print($0)}; /^[^>]/ {print(toupper($0))}' runs/NextPolish/processed/Ahypochondriacus/V2_2/Ahypochondriacus_2.2_polished.softmasked.fasta \
 	> "$RMOUT"/Ahypochondriacus_2.2_polished.capital.fasta
 
 ### run RepeatMasker
 # lib = repeat database created with repeatmodeler
-RepeatMasker -lib data/repeatmasking/repeatmodeler/consensi.fa.classified \
+RepeatMasker -lib runs/repeatmasking/repeatmodeler/consensi.fa.classified \
 	-pa 5 \
 	-small \
 	-e rmblast \
@@ -50,4 +50,4 @@ bedtools maskfasta \
 	-fo "$RMOUT"/output/Ahypochondriacus_2.2_polished.softmasked.fasta
 
 # more detailed summary:
-buildSummary.pl data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.out > data/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary
+buildSummary.pl runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.out > runs/repeatmasking/repeatmasker/Ahypochondriacus_2.2_polished.capital.fasta.buildSummary
diff --git a/workflows/repeat_masking/run_repeatmodeler.sh b/workflows/repeat_masking/run_repeatmodeler.sh
index c33ef68b266852defd7cd38671bed273e9532dfd..a2904a74a71e9548265f4f58f9fa4aa04f87a864 100644
--- a/workflows/repeat_masking/run_repeatmodeler.sh
+++ b/workflows/repeat_masking/run_repeatmodeler.sh
@@ -19,23 +19,23 @@
 module load repeatmodeler/2.0.1
 
 # create database directory
-mkdir -p data/repeatmodeler/database/
+mkdir -p runs/repeatmodeler/database/
 
 ### Main
 # increased number of tasks, each rmblast job will take 4 threads
 # for 20 threads, the pa setting while using rmblast should be 5
 
 # Create database
-BuildDatabase -name data/repeatmodeler/database/polished \
-	data/NextPolish/processed/Ahypochondriacus_2.2_polished.fasta
+BuildDatabase -name runs/repeatmodeler/database/polished \
+	runs/NextPolish/processed/Ahypochondriacus_2.2_polished.fasta
 
 # run Repeatmodeler
-RepeatModeler -database data/repeatmodeler/database/polished \
+RepeatModeler -database runs/repeatmodeler/database/polished \
 	-pa 5 \
 	-LTRStruct
 
 # reclassify identified repeats
-mkdir -p data/repeatmasking/reclassification/
+mkdir -p runs/repeatmasking/reclassification/
 
 # repeatmasker version 4.1.5
-RepeatClassifier -consensi data/repeatmasking/reclassification/consensi.fa -stockholm data/repeatmasking/reclassification/families.stk
+RepeatClassifier -consensi runs/repeatmasking/reclassification/consensi.fa -stockholm runs/repeatmasking/reclassification/families.stk