diff --git a/.gitattributes b/.gitattributes index 2635f9cbbd98377405c62ee003f1d6b4b772e7b2..253b39455edcb03315deaf867c0f7f5f9e5b839b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ **/dataset/** filter=lfs diff=lfs merge=lfs -text +/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/all_clusters_summed_intensity.gda filter=lfs diff=lfs merge=lfs -text diff --git a/Figure_1.pdf b/Figure_1.pdf deleted file mode 100644 index ddc10317c7b00be259947516552980f2cdaacbd2..0000000000000000000000000000000000000000 Binary files a/Figure_1.pdf and /dev/null differ diff --git a/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/210809_lip_lib_cmQTL_val_adapt.txt b/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/210809_lip_lib_cmQTL_val_adapt.txt new file mode 100644 index 0000000000000000000000000000000000000000..bffba321ef504bb5e971f97770018f80e7eb842b --- /dev/null +++ b/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/210809_lip_lib_cmQTL_val_adapt.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a668c5bc864494c525e2987d756602950e9f8360de94d640ac2f6e080e5bfd +size 30452 diff --git a/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/cmQTL_val_1_2_lipids_clusters.gda b/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/cmQTL_val_1_2_lipids_clusters.gda new file mode 100644 index 0000000000000000000000000000000000000000..7796d09e58e8fd6f5469d12ce30e43e13bdb785b --- /dev/null +++ b/assays/cmQTL_val1_GH_2020_apolar_LC_MS/dataset/cmQTL_val_1_2_lipids_clusters.gda @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e88be16b85b21c08c6c59e794bf1c60b8ac6b46e6ff03b1fefbea142add8b86 +size 30445094 diff --git a/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/Sly_sec_met_library.txt b/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/Sly_sec_met_library.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e762899bff5fa2b6ce789443b15434e5e7a912b --- /dev/null +++ 
b/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/Sly_sec_met_library.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd81c4ed24e00a1c9370cc70cae9f3be40be3f416631be3134526d544acf9980 +size 57995 diff --git a/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/all_clusters_summed_intensity.gda b/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/all_clusters_summed_intensity.gda new file mode 100644 index 0000000000000000000000000000000000000000..a63a4227b0d644e35a6057178337b5c08918ae06 --- /dev/null +++ b/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/all_clusters_summed_intensity.gda @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b73eb94d6c6b4e8afcea7a587af2587ead21eceeb773d59129c2403745b35ed +size 196568863 diff --git a/runs/GC-MS analysis/Thumbs.db b/runs/GC-MS analysis/Thumbs.db index 3f4eebbb10fd9cde90e60abe568afd1e90f063f8..d2ecd2542cdac8bfa6cab7fed27e06343956beca 100644 Binary files a/runs/GC-MS analysis/Thumbs.db and b/runs/GC-MS analysis/Thumbs.db differ diff --git a/workflows/Whole_metabolome_analysis/Cross_metabolite_comparisons.R b/workflows/Whole_metabolome_analysis/Cross_metabolite_comparisons.R new file mode 100644 index 0000000000000000000000000000000000000000..3ea02a744e7941ef16d8b7af7d04b42b7f308a37 --- /dev/null +++ b/workflows/Whole_metabolome_analysis/Cross_metabolite_comparisons.R @@ -0,0 +1,1455 @@ +rm(list = ls()) +library(tidyverse) +library(ggpubr) +library(glue) +library(ggtext) +library(cowplot) +library(ggbeeswarm) +library(extrafont) +library(ggprism) +library(viridis) + +# Directory setting ------------------------------------------------------- + +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "Figures", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- 
str_c(current, out, sep = "/") + +# Primary loading --------------------------------------------------------- +setwd(source) +prim_source <- str_c(getwd(), "/1. Primary") +setwd(prim_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(prim_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_prim <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "prim") + +genotypes <- fc_1_prim %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_prim <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim <- read_csv("p_values.csv") + +fc_1_cv_prim <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv_prim <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_prim <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_lt_prim <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim_lt <- read_csv("p_values_levene.csv") + +met_prim <- fc_1_prim %>% + distinct(met, Compound_Name, 
Compound_Class) + +cv_prim_out <- fc_1_cv_prim %>% + left_join(met_prim) %>% + left_join(sig_prim_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "prim") + +# Secondary loading --------------------------------------------------------- +setwd(source) +sec_source <- str_c(getwd(), "/2. Secondary") +setwd(sec_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(sec_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_sec <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "sec") + +genotypes <- fc_1_sec %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_sec <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_sec <- read_csv("p_values.csv") + +fc_1_cv_sec <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv_sec <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_sec_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_sec <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + 
# Secondary loading (continued) -------------------------------------------
# Levene-test tables and CV export for the secondary-metabolite platform.
# Files are read relative to the working directory set by the preceding
# loading code.

fc_1_ind_lt_sec <- read_csv("individual_values_levene.csv") %>%
  select(-genotype) %>%
  left_join(genotypes)

sig_sec_lt <- read_csv("p_values_levene.csv")

met_sec <- fc_1_sec %>%
  distinct(met, Compound_Name, Compound_Class)

cv_sec_out <- fc_1_cv_sec %>%
  left_join(met_sec) %>%
  left_join(sig_sec_cv, by = c("tissue", "met", "alias" = "alias2")) %>%
  select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>%
  mutate(platform = "sec")

# Lipids loading ---------------------------------------------------------

# Plotting order of the genotypes; mutant names carry markdown italics.
genotype_levels <- c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")

setwd(source)
lip_source <- str_c(getwd(), "/3. Lipids")
setwd(lip_source)

# Analysis folders are named "<yymmdd>_analysis"; take the most recent one.
analysis_dirs <- list.files(pattern = "^\\d{6}_analysis$")
if (length(analysis_dirs) == 0) {
  stop("No '<yymmdd>_analysis' folder found in ", lip_source, call. = FALSE)
}
latest <- str_sort(analysis_dirs, decreasing = TRUE)[[1]]

latest_analysis <- str_c(lip_source, "/", latest)

setwd(latest_analysis)

latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}")

fc_1_lip <- read_csv("mean_values_se_n.csv") %>%
  mutate(genotype = fct_relevel(genotype, genotype_levels)) %>%
  mutate(platform = "lip")

genotypes <- fc_1_lip %>%
  distinct(alias, genotype) %>%
  mutate(genotype_label = str_remove_all(genotype, "\\*"))

fc_1_ind_lip <- read_csv("individual_values.csv") %>%
  select(-genotype) %>%
  left_join(genotypes)

sig_lip <- read_csv("p_values.csv")

fc_1_cv_lip <- read_csv("mean_values_se_n_cv.csv") %>%
  mutate(genotype = fct_relevel(genotype, genotype_levels))

fc_1_ind_cv_lip <- read_csv("individual_values_cv.csv") %>%
  select(-genotype) %>%
  left_join(genotypes)

# group1/group2 are wrapped in asterisks (except MoneyMaker) so that they
# match the genotype labels used everywhere else.
sig_lip_cv <- read_csv("p_values_cv.csv") %>%
  mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"), glue("*{group1}*"))),
         group1 = fct_relevel(group1, genotype_levels),
         group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"), glue("*{group2}*"))),
         group2 = fct_relevel(group2, genotype_levels))

fc_1_lt_lip <- read_csv("mean_values_se_n_levene.csv") %>%
  mutate(genotype = fct_relevel(genotype, genotype_levels))

fc_1_ind_lt_lip <- read_csv("individual_values_levene.csv") %>%
  select(-genotype) %>%
  left_join(genotypes)

sig_lip_lt <- read_csv("p_values_levene.csv")

met_lip <- fc_1_lip %>%
  distinct(met, Compound_Name, Compound_Class)

cv_lip_out <- fc_1_cv_lip %>%
  left_join(met_lip) %>%
  left_join(sig_lip_cv, by = c("tissue", "met", "alias" = "alias2")) %>%
  select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>%
  mutate(platform = "lip")

# Data combination --------------------------------------------------------
# From here on all outputs are written relative to the figure folder.
setwd(out_dir)

fc_1 <- fc_1_prim %>%
  bind_rows(fc_1_sec, fc_1_lip)

fc_1_ind <- fc_1_ind_prim %>%
  bind_rows(fc_1_ind_sec, fc_1_ind_lip)

fc_1_cv <- fc_1_cv_prim %>%
  bind_rows(fc_1_cv_sec, fc_1_cv_lip)

met_dat <- fc_1 %>%
  distinct(met, Compound_Name, Compound_Class)

classes_tidy <- met_dat %>%
  mutate(`ChEBI simplified` = if_else(str_detect(Compound_Class, "carbohydrate"), "carbohydrate or derivative",
                                      if_else(str_detect(Compound_Class, "amino_acid"), "amino acid or derivative",
                                              if_else(str_detect(Compound_Class, "carboxylic_acid"), "carboxylic acid",
                                                      "other"))))

genotypes <- fc_1 %>%
  distinct(alias, genotype) %>%
  mutate(genotype_label = str_remove_all(genotype, "\\*"))

per_comp_y <- fc_1 %>%
  group_by(tissue, treatment, met) %>%
  summarise(tot_val = max(mean_fc + se))

sig_lt <- sig_prim_lt %>%
  bind_rows(sig_sec_lt, sig_lip_lt)

sig <- sig_prim %>%
  bind_rows(sig_sec, sig_lip)

sig_cv <- sig_prim_cv %>%
  bind_rows(sig_sec_cv, sig_lip_cv)

cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
              "#0072B2", "#D55E00", "#CC79A7", "#000000")
bw_scale <- c("black", "black", "black", "black", "black", "black", "black")
vir_scale <- plasma(4, begin = 0.2, end = 1)

cv_out <- bind_rows(cv_prim_out, cv_sec_out, cv_lip_out)

write_csv(cv_out, "CV_supplementary.csv")

# Theme --------------------------------------------------------------
setwd(out_dir)
com_theme <- theme(axis.text.x = element_markdown(angle = 45, hjust = 1, size = 6, family = "sans"),
                   axis.text.y = element_text(size = 6, family = "sans"),
                   axis.title.x = element_blank(),
                   axis.title.y = element_text(size = 6, family = "sans"),
                   panel.background = element_rect(fill = "white"),
                   panel.border = element_rect(color = "black", fill = NA),
                   strip.text = element_text(size = 8, family = "sans", margin = margin(t = 1, r = 1, b = 1, l = 1, unit = "pt")),
                   text = element_text(size = 6, family = "sans"),
                   legend.title = element_blank(),
                   legend.text = element_markdown(size = 6),
                   plot.margin = unit(c(1, 1, 1, 1), "mm"),
                   legend.margin = margin(t = 0, r = 2, b = 0, l = 2, unit = "mm"))

# Helpers ------------------------------------------------------------------

# Adds per-metabolite scaled columns. Within each (Compound_Name, met) group:
#   log_norm       log2 of `log_col`, with +/-Inf replaced by 0
#   log_norm_level log_norm centred on its mean and divided by its range
#   z_score        `level_col` centred on its mean and divided by its range
# The result is ungrouped.
# NOTE(review): a group whose values are all identical has range 0, so the
# scaled columns become NaN (0/0) for it.
add_scaled_levels <- function(dat, log_col = mean_fc, level_col = mean_fc) {
  dat %>%
    group_by(Compound_Name, met) %>%
    mutate(log_norm = log2({{ log_col }}),
           log_norm = if_else(is.infinite(log_norm), 0, log_norm),
           log_norm_level = (log_norm - mean(log_norm)) / (max(log_norm) - min(log_norm)),
           z_score = ({{ level_col }} - mean({{ level_col }})) /
             (max({{ level_col }}) - min({{ level_col }}))) %>%
    ungroup()
}

# Picks the sample columns of a wide table (names containing a treatment
# level or a tissue name) and returns them as a matrix.
select_group_matrix <- function(wide) {
  wide %>%
    select(contains("0.4"), contains("0.6"),
           contains("0.8"),
           contains("1"),
           contains("leaves"), contains("fruits")) %>%
    as.matrix()
}

# Averages `value` per metabolite, tissue and treatment (i.e. across
# genotypes) and keeps one tissue. The result stays grouped, as returned
# by summarise().
mean_profiles <- function(dat, value, tissue_name) {
  dat %>%
    group_by(met, tissue, treatment, Compound_Name, Compound_Class) %>%
    summarise(mean_fc = mean({{ value }})) %>%
    filter(tissue == tissue_name)
}

# Wide metabolite x "<tissue>_<treatment>" table of range-scaled levels,
# with `met` as row names, ready for k-means.
widen_cluster_profiles <- function(profile_means) {
  wide <- profile_means %>%
    add_scaled_levels() %>%
    mutate(group = as_factor(str_c(tissue, treatment, sep = "_"))) %>%
    pivot_wider(id_cols = c(Compound_Name, Compound_Class, met),
                names_from = group,
                values_from = z_score) %>%
    arrange(Compound_Class, Compound_Name) %>%
    as.data.frame()
  rownames(wide) <- wide$met
  wide
}

# One facet per cluster: cubic fit of the range-scaled level against
# treatment, labelled with the number of rows per cluster and treatment.
# `label_y` is the y position of that label.
plot_cluster_trends <- function(profile_means, cluster_lookup, label_y) {
  profile_means %>%
    left_join(cluster_lookup, by = "met") %>%
    add_scaled_levels() %>%
    group_by(clust, treatment) %>%
    mutate(n = n()) %>%
    ungroup() %>%
    ggplot(aes(x = treatment, y = z_score)) +
    geom_smooth(aes(group = clust), method = "lm", formula = y ~ poly(x, 3)) +
    geom_label(aes(x = 0.7, y = label_y, label = n), size = 3, inherit.aes = FALSE) +
    facet_wrap(vars(clust), nrow = 2) +
    scale_color_manual(values = cb_scale[5:8]) +
    com_theme +
    ylab("range-scaled level") +
    theme(legend.position = "bottom")
}

# Writes a cluster figure as PNG (10.5 x 8.25 cm, 300 dpi) and as RDS.
save_cluster_plot <- function(plot, png_file, rds_file) {
  ggsave(png_file,
         plot = plot,
         width = 10.5,
         height = 8.25,
         units = "cm",
         dpi = 300)
  saveRDS(plot, rds_file)
}

# Closes the active graphics device if there is one. dev.cur() returns 1
# (the "null device") when nothing is open, and dev.off() errors in that
# case, so the device number has to be tested rather than is.null().
close_open_device <- function() {
  if (dev.cur() > 1) {
    dev.off()
  }
  invisible(NULL)
}

# CV distribution ---------------------------------------------------------

fruit_cv <- fc_1_cv %>%
  filter(tissue == "fruits", genotype == "MoneyMaker") %>%
  select(met, fruit_cv = mean_cv)

leaf_cv <- fc_1_cv %>%
  filter(tissue == "leaves", genotype == "MoneyMaker") %>%
  select(met, leaf_cv = mean_cv)

fl_cv <- full_join(fruit_cv, leaf_cv)

t.test(fl_cv$fruit_cv, fl_cv$leaf_cv)
# The full join leaves NA where a metabolite was measured in one tissue only;
# restrict the correlation to complete pairs instead of returning NA.
cor(fl_cv$fruit_cv, fl_cv$leaf_cv, use = "complete.obs")

fc_1_cv %>%
  filter(genotype == "MoneyMaker") %>%
  ggplot(aes(x = tissue, y = mean_cv)) +
  geom_boxplot() +
  geom_line(aes(group = met), alpha = 0.1)

fl_cv %>%
  ggplot(aes(x = leaf_cv, y = fruit_cv)) +
  geom_point()

# Clustering k-means---------------------------------------------------------
# NOTE(review): kmeans() does not accept NA/NaN, so every metabolite needs a
# non-constant value at every treatment level - confirm for new data sets.

library(pheatmap)

heat_base <- mean_profiles(fc_1, mean_fc, "fruits")

heat.GC <- widen_cluster_profiles(heat_base)

mat.heat.GC <- select_group_matrix(heat.GC)

set.seed(1640)
k_means <- kmeans(mat.heat.GC, centers = 6)

clusters <- k_means$cluster

clust_tidy <- tibble("clust" = clusters) %>%
  mutate(met = names(clusters))

cluster_plot <- plot_cluster_trends(heat_base, clust_tidy, label_y = -0.5)
cluster_plot

save_cluster_plot(cluster_plot,
                  png_file = "clusters_all_lines.png",
                  rds_file = "met_clust_all_lines.RDS")

# Clustering k-means leaves---------------------------------------------------------

heat_base <- mean_profiles(fc_1, mean_fc, "leaves")

heat.GC <- widen_cluster_profiles(heat_base)

mat.heat.GC <- select_group_matrix(heat.GC)

set.seed(1640)
k_means <- kmeans(mat.heat.GC, centers = 6)

clusters <- k_means$cluster

clust_tidy <- tibble("clust" = clusters) %>%
  mutate(met = names(clusters))

cluster_plot <- plot_cluster_trends(heat_base, clust_tidy, label_y = -0.55)
cluster_plot

save_cluster_plot(cluster_plot,
                  png_file = "clusters_all_lines_leaves.png",
                  rds_file = "met_clust_all_lines_leaves.RDS")

# Clustering k-means levene---------------------------------------------------------

fc_1_ind_lt <- fc_1_ind_lt_prim %>%
  bind_rows(fc_1_ind_lt_sec, fc_1_ind_lt_lip)

# Same procedure as above, on the mean of the per-sample `lev_t` values.
heat_base <- fc_1_ind_lt %>%
  left_join(met_dat) %>%
  mean_profiles(lev_t, "fruits")

heat.GC <- widen_cluster_profiles(heat_base)

mat.heat.GC <- select_group_matrix(heat.GC)

set.seed(1640)
k_means <- kmeans(mat.heat.GC, centers = 6)

clusters <- k_means$cluster

clust_tidy <- tibble("clust" = clusters) %>%
  mutate(met = names(clusters))

cluster_plot <- plot_cluster_trends(heat_base, clust_tidy, label_y = -0.6)
cluster_plot

save_cluster_plot(cluster_plot,
                  png_file = "clusters_all_lines_levene.png",
                  rds_file = "clusters_all_lines_levene.RDS")

# Heatmap Phospholipids transp1-1 ----------------------------------------------------------------

# Wild-type reference level per tissue, treatment and metabolite.
fc_1_wt <- fc_1 %>%
  filter(genotype == "MoneyMaker") %>%
  select(mean_fc_wt = mean_fc, tissue, treatment, met)

# Level of each phospholipid in transp1-1 fruits relative to the wild type.
heat_base <- fc_1 %>%
  left_join(fc_1_wt) %>%
  mutate(mean_fc = mean_fc / mean_fc_wt) %>%
  filter(Compound_Class %in% c("Phospholipid"),
         genotype %in% c("*transp1-1*"),
         tissue == "fruits")

heat_nom <- heat_base %>%
  add_scaled_levels() %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  pivot_wider(id_cols = c(Compound_Name, Compound_Class, met),
              names_from = group,
              values_from = log_norm) %>%
  arrange(Compound_Class, Compound_Name) %>%
  as.data.frame()

rownames(heat_nom) <- heat_nom$Compound_Name

mat.heat_nom <- select_group_matrix(heat_nom)

annotation_row <- heat_nom %>%
  select(Compound_Class)

rownames(annotation_row) <- heat_nom$Compound_Name

# NOTE(review): these row names are built as tissue_treatment_genotype while
# the matrix columns are tissue_genotype_treatment; harmless as long as
# annotation_col is not passed to pheatmap() below.
annotation_col <- heat_base %>%
  distinct(tissue, treatment, genotype) %>%
  mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>%
  as.data.frame()

rownames(annotation_col) <- annotation_col$group

annotation_col <- annotation_col %>%
  select(-group, treatment, tissue, genotype)

heat_cols <- colnames(mat.heat_nom)

# Genotype keys use the same asterisk-wrapped labels as the genotype factor.
ann_colors <- list(
  tissue = c(fruits = "red", leaves = "darkgreen"),
  genotype = c(MoneyMaker = cb_scale[1], `*panK4-1*` = "brown", `*log2-1*` = "blue", `*transp1-1*` = cb_scale[4]),
  treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green"))

# 65 colours need 66 breaks.
pheatmap.GC <- pheatmap(mat.heat_nom,
                        color = colorRampPalette(c("blue", "white", "red"))(65),
                        breaks = seq(-3.25, 3.25, 0.1),
                        cluster_rows = TRUE,
                        cluster_cols = FALSE,
                        annotation_names_row = FALSE,
                        show_rownames = FALSE,
                        annotation_row = annotation_row,
                        number_color = "black",
                        fontsize_number = 6)

close_open_device()


## CV ----------------------------------------------------------------------
# Read from the script folder by absolute path; the working directory stays
# on the figure folder.
simple_class <- read_csv(file = file.path(current, "simplified_classes.csv"))

fc_1_cv_wt <- fc_1_cv %>%
  filter(genotype == "MoneyMaker") %>%
  select(mean_cv_wt = mean_cv, tissue, met)

# The genotype is compared with the markdown asterisks stripped, so the
# filter matches both "transp1-1" and "*transp1-1*".
# NOTE(review): p.adjust() runs over all joined rows before the filter, and
# adj.p.value is not used further down - confirm this is intended.
heat_base <- fc_1_cv %>%
  left_join(met_dat) %>%
  left_join(simple_class) %>%
  left_join(fc_1_cv_wt) %>%
  left_join(sig_cv) %>%
  mutate(mean_cv_fc = mean_cv / mean_cv_wt,
         adj.p.value = p.adjust(p.value, method = "fdr"),
         treatment = as_factor("CV")) %>%
  filter(Compound_Class %in% c("Phospholipid"),
         str_remove_all(genotype, "\\*") %in% c("transp1-1"),
         group2 == "*transp1-1*",
         tissue == "fruits")

heat_cv <- heat_base %>%
  add_scaled_levels(log_col = mean_cv_fc, level_col = mean_cv) %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  pivot_wider(id_cols = c(Compound_Name, Compound_Class, Compound_Class_simple, met),
              names_from = group,
              values_from = log_norm) %>%
  arrange(Compound_Class, Compound_Name) %>%
  as.data.frame()

rownames(heat_cv) <- heat_cv$Compound_Name

mat.heat_cv <- select_group_matrix(heat_cv)

annotation_row <- heat_cv %>%
  select(Compound_Class_simple)

rownames(annotation_row) <- heat_cv$Compound_Name

annotation_col <- heat_base %>%
  distinct(tissue, genotype) %>%
  mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>%
  as.data.frame()

rownames(annotation_col) <- annotation_col$group

annotation_col <- annotation_col %>%
  select(-group, tissue, genotype)

heat_cols <- colnames(mat.heat_cv)

pheatmap.GC <- pheatmap(mat.heat_cv,
                        color = colorRampPalette(c("blue", "white", "red"))(33),
                        breaks = seq(-1.65, 1.65, 0.1),
                        cluster_rows = TRUE,
                        cluster_cols = FALSE,
                        annotation_names_row = FALSE,
                        show_rownames = FALSE,
                        annotation_row = annotation_row,
                        number_color = "black",
                        fontsize_number = 6)

close_open_device()


## combined ----------------------------------------------------------------

# cbind() pairs rows by position only, so make sure both matrices describe
# the same compounds and put the CV rows in the order of the level rows.
stopifnot(setequal(rownames(mat.heat_nom), rownames(mat.heat_cv)))

mat_heat_comb <- mat.heat_nom %>%
  cbind(mat.heat_cv[rownames(mat.heat_nom), , drop = FALSE])

# Column names are "<tissue>_<genotype>_<treatment>"; only the treatment
# part is shown as column annotation.
annotation_col <- tibble(group = colnames(mat_heat_comb)) %>%
  mutate(sep = group) %>%
  separate(col = sep, sep = "_",
           into = c("tissue", "genotype", "treatment")) %>%
  select(group, treatment) %>%
  as.data.frame()

rownames(annotation_col) <- annotation_col$group

annotation_col <- annotation_col %>%
  select(-group)

ann_colors <- list(
  tissue = c(fruits = "red", leaves = "darkgreen"),
  genotype = c(MoneyMaker = cb_scale[1], `*panK4-1*` = "brown", `*log2-1*` = "blue", `*transp1-1*` = cb_scale[4]),
  treatment = c("0.4" = vir_scale[1], "0.6" = vir_scale[2], "0.8" = vir_scale[3], "1" = vir_scale[4], "CV" = "gray"))

# gaps_col = 4 puts a gap after the fourth column of the combined matrix.
pheatmap.GC <- pheatmap(mat_heat_comb,
                        color = colorRampPalette(c("#440154FF", "white", "#FDE725FF"))(33),
                        gaps_col = 4,
                        breaks = seq(-1.65, 1.65, 0.1),
                        cluster_rows = TRUE,
                        cluster_cols = FALSE,
                        annotation_names_row = FALSE,
                        show_rownames = TRUE,
                        show_colnames = FALSE,
                        annotation_col = annotation_col,
                        number_color = "black",
                        fontsize_number = 6,
                        treeheight_row = 10,
                        width = 1.5,
                        height = 3.25,
                        fontsize = 8,
                        annotation_colors = ann_colors)

heat_phos <- ggplotify::as.ggplot(pheatmap.GC)
saveRDS(heat_phos, "Phospholipids_transp1.RDS")

ggsave("Heatmap_transp1-1.png",
       plot = heat_phos,
       width = 6,
       height = 8.25,
       units = "cm",
       dpi = 300)

close_open_device()

# Heatmap WT all primary----------------------------------------------------------------

# NOTE(review): machine-specific absolute path; the script only runs where
# this drive is mapped.
red_met_class <- read_csv(file = "H:/3. cmQTL mapping/Shared_source_files/reduced_met_classes.csv") %>%
  select(-n)

# Wild-type fruit levels of the primary-metabolite platform.
heat_base <- fc_1 %>%
  filter(genotype %in% c("MoneyMaker"),
         tissue == "fruits", platform == "prim")

# Range-scaled levels, with the compound class replaced by the condensed
# class (Compound_Class_dense) from the reduced class table.
heat_nom <- heat_base %>%
  add_scaled_levels() %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  pivot_wider(id_cols = c(Compound_Name, Compound_Class, met),
              names_from = group,
              values_from = z_score) %>%
  left_join(red_met_class) %>%
  select(-Compound_Class, Compound_Class = Compound_Class_dense) %>%
  arrange(Compound_Class, Compound_Name) %>%
  as.data.frame()

rownames(heat_nom) <- heat_nom$met

mat.heat_nom <- select_group_matrix(heat_nom)

annotation_row <- heat_nom %>%
  mutate(ChEBI = if_else(str_detect(Compound_Class, "Carbohydrate"), "CH",
                         if_else(str_detect(Compound_Class, "Amino Acid"), "AA",
                                 if_else(str_detect(Compound_Class, "Carboxylic Acid"), "CA",
                                         "other"))),
         ChEBI = fct_relevel(ChEBI, c("AA", "CH", "CA", "other"))) %>%
  select(ChEBI)

rownames(annotation_row) <- heat_nom$met

annotation_col <- heat_base %>%
  arrange(genotype, treatment) %>%
  distinct(tissue, treatment, genotype) %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  as.data.frame()

rownames(annotation_col) <- annotation_col$group

annotation_col <- annotation_col %>%
  select(-group, treatment, -tissue, -genotype)

heat_cols <- colnames(mat.heat_nom)

ann_colors <- list(
  treatment = c("0.4" = vir_scale[1], "0.6" = vir_scale[2], "0.8" = vir_scale[3], "1" = vir_scale[4]),
  ChEBI = c("AA" = "#0072B2",
            "CH" = "#D55E00",
            "CA" = "#CC79A7",
            "other" = "#000000"))

pheatmap.GC <- pheatmap(mat.heat_nom,
                        color = colorRampPalette(c("#440154FF", "white", "#FDE725FF"))(45),
                        breaks = seq(-0.45, 0.45, 0.02),
                        cluster_rows = TRUE,
                        cluster_cols = FALSE,
                        annotation_names_row = FALSE,
                        show_rownames = FALSE,
                        show_colnames = FALSE,
                        annotation_row = annotation_row,
                        annotation_col = annotation_col,
                        number_color = "black",
                        fontsize_number = 6,
                        annotation_colors = ann_colors)

heat_prim_wt <- ggplotify::as.ggplot(pheatmap.GC)
saveRDS(heat_prim_wt, "MoneyMaker_heatmap.RDS")

# Pass the plot explicitly (as is done for "Heatmap_all.png" below) instead of
# depending on whatever last_plot() happens to hold.
ggsave("MoneyMaker.png",
       plot = heat_prim_wt,
       width = 6,
       height = 8.25,
       units = "cm",
       dpi = 300)

# dev.cur() never returns NULL: it returns 1 (the null device) when no device
# is open, and dev.off() raises an error on the null device. Only close a real
# device.
if (dev.cur() > 1L) {
  dev.off()
}

# Heatmap all genotypes all metabolites----------------------------------------------------------------

red_met_class <- read_csv(file = "H:/3. cmQTL mapping/Shared_source_files/reduced_met_classes.csv") %>%
  select(-n)

# Wild-type (MoneyMaker) mean per tissue / treatment / metabolite, used as the
# denominator below.
fc_1_wt <- fc_1 %>%
  filter(genotype == "MoneyMaker") %>%
  select(mean_fc_wt = mean_fc, tissue, treatment, met)

# Mean fold change of every genotype relative to the wild type.
heat_base <- fc_1 %>%
  left_join(fc_1_wt) %>%
  mutate(mean_fc = mean_fc / mean_fc_wt,
         genotype = str_remove_all(genotype, "\\*"))

# Wide matrix source: one row per metabolite, one column per
# tissue_genotype_treatment group, holding log2 ratios. Ratios whose p.signif
# is not exactly "*" and infinite values are set to 0.
# The former per-group `log_norm_level` / `z_score` columns were dropped by
# pivot_wider() and never used, so they (and the group_by() that only served
# them) are no longer computed.
# NOTE(review): rows with a missing p.signif keep NA here - confirm that is
# intended before clustering.
heat_nom <- heat_base %>%
  left_join(sig) %>%
  mutate(log_norm = log2(mean_fc),
         log_norm = if_else(p.signif == "*", log_norm, 0),
         log_norm = if_else(is.infinite(log_norm), 0, log_norm)) %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  arrange(tissue, treatment, genotype) %>%
  pivot_wider(id_cols = c(Compound_Name, Compound_Class, met),
              names_from = group,
              values_from = log_norm) %>%
  left_join(red_met_class) %>%
  select(-Compound_Class, Compound_Class = Compound_Class_dense) %>%
  arrange(Compound_Class, Compound_Name) %>%
  as.data.frame()

rownames(heat_nom) <- heat_nom$met

mat.heat_nom <- heat_nom %>%
  select(#contains("0.4"), contains("0.6"),
         #contains("0.8"),
         #contains("1"),
         contains("leaves"), contains("fruits")) %>%
  as.matrix()

annotation_row <- heat_nom %>%
  select(Compound_Class)

rownames(annotation_row) <- heat_nom$met

annotation_col <- heat_base %>%
  arrange(genotype, treatment) %>%
  distinct(tissue, treatment, genotype) %>%
  mutate(group = as_factor(str_c(tissue, genotype, treatment, sep = "_"))) %>%
  as.data.frame()

rownames(annotation_col) <- annotation_col$group

# Dropping `group` already keeps tissue, treatment and genotype; the extra
# positive selections in the old call were redundant.
annotation_col <- annotation_col %>%
  select(-group)

#heat_nom_signif <- heat_base %>%
#  distinct(tissue, treatment, alias, genotype, met) %>%
#filter(genotype != "MoneyMaker") %>%
#  left_join(sig_GC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>%
#left_join(sig_mets) %>%
#filter(sig == T) %>%
#  mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")),
#         signif = if_else(p.value >= 0.05| is.na(p.value), "","X")) %>%
#  pivot_wider(id_cols = c(met),
#              names_from = group,
#              values_from = signif) %>%
#  left_join(met_dat) %>%
#left_join(GCid_classes) %>%
#  arrange(Compound_Class, Compound_Name) %>%
#  as.data.frame()

heat_cols <- colnames(mat.heat_nom)

#rownames(heat_nom_signif) <- heat_nom_signif$met

#mat.heat_nom_signif <- heat_nom_signif %>%
#  select(all_of(heat_cols)) %>% as.matrix()

ann_colors <- list(
  tissue = c(fruits = "magenta", leaves = "green"),
  genotype = c(MoneyMaker = cb_scale[1], `panK4-1` = cb_scale[2], `log2-1` = cb_scale[3], `transp1-1` = cb_scale[4]),
  treatment = c("0.4" = vir_scale[1], "0.6" = vir_scale[2], "0.8" = vir_scale[3], "1" = vir_scale[4]),
  Compound_Class = c("Amino Acid or derivative" = "#0072B2",
                     "Carboxylic Acid" = "#D55E00",
                     "Carbohydrate or derivative" = "#CC79A7",
                     "Cinnamic Acid" = "#484E37",
                     "Dipeptide" = "#4AEE2F",
                     "Flavonoid (glycosides)" = "#BAD23A",
                     "Galactolipid" = "#EF000B",
                     "Phospholipid" = "#19605B",
                     "Steroidal Glycoalkaloids" = "#7A5DF0",
                     "Triacylglyceride" = "#6E3455",
                     "Other" = "#000000")
)

# 42 breaks (-2.05 ... 2.05 in steps of 0.1) delimit the 41 palette colours.
# The trailing empty argument of the old call has been removed.
pheatmap.GC <- pheatmap(mat.heat_nom,
                        color = colorRampPalette(c("#440154FF", "white", "#FDE725FF"))(41),
                        #cellwidth = 16,
                        #cellheight = 4,
                        breaks = seq(-2.05, 2.05, 0.1),
                        #clustering_distance_rows = dist((mat.heat_nom), method = "euclidean"),
                        cluster_rows = TRUE,
                        cluster_cols = FALSE,
                        annotation_names_row = FALSE,
                        show_rownames = FALSE,
                        show_colnames = FALSE,
                        annotation_row = annotation_row,
                        annotation_col = annotation_col,
                        #display_numbers = mat.heat_nom_signif,
                        number_color = "black",
                        fontsize_number = 6,
                        annotation_colors = ann_colors
                        #filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
                        #                 "cmQTL_val_1_heatmap_rel_tissue_all_genotypes_all_metabolites.jpg",
                        #                 sep = "_")
)

heat_all <- ggplotify::as.ggplot(pheatmap.GC)
saveRDS(heat_all, "Heat_all.RDS")

ggsave("Heatmap_all.png",
       plot = heat_all,
       width = 6,
       height = 8.25,
       units = "cm",
       dpi = 300)

# Same guard as above: only close a device that is actually open.
if (dev.cur() > 1L) {
  dev.off()
}

# PCA ---------------------------------------------------------------------

pca_theme <- theme(axis.text.x = element_markdown(size = 6, family = "sans"),
                   axis.text.y = element_text(size = 6, family = "sans"),
                   axis.title.x = element_text(size = 6, family = "sans"),
                   axis.title.y = element_text(size = 6, family = "sans"),
                   panel.background = element_rect(fill = "white"),
                   panel.border = element_rect(color = "black", fill = NA),
                   panel.spacing = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm"),
                   strip.text = element_text(size = 10, family = "sans"),
                   text = element_text(size = 6, family = "sans"),
                   legend.position = "right",
                   legend.title = element_text(size = 6, family = "sans"),
                   legend.text = element_markdown(size = 6),
                   legend.box.spacing = unit(1, "mm"),
                   plot.margin = unit(c(1, 1, 1, 1), "mm"),
                   legend.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm"))

# Reshape per-sample values into one row per sample and one column per
# metabolite, holding log2(mean_tec_rep). Non-finite log values are replaced
# by 1, and samples without a value for m_9 are removed.
build_pca_base <- function(ind_values) {
  ind_values %>%
    arrange(met, tissue, treatment, genotype, LIMS_ID, fc) %>%
    mutate(log_fc = log2(mean_tec_rep),
           log_fc = if_else(!is.finite(log_fc), 1, log_fc)) %>%
    pivot_wider(id_cols = c(tissue, treatment, genotype, LIMS_ID),
                names_from = met,
                values_from = log_fc) %>%
    filter(!is.na(m_9))
}

# Put the sample descriptors back next to the PCA scores. Rows are matched by
# position through a temporary `join_num` key; seq_len() replaces 1:nrow().
join_pca_scores <- function(pca_summary, pca_input, sample_cols) {
  row_id <- seq_len(nrow(pca_summary$x))
  as_tibble(pca_summary$x) %>%
    mutate(join_num = row_id) %>%
    left_join(pca_input %>% mutate(join_num = row_id), by = "join_num") %>%
    mutate(treatment = as_factor(treatment),
           tissue = as_factor(tissue)) %>%
    select(all_of(sample_cols), everything())
}

# Axis title such as "PC1 (12.3%)"; row 2 of the importance table is the
# proportion of variance.
pc_axis_label <- function(importance, pc) {
  str_c(pc, " ", "(", importance[[pc]][[2]] * 100, "%)")
}

# PC1/PC2 scatter with treatment-coloured points and ellipses. `point_mapping`
# lets the caller add further point aesthetics (e.g. shape).
plot_pca_by_treatment <- function(scores, importance,
                                  point_mapping = aes(x = PC1, y = PC2, color = treatment)) {
  ggplot(scores) +
    geom_jitter(point_mapping) +
    stat_ellipse(aes(x = PC1, y = PC2, color = treatment)) +
    ylab(pc_axis_label(importance, "PC2")) +
    xlab(pc_axis_label(importance, "PC1")) +
    pca_theme +
    scale_color_manual(values = cb_scale[5:8])
}

pca_base <- build_pca_base(fc_1_ind)

pca <- summary(prcomp(
  pca_base %>%
    select(starts_with("m_"), starts_with("Cluster_"))))

sam_vars <- pca_base %>%
  select(-starts_with("m_"), -starts_with("Cluster_")) %>%
  colnames()

pca_plot <- join_pca_scores(pca, pca_base, sam_vars)

exp_var <- as_tibble(pca[["importance"]])

# Exploratory view coloured by tissue (displayed only, not saved).
pca_plot %>%
  ggplot() +
  geom_jitter(aes(x = PC1, y = PC2, color = tissue)) +
  stat_ellipse(aes(x = PC1, y = PC2, color = tissue)) +
  ylab(pc_axis_label(exp_var, "PC2")) +
  xlab(pc_axis_label(exp_var, "PC1"))

pca_all_plot <- plot_pca_by_treatment(
  pca_plot, exp_var,
  point_mapping = aes(x = PC1, y = PC2, color = treatment, shape = tissue))
pca_all_plot

ggsave("PCA_all_lines_all_metabolites.png",
       plot = pca_all_plot,
       width = 6,
       height = 8.25,
       units = "cm",
       dpi = 300)

saveRDS(pca_all_plot, "PCA_all_met.RDS")

## sub PCA fruits ----------------------------------------------------------

pca_base <- build_pca_base(fc_1_ind %>% filter(tissue == "fruits"))

pca <- summary(prcomp(
  pca_base %>%
    select(starts_with("m_"), starts_with("Cluster_"))))

sam_vars <- pca_base %>%
  select(-starts_with("m_"), -starts_with("Cluster_")) %>%
  colnames()

pca_plot <- join_pca_scores(pca, pca_base, sam_vars)

exp_var <- as_tibble(pca[["importance"]])

pca_fruits_plot <- plot_pca_by_treatment(pca_plot, exp_var)
pca_fruits_plot

ggsave("PCA_all_lines_all_metabolites_fruits.png",
       plot = pca_fruits_plot,
       width = 8.25,
       height = 8.25,
       units = "cm",
       dpi = 300)
saveRDS(pca_fruits_plot, "PCA_all_met_fruits.RDS")

## sub PCA leaves ----------------------------------------------------------

pca_base <- build_pca_base(fc_1_ind %>% filter(tissue == "leaves"))

pca <- summary(prcomp(
  pca_base %>%
    select(starts_with("m_"), starts_with("Cluster_"))))

sam_vars <- pca_base %>%
  select(-starts_with("m_"), -starts_with("Cluster_")) %>%
  colnames()

pca_plot <- join_pca_scores(pca, pca_base, sam_vars)

exp_var <- as_tibble(pca[["importance"]])

# Built last and printed at top level, so last_plot() still refers to the
# leaves PCA for the statements that follow this section.
plot_pca_by_treatment(pca_plot, exp_var)

# Saves the most recently built plot (the leaves PCA from the section above).
ggsave("PCA_all_lines_all_metabolites_leaves.png",
       width = 8.25,
       height = 8.25,
       units = "cm",
       dpi = 300)

saveRDS(last_plot(), "PCA_all_met_leaves.RDS")


# Levene p-value distribution ---------------------------------------------

# Histogram of p-values in 0.05-wide bins with a red reference line at
# `expected_per_bin`.
# `boundary` takes a numeric bin edge; the previous `boundary = T` only worked
# through coercion of TRUE to 1.
# NOTE(review): the reference heights (9840 * 5 / 100 and 2460 * 5 / 100) look
# like "number of tests x bin width" - confirm against the row counts of the
# p-value tables.
plot_p_value_histogram <- function(p_values, expected_per_bin) {
  ggplot(p_values, aes(x = p.value)) +
    geom_histogram(binwidth = 0.05, boundary = 0) +
    geom_hline(yintercept = expected_per_bin, color = "red") +
    xlab("p-value") +
    pca_theme
}

p_val_hist_lt <- plot_p_value_histogram(sig_lt, 9840 * 5 / 100)
p_val_hist_lt

ggsave("p_val_dist_lt.png",
       plot = p_val_hist_lt,
       width = 16.5,
       height = 6,
       units = "cm",
       dpi = 300)

saveRDS(p_val_hist_lt, "p_val_dist_lt.RDS")


p_val_hist_nom <- plot_p_value_histogram(sig, 9840 * 5 / 100)
p_val_hist_nom

ggsave("p_val_dist_nom.png",
       plot = p_val_hist_nom,
       width = 16.5,
       height = 6,
       units = "cm",
       dpi = 300)

saveRDS(p_val_hist_nom, "pval_dist_nom.RDS")


p_val_hist_cv <- plot_p_value_histogram(sig_cv, 2460 * 5 / 100)
p_val_hist_cv

ggsave("p_val_dist_cv.png",
       plot = p_val_hist_cv,
       width = 16.5,
       height = 6,
       units = "cm",
       dpi = 300)

saveRDS(p_val_hist_cv, "p_val_dist_cv.RDS")


# Miniplots ---------------------------------------------------------------

# Compact theme for the 2 x 2 cm miniplots. The legend is switched off with
# "none"; the previous empty string is not a documented legend position.
min_theme <- theme(axis.text.x = element_markdown(vjust = 2, hjust = 0.5, size = 6, family = "sans", margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")),
                   axis.text.y = element_text(size = 6, vjust = 0.5, hjust = 1, family = "sans", margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")),
                   axis.title.x = element_text(size = 6, family = "sans", margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")),
                   axis.title.y = element_text(size = 6, family = "sans", margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "mm")),
                   panel.background = element_rect(fill = "white"),
                   panel.border = element_rect(color = "black", fill = NA),
                   strip.text = element_text(size = 10, family = "sans"),
                   text = element_text(size = 6, family = "sans"),
                   legend.title = element_blank(),
                   legend.text = element_markdown(size = 6),
                   legend.position = "none",
                   plot.margin = unit(c(2, 1, 1, 2), "pt"),
                   legend.margin = margin(t = 0, r = 2, b = 0, l = 2, unit = "mm"))


# Line plot of mean fold change (+/- SE) across irrigation treatments for one
# metabolite in one tissue, one line per genotype.
#
# Reads the global tables `fc_1`, `sig` and `genotypes` and the palette
# `cb_scale`.
#
# plot_met       metabolite id matched against `met` (e.g. "m_70").
# plot_tissue    tissue matched against `tissue` (e.g. "fruits").
# plot_label     currently unused (the ggtitle() call is disabled); kept so
#                existing calls keep working.
# plot_legend    currently unused; kept for the same reason.
# plot_genotypes genotype labels to draw, as spelled in `fc_1$genotype`.
# plot_fill      indices into `cb_scale` giving the line colours.
#
# Returns the ggplot object.
make_pointrange_line_plot <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill) {

  selected <- fc_1 %>%
    filter(met == plot_met, tissue == plot_tissue,
           genotype %in% plot_genotypes)

  # Highest error-bar tip per treatment; `.groups = "drop"` returns an
  # ungrouped table and silences the regrouping message.
  per_comp_y <- selected %>%
    group_by(tissue, treatment, met) %>%
    summarise(tot_val = max(mean_fc + se), .groups = "drop")

  # Significance table with aliases translated to genotype labels and the
  # y position for (currently disabled) significance brackets.
  sig_bar <- sig %>%
    left_join(genotypes, by = c("group1" = "alias")) %>%
    rename(genotype1 = genotype) %>%
    left_join(genotypes, by = c("group2" = "alias")) %>%
    rename(genotype2 = genotype) %>%
    select(-group1, -group2) %>%
    rename(group1 = genotype1, group2 = genotype2) %>%
    left_join(per_comp_y, by = c("tissue", "treatment", "met")) %>%
    mutate(y.position = 1.1 * tot_val,
           treatment = as_factor(treatment)) %>%
    filter(met == plot_met, tissue == plot_tissue,
           group1 %in% plot_genotypes, group2 %in% plot_genotypes)

  # Upper axis limit. The original took max(sig_bar$tot_val) directly, which
  # yields -Inf / NA (and then a seq() error) when no significance rows match
  # or a value is missing; fall back to the plotted data in that case.
  tot_vals <- sig_bar$tot_val[is.finite(sig_bar$tot_val)]
  if (length(tot_vals) == 0) {
    tot_vals <- per_comp_y$tot_val[is.finite(per_comp_y$tot_val)]
  }
  if (length(tot_vals) == 0) {
    stop("No finite values to plot for metabolite '", plot_met,
         "' in tissue '", plot_tissue, "'.", call. = FALSE)
  }
  ylim_top <- max(tot_vals)

  # Integer-spaced breaks, roughly five per axis.
  breaks <- seq(0, round(ylim_top), ylim_top %/% 5 + 1)

  min_scale_y <- scale_y_continuous(expand = expansion(mult = 0.05),
                                    limits = c(0, ylim_top), guide = guide_prism_minor(),
                                    breaks = breaks)
  min_scale_x <- scale_x_discrete(expand = expansion(mult = 0.1))

  plot_out <- selected %>%
    mutate(treatment = as_factor(treatment)) %>%
    ggplot(aes(x = treatment, y = mean_fc)) +
    geom_line(aes(x = treatment, y = mean_fc, color = genotype, group = genotype), size = 0.25) +
    geom_point(aes(color = genotype), size = 0.4) +
    geom_errorbar(aes(ymin = mean_fc - se, ymax = mean_fc + se, color = genotype), size = 0.25, width = 0.25) +
    #stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position",
    #                  step.increase = 0.07,
    #                 hide.ns = T) +
    ylab("FC") +
    xlab("irrigation") +
    #ggtitle(label = plot_label) +
    scale_color_manual(values = cb_scale[plot_fill], aesthetics = "color") +
    min_theme +
    min_scale_x +
    min_scale_y

  plot_out

}

# Write one miniplot as "<file_stem>_pointrange_line_plot_nominal.<format>"
# (2 x 2 cm, 300 dpi) for every requested format. The plot is passed
# explicitly rather than taken from last_plot().
save_miniplot <- function(plot, file_stem, formats = c("png", "svg")) {
  for (fmt in formats) {
    ggsave(str_c(file_stem, "_pointrange_line_plot_nominal.", fmt),
           plot = plot, width = 2, height = 2, units = "cm", dpi = 300)
  }
  invisible(plot)
}

p1 <- make_pointrange_line_plot("m_70", "fruits", "Malic acid", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Malic_acid")


p1 <- make_pointrange_line_plot("m_90", "fruits", "Pyruvate", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Pyruvate")

p1 <- make_pointrange_line_plot("m_21", "fruits", "Citrate", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Citrate")

p1 <- make_pointrange_line_plot("m_63", "fruits", "Isocitrate", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Isocitrate")

p1 <- make_pointrange_line_plot("m_48", "fruits", "Oxoglutaric acid", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Oxoglutarate")

p1 <- make_pointrange_line_plot("m_102", "fruits", "Succinnic acid", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Succinate")

p1 <- make_pointrange_line_plot("m_32", "fruits", "Fumaric acid", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Fumarate")


p1 <- make_pointrange_line_plot("m_47", "fruits", "", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Glutamine")


p1 <- make_pointrange_line_plot("m_17", "fruits", "", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "GABA")

p1 <- make_pointrange_line_plot("m_13", "fruits", "", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Aspartate")

# Asparagine has only ever been written as PNG.
p1 <- make_pointrange_line_plot("m_12", "fruits", "", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Asparagine", formats = "png")

# Only the PNG is written here; the SVG of this plot is saved by the statement
# that follows this section.
p1 <- make_pointrange_line_plot("m_6", "fruits", "", "none", c("MoneyMaker", "*panK4-1*"), c(1, 2))
p1
save_miniplot(p1, "Alanine", formats = "png")
+ggsave("Alanine_pointrange_line_plot_nominal.svg",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("m_8", "fruits", "", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 +ggsave("beta-Alanine_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) +ggsave("beta-Alanine_pointrange_line_plot_nominal.svg",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("m_122", "fruits", "", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 +ggsave("valine_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) +ggsave("valine_pointrange_line_plot_nominal.svg",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("Cluster_029173", "fruits", "", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 +ggsave("Pantothenate_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) +ggsave("Pantothenate_pointrange_line_plot_nominal.svg",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("m_120", "fruits", "", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 +ggsave("Uracil_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) +ggsave("Uracil_pointrange_line_plot_nominal.svg",width = 2, height = 2, units = "cm", dpi = 300) + +#log2 + +p1 <- make_pointrange_line_plot("Cluster_025242", "fruits", "", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 +ggsave("Phenylalanine_log2-1_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("Cluster_024086", "fruits", "", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 +ggsave("Trans-cinnamic_acid_log2-1_pointrange_line_plot_nominal.png",width = 2, height = 2, units = "cm", dpi = 300) + +p1 <- make_pointrange_line_plot("m_91", "fruits", "", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 +ggsave("Trans-cinnamic_acid_log2-1_pointrange_line_plot_nominal.png",width = 2, 
height = 2, units = "cm", dpi = 300) + + +# File output for metaboanalyst ------------------------------------------- + +met_out <- met_dat %>% + select(Compound_Name) + +write_csv(met_out, "metabolite_raw_names.csv") diff --git a/workflows/Whole_metabolome_analysis/Get_metabolite_data.R b/workflows/Whole_metabolome_analysis/Get_metabolite_data.R new file mode 100644 index 0000000000000000000000000000000000000000..08553d201a607c39ee5c30bb7f8a31b4b5ae827c --- /dev/null +++ b/workflows/Whole_metabolome_analysis/Get_metabolite_data.R @@ -0,0 +1,382 @@ +rm(list = ls()) +library(tidyverse) +library(ggpubr) +library(glue) +library(ggtext) +library(cowplot) +library(ggbeeswarm) +library(extrafont) +library(ggprism) +library(viridis) + +# Directory setting ------------------------------------------------------- + +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "Metabolitedata", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +# Primary loading --------------------------------------------------------- +setwd(source) +prim_source <- str_c(getwd(), "/1. 
Primary") +setwd(prim_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(prim_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_prim <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + platform = "polar GC-MS") + +genotypes <- fc_1_prim %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_prim <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim <- read_csv("p_values.csv") + +fc_1_cv_prim <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv_prim <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_prim <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar GC-MS") + +fc_1_ind_lt_prim <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_prim_lt <- read_csv("p_values_levene.csv") + +met_prim <- fc_1_prim %>% + distinct(met, Compound_Name, Compound_Class) + +cv_prim_out <- fc_1_cv_prim %>% + left_join(met_prim) %>% + left_join(sig_prim_cv, by = c("tissue", "met", 
"alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "polar GC-MS") + + +setwd(prim_source) + +prim_met_lib <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") +prim_met_lib_tidy <- prim_met_lib %>% + select(component = Xcal_name_xreport, mz_mean = mz) + + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(prim_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +prim_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_GC.csv")) %>% + select(Compound_Name, met, RT_mean, component) + +prim_met_total <- met_prim %>% + left_join(prim_met_dat) %>% + left_join(prim_met_lib_tidy) %>% + mutate(platform = "polar GC-MS") %>% + mutate(Compound_Class = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative", + if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative", + if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid", + "other")))) + +# Secondary loading --------------------------------------------------------- +setwd(source) +sec_source <- str_c(getwd(), "/2. 
Secondary") +setwd(sec_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(sec_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_sec <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar LC-MS") %>% + rename(n = n.x) + +genotypes <- fc_1_sec %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_sec <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_sec <- read_csv("p_values.csv") + +fc_1_cv_sec <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv_sec <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_sec_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_sec <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar LC-MS") %>% + rename(n = n.x) + +fc_1_ind_lt_sec <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_sec_lt <- read_csv("p_values_levene.csv") + +met_sec <- fc_1_sec %>% + distinct(met, Compound_Name, Compound_Class) + +cv_sec_out <- fc_1_cv_sec %>% + left_join(met_sec) %>% + 
left_join(sig_sec_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "polar LC-MS") + +setwd(sec_source) +sec_met_lib <- read_delim("cmQTL_val1_selected_secondary.txt", delim = "\t") +sec_met_lib_tidy <- sec_met_lib %>% + select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, mode, adduct_pos, adduct_neg, + Mol.formula, mz_lit_pos, mz_lit_neg, + RT_lit, adduct_lit, Species.detected.before, lit) %>% + distinct(mz_mean, RT_mean, .keep_all = T) + + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(sec_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +sec_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_LC.csv")) + +sec_met_total <- met_sec %>% + left_join(sec_met_dat) %>% + left_join(sec_met_lib_tidy, by = c("mz_mean", "RT_mean")) %>% + mutate(platform = "polar LC-MS") + +# Lipids loading --------------------------------------------------------- +setwd(source) +lip_source <- str_c(getwd(), "/3. 
Lipids") +setwd(lip_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(lip_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_lip <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))%>% + mutate(platform = "apolar LC-MS") %>% + rename(n = n.x) + +genotypes <- fc_1_lip %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_lip <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_lip <- read_csv("p_values.csv") + +fc_1_cv_lip <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv_lip <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_lip_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_lip <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "apolar LC-MS") %>% + rename(n = n.x) + +fc_1_ind_lt_lip <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_lip_lt <- read_csv("p_values_levene.csv") + +met_lip <- fc_1_lip %>% + distinct(met, Compound_Name, Compound_Class) + +cv_lip_out <- fc_1_cv_lip %>% + left_join(met_lip) %>% + 
left_join(sig_lip_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "apolar LC-MS") + +setwd(lip_source) +lip_met_lib <- read_delim("cmQTL_val1_selected_lipids.txt", delim = "\t") +lip_met_lib_tidy <- lip_met_lib %>% + select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, adduct_pos = Adduct, + Mol.formula = Chemical_Formula) %>% + mutate(mode = "pos", + mz_mean = round(mz_mean, 4), + RT_mean = round(RT_mean, 6)) %>% + distinct(mz_mean, RT_mean, .keep_all = T) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(lip_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +lip_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_lip.csv")) %>% + mutate(mz_mean = round(mz_mean, 4), + RT_mean = round(RT_mean, 6)) + +lip_met_total <- met_lip %>% + left_join(lip_met_dat) %>% + left_join(lip_met_lib_tidy) %>% + mutate(platform = "apolar LC-MS") + +# Data combination -------------------------------------------------------- +setwd(out_dir) + +fc_1 <- fc_1_prim %>% + bind_rows(fc_1_sec, fc_1_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Relative fold change` = mean_fc, + SD = sd, SE = se, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_lt <- fc_1_lt_prim %>% + bind_rows(fc_1_lt_sec, fc_1_lt_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Relative fold change` = mean_fc, + SD = sd, 
SE = se, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_ind <- fc_1_ind_prim %>% + bind_rows(fc_1_ind_sec, fc_1_ind_lip) + +fc_1_cv <- fc_1_cv_prim %>% + bind_rows(fc_1_cv_sec, fc_1_cv_lip) + +met_dat <- fc_1 %>% + distinct(met, Compound_Name, Compound_Class) + +classes_tidy <- met_dat %>% + mutate(`ChEBI simplified` = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative", + if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative", + if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid", + "other")))) + +genotypes <- fc_1 %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +per_comp_y <- fc_1 %>% + group_by(tissue, treatment, met) %>% + summarise(tot_val = max(mean_fc + se)) + +sig_lt <- sig_prim_lt %>% + bind_rows(sig_sec_lt, sig_lip_lt) + +sig <- sig_prim %>% + bind_rows(sig_sec, sig_lip) + +sig_cv <- sig_prim_cv %>% + bind_rows(sig_sec_cv, sig_lip_cv) + +cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", + "#0072B2", "#D55E00", "#CC79A7","#000000") +bw_scale <- c("black", "black", "black", "black", "black", "black", "black") +vir_scale <- plasma(4, begin = 0.2, end = 1) + +cv_out <- bind_rows(cv_prim_out, cv_sec_out, cv_lip_out) %>% + select(Tissue = tissue, Genotype = genotype, + Compound = Compound_Name, `Mean CV` = mean_cv, + SD = sd_cv, SE = se_cv, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +met_dat <- prim_met_total %>% + bind_rows(sec_met_total, 
lip_met_total) %>% + select(Platform = platform, everything()) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, RT_mean, Compound_Name) %>% + mutate('Peak no.' = str_c("M",row_number()), + 'Identification level' = "") %>% + select('Peak no.', Compound = Compound_Name, 'Compound Class' = Compound_Class, 'm/z' = mz_mean, + 'Ret. Time' = RT_mean, 'Identification level', Platform) + +write_csv(fc_1, "FC1_supplementary.csv") +write_csv(fc_1_lt, "FC1_LT_supplementary.csv") +write_csv(cv_out, "CV_supplementary.csv") +write_csv(met_dat, "met_dat_supplementary.csv") + diff --git a/workflows/Whole_metabolome_analysis/Get_raw_data.R b/workflows/Whole_metabolome_analysis/Get_raw_data.R new file mode 100644 index 0000000000000000000000000000000000000000..9be1f5ddb1788443512c7e4350eb1b5d4030eb1a --- /dev/null +++ b/workflows/Whole_metabolome_analysis/Get_raw_data.R @@ -0,0 +1,798 @@ +rm(list = ls()) +library(tidyverse) +library(ggpubr) +library(glue) +library(ggtext) +library(cowplot) +library(ggbeeswarm) +library(extrafont) +library(ggprism) +library(viridis) + +# Directory setting ------------------------------------------------------- + +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "Raw_data_normalized", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +# Primary loading --------------------------------------------------------- +setwd(source) +prim_source <- str_c(getwd(), "/1. 
Primary") +setwd(prim_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(prim_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_prim <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + platform = "polar GC-MS") + +genotypes <- fc_1_prim %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_prim <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar GC-MS") + +sig_prim <- read_csv("p_values.csv") + +fc_1_cv_prim <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(platform = "polar GC-MS") + +fc_1_ind_cv_prim <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar GC-MS") + +sig_prim_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_prim <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar GC-MS") + +fc_1_ind_lt_prim <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar GC-MS") + +sig_prim_lt <- read_csv("p_values_levene.csv") + +met_prim <- fc_1_prim %>% + distinct(met, Compound_Name, Compound_Class) + +cv_prim_out <- fc_1_cv_prim %>% + left_join(met_prim) 
%>% + left_join(sig_prim_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "polar GC-MS") + + +setwd(prim_source) + +prim_met_lib <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") +prim_met_lib_tidy <- prim_met_lib %>% + select(component = Xcal_name_xreport, mz_mean = mz) + + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(prim_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +prim_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_GC.csv")) %>% + select(Compound_Name, met, RT_mean, component) + +prim_met_total <- met_prim %>% + left_join(prim_met_dat) %>% + left_join(prim_met_lib_tidy) %>% + mutate(platform = "polar GC-MS") %>% + mutate(Compound_Class = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative", + if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative", + if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid", + "other")))) +fc_1_ind_prim <- fc_1_ind_prim %>% + left_join(met_prim) + +fc_1_ind_lt_prim <- fc_1_ind_lt_prim %>% + left_join(met_prim) + +fc_1_ind_cv_prim <- fc_1_ind_cv_prim %>% + left_join(met_prim) + +# Secondary loading --------------------------------------------------------- +setwd(source) +sec_source <- str_c(getwd(), "/2. 
Secondary") +setwd(sec_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(sec_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_sec <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar LC-MS") %>% + rename(n = n.x) + +genotypes <- fc_1_sec %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_sec <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar LC-MS") + +sig_sec <- read_csv("p_values.csv") + +fc_1_cv_sec <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar LC-MS") + +fc_1_ind_cv_sec <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar LC-MS") + +sig_sec_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_sec <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "polar LC-MS") %>% + rename(n = n.x) + +fc_1_ind_lt_sec <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "polar LC-MS") + +sig_sec_lt <- 
read_csv("p_values_levene.csv") + +met_sec <- fc_1_sec %>% + distinct(met, Compound_Name, Compound_Class) + +cv_sec_out <- fc_1_cv_sec %>% + left_join(met_sec) %>% + left_join(sig_sec_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "polar LC-MS") + +setwd(sec_source) +sec_met_lib <- read_delim("cmQTL_val1_selected_secondary.txt", delim = "\t") +sec_met_lib_tidy <- sec_met_lib %>% + select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, mode, adduct_pos, adduct_neg, + Mol.formula, mz_lit_pos, mz_lit_neg, + RT_lit, adduct_lit, Species.detected.before, lit) %>% + distinct(mz_mean, RT_mean, .keep_all = T) + + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(sec_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +sec_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_LC.csv")) + +sec_met_total <- met_sec %>% + left_join(sec_met_dat) %>% + left_join(sec_met_lib_tidy, by = c("mz_mean", "RT_mean")) %>% + mutate(platform = "polar LC-MS") + +fc_1_ind_sec <- fc_1_ind_sec %>% + left_join(met_sec) + +fc_1_ind_lt_sec <- fc_1_ind_lt_sec %>% + left_join(met_sec) + +fc_1_ind_cv_sec <- fc_1_ind_cv_sec %>% + left_join(met_sec) + +# Lipids loading --------------------------------------------------------- +setwd(source) +lip_source <- str_c(getwd(), "/3. 
Lipids") +setwd(lip_source) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(lip_source, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +fc_1_lip <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))%>% + mutate(platform = "apolar LC-MS") %>% + rename(n = n.x) + +genotypes <- fc_1_lip %>% + distinct(alias, genotype) %>% + mutate(genotype_label = str_remove_all(genotype, "\\*")) + +fc_1_ind_lip <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "apolar LC-MS") + +sig_lip <- read_csv("p_values.csv") + +fc_1_cv_lip <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "apolar LC-MS") + +fc_1_ind_cv_lip <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "apolar LC-MS") + +sig_lip_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_lt_lip <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + mutate(platform = "apolar LC-MS") %>% + rename(n = n.x) + +fc_1_ind_lt_lip <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) %>% + mutate(platform = "apolar LC-MS") + +sig_lip_lt <- 
read_csv("p_values_levene.csv") + +met_lip <- fc_1_lip %>% + distinct(met, Compound_Name, Compound_Class) + +cv_lip_out <- fc_1_cv_lip %>% + left_join(met_lip) %>% + left_join(sig_lip_cv, by = c("tissue", "met", "alias" = "alias2")) %>% + select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>% + mutate(platform = "apolar LC-MS") + +setwd(lip_source) +lip_met_lib <- read_delim("cmQTL_val1_selected_lipids.txt", delim = "\t") +lip_met_lib_tidy <- lip_met_lib %>% + select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, adduct_pos = Adduct, + Mol.formula = Chemical_Formula) %>% + mutate(mode = "pos", + mz_mean = round(mz_mean, 4), + RT_mean = round(RT_mean, 6)) %>% + distinct(mz_mean, RT_mean, .keep_all = T) + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_normalization <- str_c(lip_source, "/", latest) +latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}") + +setwd(latest_normalization) +lip_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_lip.csv")) %>% + mutate(mz_mean = round(mz_mean, 4), + RT_mean = round(RT_mean, 6)) + +lip_met_total <- met_lip %>% + left_join(lip_met_dat) %>% + left_join(lip_met_lib_tidy) %>% + mutate(platform = "apolar LC-MS") + +fc_1_ind_lip <- fc_1_ind_lip %>% + left_join(met_lip) + +fc_1_ind_lt_lip <- fc_1_ind_lt_lip %>% + left_join(met_lip) + +fc_1_ind_cv_lip <- fc_1_ind_cv_lip %>% + left_join(met_lip) + +# Data combination -------------------------------------------------------- +setwd(out_dir) + +fc_1 <- fc_1_prim %>% + bind_rows(fc_1_sec, fc_1_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Relative fold change` = mean_fc, + SD = sd, SE = se, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", 
"apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_lt <- fc_1_lt_prim %>% + bind_rows(fc_1_lt_sec, fc_1_lt_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Relative fold change` = mean_fc, + SD = sd, SE = se, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_cv <- fc_1_cv_prim %>% + bind_rows(fc_1_cv_sec, fc_1_cv_lip) + +fc_1_ind <- fc_1_ind_prim %>% + bind_rows(fc_1_ind_sec, fc_1_ind_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Relative fold change` = fc, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_ind_lt <- fc_1_ind_lt_prim %>% + bind_rows(fc_1_ind_lt_sec, fc_1_ind_lt_lip) %>% + select(Tissue = tissue, Irrigation = treatment, Genotype = genotype, + Compound = Compound_Name, `Levene's transformed value` = lev_t, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +fc_1_ind_cv <- fc_1_ind_cv_prim %>% + bind_rows(fc_1_ind_cv_sec, fc_1_ind_cv_lip) %>% + select(Tissue = tissue, Genotype = genotype, + Compound = Compound_Name, `Jack-value` = cv, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, 
c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +sig_lt <- sig_prim_lt %>% + bind_rows(sig_sec_lt, sig_lip_lt) + +sig <- sig_prim %>% + bind_rows(sig_sec, sig_lip) + +sig_cv <- sig_prim_cv %>% + bind_rows(sig_sec_cv, sig_lip_cv) + +cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", + "#0072B2", "#D55E00", "#CC79A7","#000000") +bw_scale <- c("black", "black", "black", "black", "black", "black", "black") +vir_scale <- plasma(4, begin = 0.2, end = 1) + +cv_out <- bind_rows(cv_prim_out, cv_sec_out, cv_lip_out) %>% + select(Tissue = tissue, Genotype = genotype, + Compound = Compound_Name, `Mean CV` = mean_cv, + SD = sd_cv, SE = se_cv, N = n, 'P-value' = p.value, Platform = platform) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, Tissue, Compound, Genotype) %>% + mutate(Genotype = str_remove_all(Genotype, "\\*")) + +met_dat <- prim_met_total %>% + bind_rows(sec_met_total, lip_met_total) %>% + select(Platform = platform, everything()) %>% + mutate(Platform = as_factor(Platform), + Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>% + arrange(Platform, RT_mean, Compound_Name) %>% + mutate('Peak no.' = str_c("M",row_number()), + 'Identification level' = "") %>% + select('Peak no.', Compound = Compound_Name, 'Compound Class' = Compound_Class, 'm/z' = mz_mean, + 'Ret. 
Time' = RT_mean, 'Identification level', Platform) + +write_csv(fc_1, "FC1_supplementary.csv") +write_csv(fc_1_lt, "FC1_LT_supplementary.csv") +write_csv(cv_out, "CV_supplementary.csv") +write_csv(met_dat, "met_dat_supplementary.csv") + +write_csv(fc_1_ind, "FC1_all_supplementary.csv") +write_csv(fc_1_ind_lt, "FC1_LT_all_supplementary.csv") +write_csv(fc_1_ind_cv, "CV_all_supplementary.csv") + + +# Prerequisite tests ------------------------------------------------------ + +library(RVAideMemoire) +library(ggbeeswarm) + + +## Nominal ----------------------------------------------------------------- + +normality <- fc_1_ind %>% + group_by(Tissue, Irrigation, Genotype, Platform, Compound) %>% + summarise(normality = shapiro.test(`Relative fold change`)$p.value) %>% + ungroup() #%>% + #mutate(normality = p.adjust(normality, method = "fdr")) + +normality_summary <- normality %>% + mutate(normal = if_else(normality <= 0.05, T, F)) %>% + group_by(normal) %>% + summarise(n = n()) %>% + ungroup() %>% + mutate(percentage = n/sum(n)) + +normality %>% + ggplot(aes(x = normality)) + + stat_bin(boundary = T, binwidth = 0.05) + + #geom_histogram(binwidth = 0.05, ) + + geom_vline(xintercept = 0.05, color = "red") + + facet_grid(rows = vars(Platform), cols = vars(Tissue)) + + +normality_wt <- normality %>% + filter(Genotype == "MoneyMaker") %>% + select(Tissue, Irrigation, Compound, Platform, + group2 = Genotype, normality2 = normality) + +normality_mut <- normality %>% + filter(Genotype != "MoneyMaker") %>% + select(Tissue, Irrigation, Compound, Platform, + group1 = Genotype, normality1 = normality) + +normality_comb <- normality_mut %>% + left_join(normality_wt) + +normality %>% + mutate(log_p = -log10(normality), x = "x") %>% + ggplot(aes(x = x, y = normality)) + + geom_quasirandom() + + #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) + + #geom_histogram(binwidth = 0.05, ) + + geom_hline(yintercept = (0.05), color = "red") #+ +facet_grid(cols = vars(Tissue)) + 
+variance <- fc_1_ind %>%  # pairwise variance tests between genotypes, one p-value matrix per group
+  mutate(Genotype = as_factor(Genotype)) %>%
+  group_by(Tissue, Irrigation, Compound, Platform) %>%
+  nest() %>%
+  mutate(lev = map(.x = data, .f = ~pairwise.var.test(.x$`Relative fold change`, .x$Genotype, p.method = "none")$p.value))
+
+variance_tidy <- variance %>%  # p-value matrix -> tibble; its row names become `group1`
+  mutate(var_tbl = map(.x = lev, .f = ~as_tibble(as.data.frame(.x), rownames = "group1")))
+
+variance_unnest <- variance_tidy %>%  # keep only the comparisons against the MoneyMaker column
+  select(Tissue, Irrigation, Compound, Platform, var_tbl) %>%
+  unnest(cols = var_tbl) %>%  # only the list-column is unnested; extra columns via `...` are deprecated
+  mutate(group2 = "MoneyMaker") %>%
+  select(Tissue, Irrigation, Compound, Platform, group1, group2, var_p = MoneyMaker) #%>%
+  #mutate(var_p = p.adjust(var_p, method = "fdr"))
+
+variance_unnest %>%  # histogram of variance-test p-values; red line marks 0.05
+  ggplot(aes(x = var_p)) +
+  stat_bin(boundary = 1, binwidth = 0.05) +  # numeric form of the original `boundary = T`
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_vline(xintercept = 0.05, color = "red") +
+  facet_grid(rows = vars(Platform), cols = vars(Tissue))
+
+variance_unnest %>%  # same p-values as a single quasirandom column
+  mutate(log_p = -log10(var_p), x = "x") %>%
+  ggplot(aes(x = x, y = var_p)) +
+  geom_quasirandom() +
+  #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) +
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_hline(yintercept = (0.05), color = "red") #+
+  #facet_grid(cols = vars(Tissue))  # disabled together with the `+` above; it used to run as a stray statement
+
+variance_summary <- variance_unnest %>%  # NOTE: `normal` is TRUE when p <= 0.05, i.e. when the test rejects
+  mutate(normal = if_else(var_p <= 0.05, TRUE, FALSE)) %>%
+  group_by(normal) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+prereq <- normality_comb %>%  # a comparison fulfils the prerequisites if neither test rejects at 0.05
+  left_join(variance_unnest) %>%
+  mutate(fulfill = if_else(normality1 >= 0.05 & normality2 >= 0.05 & var_p >= 0.05, TRUE, FALSE))
+
+prereq_summary <- prereq %>%
+  group_by(fulfill) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+variance_wt <- variance_unnest %>%  # one MoneyMaker row per group; it has no variance p-value of its own
+  select(Tissue, Irrigation, Compound, Platform,
+         Genotype = group2) %>%
+  distinct(Tissue, Irrigation, Compound, Platform, Genotype) %>%
+  mutate(var_p = NA_real_)
+
+variance_mut <- variance_unnest %>%
+  select(Tissue, Irrigation, Compound, Platform,
+         Genotype = group1, var_p)
+
+variance_long <- variance_mut %>%
+  bind_rows(variance_wt)
+
+prereq_long <- normality %>%
+  left_join(variance_long) %>%
+  rename(Normality = normality,  # capitalised to match the Levene and CV tables (was a no-op rename)
+         `Equal variance` = var_p)
+
+fc_1 <- fc_1 %>%  # append both prerequisite p-values to the supplementary table
+  left_join(prereq_long)
+
+## Levene -----------------------------------------------------------------
+
+normality_lt <- fc_1_ind_lt %>%  # Shapiro-Wilk p-value per group on the Levene-transformed values
+  group_by(Tissue, Irrigation, Genotype, Platform, Compound) %>%
+  summarise(normality_lt = shapiro.test(`Levene's transformed value`)$p.value) %>%
+  ungroup() #%>%
+#mutate(normality_lt = p.adjust(normality_lt, method = "fdr"))
+
+normality_lt_summary <- normality_lt %>%  # NOTE: `normal` is TRUE when p <= 0.05, i.e. when the test rejects
+  mutate(normal = if_else(normality_lt <= 0.05, TRUE, FALSE)) %>%
+  group_by(normal) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+normality_lt %>%  # histogram of normality p-values; red line marks 0.05
+  ggplot(aes(x = normality_lt)) +
+  stat_bin(boundary = 1, binwidth = 0.05) +  # numeric form of the original `boundary = T`
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_vline(xintercept = 0.05, color = "red") +
+  facet_grid(rows = vars(Platform), cols = vars(Tissue))
+
+
+normality_lt_wt <- normality_lt %>%  # wild-type p-values, renamed for the pairwise join below
+  filter(Genotype == "MoneyMaker") %>%
+  select(Tissue, Irrigation, Compound, Platform,
+         group2 = Genotype, normality_lt2 = normality_lt)
+
+normality_lt_mut <- normality_lt %>%
+  filter(Genotype != "MoneyMaker") %>%
+  select(Tissue, Irrigation, Compound, Platform,
+         group1 = Genotype, normality_lt1 = normality_lt)
+
+normality_lt_comb <- normality_lt_mut %>%
+  left_join(normality_lt_wt)
+
+normality_lt %>%
+  mutate(log_p = -log10(normality_lt), x = "x") %>%
+  ggplot(aes(x = x, y = normality_lt)) +
+  geom_quasirandom() +
+  #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) +
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_hline(yintercept = (0.05), color = "red") #+
+#facet_grid(cols = vars(Tissue))  # disabled together with the `+` above; it used to run as a stray statement
+
+variance_lt <- fc_1_ind_lt %>%  # pairwise variance tests between genotypes, one p-value matrix per group
+  mutate(Genotype = as_factor(Genotype)) %>%
+  group_by(Tissue, Irrigation, Compound, Platform) %>%
+  nest() %>%
+  mutate(lev = map(.x = data, .f = ~pairwise.var.test(.x$`Levene's transformed value`, .x$Genotype, p.method = "none")$p.value))
+
+variance_lt_tidy <- variance_lt %>%  # p-value matrix -> tibble; its row names become `group1`
+  mutate(var_tbl = map(.x = lev, .f = ~as_tibble(as.data.frame(.x), rownames = "group1")))
+
+variance_lt_unnest <- variance_lt_tidy %>%  # keep only the comparisons against the MoneyMaker column
+  select(Tissue, Irrigation, Compound, Platform, var_tbl) %>%
+  unnest(cols = var_tbl) %>%  # only the list-column is unnested; extra columns via `...` are deprecated
+  mutate(group2 = "MoneyMaker") %>%
+  select(Tissue, Irrigation, Compound, Platform, group1, group2, var_p = MoneyMaker) #%>%
+#mutate(var_p = p.adjust(var_p, method = "fdr"))
+
+variance_lt_unnest %>%  # histogram of variance-test p-values; red line marks 0.05
+  ggplot(aes(x = var_p)) +
+  stat_bin(boundary = 1, binwidth = 0.05) +  # numeric form of the original `boundary = T`
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_vline(xintercept = 0.05, color = "red") +
+  facet_grid(rows = vars(Platform), cols = vars(Tissue))
+
+variance_lt_unnest %>%
+  mutate(log_p = -log10(var_p), x = "x") %>%
+  ggplot(aes(x = x, y = var_p)) +
+  geom_quasirandom() +
+  #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) +
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_hline(yintercept = (0.05), color = "red") #+
+#facet_grid(cols = vars(Tissue))  # disabled together with the `+` above; it used to run as a stray statement
+
+variance_lt_summary <- variance_lt_unnest %>%  # NOTE: `normal` is TRUE when p <= 0.05, i.e. when the test rejects
+  mutate(normal = if_else(var_p <= 0.05, TRUE, FALSE)) %>%
+  group_by(normal) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+prereq_lt <- normality_lt_comb %>%  # a comparison fulfils the prerequisites if neither test rejects at 0.05
+  left_join(variance_lt_unnest) %>%
+  mutate(fulfill = if_else(normality_lt1 >= 0.05 & normality_lt2 >= 0.05 & var_p >= 0.05, TRUE, FALSE))
+
+prereq_summary_lt <- prereq_lt %>%
+  group_by(fulfill) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+variance_lt_wt <- variance_lt_unnest %>%  # one MoneyMaker row per group; it has no variance p-value of its own
+  select(Tissue, Irrigation, Compound, Platform,
+         Genotype = group2) %>%
+  distinct(Tissue, Irrigation, Compound, Platform, Genotype) %>%
+  mutate(var_p = NA_real_)
+
+variance_lt_mut <- variance_lt_unnest %>%
+  select(Tissue, Irrigation, Compound, Platform,
+         Genotype = group1, var_p)
+
+variance_lt_long <- variance_lt_mut %>%
+  bind_rows(variance_lt_wt)
+
+prereq_long_lt <- normality_lt %>%
+  left_join(variance_lt_long) %>%
+  rename(Normality = normality_lt,
+         `Equal variance` = var_p)
+
+fc_1_lt <- fc_1_lt %>%  # append both prerequisite p-values to the supplementary table
+  left_join(prereq_long_lt)
+
+## CV -----------------------------------------------------------------
+
+normality_cv <- fc_1_ind_cv %>%  # Shapiro-Wilk p-value per group on the `Jack-value` column
+  group_by(Tissue, Genotype, Platform, Compound) %>%
+  summarise(normality_cv = shapiro.test(`Jack-value`)$p.value) %>%
+  ungroup() #%>%
+#mutate(normality_lt = p.adjust(normality_lt, method = "fdr"))
+
+normality_cv_summary <- normality_cv %>%  # NOTE: `normal` is TRUE when p <= 0.05, i.e. when the test rejects
+  mutate(normal = if_else(normality_cv <= 0.05, TRUE, FALSE)) %>%
+  group_by(normal) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+normality_cv %>%  # histogram of normality p-values; red line marks 0.05
+  ggplot(aes(x = normality_cv)) +
+  stat_bin(boundary = 1, binwidth = 0.05) +  # numeric form of the original `boundary = T`
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_vline(xintercept = 0.05, color = "red") +
+  facet_grid(rows = vars(Platform), cols = vars(Tissue))
+
+
+normality_cv_wt <- normality_cv %>%  # wild-type p-values, renamed for the pairwise join below
+  filter(Genotype == "MoneyMaker") %>%
+  select(Tissue, Compound, Platform,
+         group2 = Genotype, normality_cv2 = normality_cv)
+
+normality_cv_mut <- normality_cv %>%
+  filter(Genotype != "MoneyMaker") %>%
+  select(Tissue, Compound, Platform,
+         group1 = Genotype, normality_cv1 = normality_cv)
+
+normality_cv_comb <- normality_cv_mut %>%
+  left_join(normality_cv_wt)
+
+normality_cv %>%
+  mutate(log_p = -log10(normality_cv), x = "x") %>%
+  ggplot(aes(x = x, y = normality_cv)) +
+  geom_quasirandom() +
+  #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) +
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_hline(yintercept = (0.05), color = "red") #+
+#facet_grid(cols = vars(Tissue))  # disabled together with the `+` above; it used to run as a stray statement
+
+variance_cv <- fc_1_ind_cv %>%  # pairwise variance tests between genotypes, one p-value matrix per group
+  mutate(Genotype = as_factor(Genotype)) %>%
+  group_by(Tissue, Compound, Platform) %>%
+  nest() %>%
+  mutate(cv = map(.x = data, .f = ~pairwise.var.test(.x$`Jack-value`, .x$Genotype, p.method = "none")$p.value))
+
+variance_cv_tidy <- variance_cv %>%  # p-value matrix -> tibble; its row names become `group1`
+  mutate(var_tbl = map(.x = cv, .f = ~as_tibble(as.data.frame(.x), rownames = "group1")))
+
+variance_cv_unnest <- variance_cv_tidy %>%  # keep only the comparisons against the MoneyMaker column
+  select(Tissue, Compound, Platform, var_tbl) %>%
+  unnest(cols = var_tbl) %>%  # only the list-column is unnested; extra columns via `...` are deprecated
+  mutate(group2 = "MoneyMaker") %>%
+  select(Tissue, Compound, Platform, group1, group2, var_p = MoneyMaker) #%>%
+#mutate(var_p = p.adjust(var_p, method = "fdr"))
+
+variance_cv_unnest %>%  # histogram of variance-test p-values; red line marks 0.05
+  ggplot(aes(x = var_p)) +
+  stat_bin(boundary = 1, binwidth = 0.05) +  # numeric form of the original `boundary = T`
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_vline(xintercept = 0.05, color = "red") +
+  facet_grid(rows = vars(Platform), cols = vars(Tissue))
+
+variance_cv_unnest %>%
+  mutate(log_p = -log10(var_p), x = "x") %>%
+  ggplot(aes(x = x, y = var_p)) +
+  geom_quasirandom() +
+  #geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.01) +
+  #geom_histogram(binwidth = 0.05, ) +
+  geom_hline(yintercept = (0.05), color = "red") #+
+#facet_grid(cols = vars(Tissue))  # disabled together with the `+` above; it used to run as a stray statement
+
+variance_cv_summary <- variance_cv_unnest %>%  # NOTE: `normal` is TRUE when p <= 0.05, i.e. when the test rejects
+  mutate(normal = if_else(var_p <= 0.05, TRUE, FALSE)) %>%
+  group_by(normal) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+prereq_cv <- normality_cv_comb %>%  # a comparison fulfils the prerequisites if neither test rejects at 0.05
+  left_join(variance_cv_unnest) %>%
+  mutate(fulfill = if_else(normality_cv1 >= 0.05 & normality_cv2 >= 0.05 & var_p >= 0.05, TRUE, FALSE))
+
+prereq_cv_summary <- prereq_cv %>%
+  group_by(fulfill) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+
+variance_cv_wt <- variance_cv_unnest %>%  # one MoneyMaker row per group; it has no variance p-value of its own
+  select(Tissue, Compound, Platform,
+         Genotype = group2) %>%
+  distinct(Tissue, Compound, Platform, Genotype) %>%
+  mutate(var_p = NA_real_)
+
+variance_cv_mut <- variance_cv_unnest %>%
+  select(Tissue, Compound, Platform,
+         Genotype = group1, var_p)
+
+variance_cv_long <- variance_cv_mut %>%
+  bind_rows(variance_cv_wt)
+
+prereq_cv_long <- normality_cv %>%
+  left_join(variance_cv_long) %>%
+  rename(Normality = normality_cv,
+         `Equal variance` = var_p)
+
+fc_1_cv <- cv_out %>%  # replaces the earlier `fc_1_cv` with the CV table plus prerequisite p-values
+  left_join(prereq_cv_long)
+
+write_csv(fc_1, "FC1_supplementary.csv")  # these three writes overwrite the files written before the tests
+write_csv(fc_1_lt, "FC1_LT_supplementary.csv")
+write_csv(fc_1_cv, "CV_supplementary.csv")
+
+# All combined ------------------------------------------------------------
+
+
+prereq_all <- prereq %>%  # stack the three prerequisite tables; non-shared columns are filled with NA
+  bind_rows(prereq_lt, prereq_cv)
+
+prereq_sum_all <- prereq_all %>%
+  group_by(fulfill) %>%
+  summarise(n = n()) %>%
+  ungroup() %>%
+  mutate(percentage = n/sum(n))
+ 
\ No newline at end of file
diff --git a/workflows/Whole_metabolome_analysis/simplified_classes.csv b/workflows/Whole_metabolome_analysis/simplified_classes.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b97be6543a5d7c93a05e5c9a32ab0a4e94e0c1c7
--- /dev/null
+++ b/workflows/Whole_metabolome_analysis/simplified_classes.csv
@@ -0,0 +1,43 @@
+Compound_Class,Compound_Class_simple
+purines,other
+amino_acid,Amino acid or derivative
+carboxylic_acid,Carboxylic acid
+alcohol,other
+no_chebi,other
+organic_heterocyclic_compound,other
+polyol,other
+carbohydrate_phosphate,Carbohydrate or derivative
+carbohydrate,Carbohydrate or derivative
+carbohydrate_derivative,Carbohydrate or derivative
+unannotated,other
+one_carbon_compound,other
+pyridines,other
+phosphoric_acids,other
+polyamine,other
+amino_acid_derivative,Amino acid or derivative
+primary_amino_compound,Amino acid or derivative
+pyrimidines,other
+Cinnamic acid,Cinnamic acid
+Metabolism: Amino acid,Amino acid or derivative
+Amino acid,Amino acid or derivative
+Aromatic acid derivatives,other
+Cofactor,other
+Metabolism:TCA,other
+Dipeptide,Dipeptide
+Metabolism: Chlorophyll,other
+Nucleotide,other
+Steroidal Glycoalkaloids,Glycoalkaloid
+Aliphatic acid glycosides,other
+Aromatic alcohol glycosides,other
+Flavonoid (glycosides),Flavonoid (glycosides)
+lyso Phospholipid,Phospholipid
+Lyso-Monogalactosyldiacylglycerol,Galactolipid
+Lyso-Digalactosyl-Diacylglycerol,Galactolipid +Diacylglycerol,DAG +Phospholipid,Phospholipid +Phosphatidylcholine,Phospholipid +Sphingolipid,Sphingolipid +Monogalactosyldiacylglycerol,Galactolipid +Digalactosyl-Diacylglycerol,Galactolipid +Phosphatidylethanolamine,Phospholipid +Triacylglyceride,TAG diff --git a/workflows/apolar_LC_MS_analysis/210813_lipid_analysis_cmQTL_val_1.R b/workflows/apolar_LC_MS_analysis/210813_lipid_analysis_cmQTL_val_1.R new file mode 100644 index 0000000000000000000000000000000000000000..6dc9f8a11cb91a1f61ef719bc77d8b2356b03b2e --- /dev/null +++ b/workflows/apolar_LC_MS_analysis/210813_lipid_analysis_cmQTL_val_1.R @@ -0,0 +1,1516 @@ +rm(list = ls()) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +library(openxlsx) +library(tidyverse) +library(car) +library(pheatmap) +library(broom) +library(ggpubr) +library(viridisLite) +library(ggtext) +library(glue) + +# Directory setting ------------------------------------------------------- + + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "analysis", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +# Data loading ------------------------------------------------------------ + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_norm <- str_c(current, "/", latest) + +setwd(latest_norm) + +latest_norm_date <- str_extract(latest, pattern = "^\\d{6}") + +sam_dat <- read_csv(str_c(latest_norm_date, "_cmQTL_val_1_2_sam_dat_lip.csv")) +met_dat <- read_csv(str_c(latest_norm_date, "_cmQTL_val_1_2_met_dat_lip.csv"), col_types = "cddcf") +feat_dat <- read_csv(str_c(latest_norm_date, "_cmQTL_val_1_2_feat_dat_lip.csv"), col_types = "fcffffTiifidfficccfdddd") + +setwd(out_dir) +# Data combination 
-------------------------------------------------------- + +mets <- met_dat$met +genotypes <- sam_dat %>% distinct(genotype,.keep_all = T) %>% + select(genotype, alias) + +lipid_classes <- feat_dat %>% + distinct(Compound_Class, Compound_Name, met) + +lip_long <- feat_dat %>% + filter(exp == 1, class == "sample") + + +# Means ------------------------------------------------------------------- + +means_tec_rep <- lip_long %>% + group_by(plantline, alias, genotype, treatment, met, tissue, LIMS_ID) %>% + summarise(mean_tec_rep = mean(rescaled)) %>% + ungroup() + +means <- means_tec_rep %>% + group_by(met, tissue, treatment, genotype, alias) %>% + summarise(mean = mean(mean_tec_rep), + sd = sd(mean_tec_rep), + n = n()) %>% + ungroup() %>% + mutate(se = sd/sqrt(n)) + +miss_per_treat <- lip_long %>% + group_by(met, tissue, treatment) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + mutate(percent_na = na/n*100) %>% + ungroup() + + +# Significance analysis --------------------------------------------------- + + + +lip_tidy <- means_tec_rep %>% + pivot_wider(id_cols = c(genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = mean_tec_rep) + +lip_tidy_numeric <- lip_tidy %>% + select(all_of(mets)) + +lip_aov <- map(.x = lip_tidy_numeric, .f = ~aov(.x ~ alias * treatment*tissue, data = lip_tidy)) + + +lip_tuk <- map(.x = lip_aov, .f = ~TukeyHSD(.x)) %>% + map(.f = tidy) %>% + map2(.y = names(lip_aov), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_lip_groups <- lip_tuk %>% + filter(term == "alias:treatment:tissue") %>% + separate(col = contrast, into = c("group1", "group2"), sep = "-") + +sig_lip <- sig_lip_groups %>% + separate(group1, into = c("alias1", "treatment1", "tissue1"), sep = ":") %>% + separate(group2, into = c("alias2", "treatment2", "tissue2"), sep = ":") %>% + filter(treatment1 == treatment2, tissue1 == tissue2) %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT") %>% + ungroup() %>% + 
mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue)) %>% + ungroup() + +sig_mets <- sig_lip %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + + +# Significance with t-test ------------------------------------------------ + +lip_tidy <- means_tec_rep %>% + pivot_wider(id_cols = c(genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = mean_tec_rep) %>% + mutate(group = str_c(tissue, treatment, alias, sep = "_")) + +lip_tidy_numeric <- lip_tidy %>% + select(all_of(mets)) + +lip_t <- map(.x = lip_tidy_numeric, + .f = ~pairwise.t.test(x = .x , + g = lip_tidy$group, + p.adjust.method = "none", + pool.sd = F)) %>% + map(.f = tidy) + +lip_t_tidy <- lip_t %>% + map2(.y = names(lip_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_lip<- lip_t_tidy %>% + separate(group1, into = c("tissue1", "treatment1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "treatment2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT", + tissue1 == tissue2, treatment1 == treatment2) %>% + 
group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(p.value, var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(p.value, var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + left_join(miss_per_treat) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue), + p.signif = if_else(percent_na >= 60, "ns", p.signif)) + + +sig_mets <- sig_lip %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +# Heatmap scaled all----------------------------------------------------------------- + +heat.lip <- means %>% + group_by(met) %>% + mutate(se = sd/sqrt(n), + total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.na(log_norm)| is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm)), + log_norm_level = if_else(is.na(log_norm_level), 0, log_norm_level)) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, 
Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.lip_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_lip, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = 
annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_all.jpg", + sep = "_") +) + +# Heatmap scaled per tissue----------------------------------------------------------------- + +heat.lip <- means%>% + group_by(tissue, met) %>% + mutate(total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.lip_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_lip, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + 
names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_tissue.jpg", + sep = "_") +) + + +# Heatmap scaled per tissue and treatment----------------------------------------------------------------- + +heat.lip <- means%>% + group_by(tissue, met, treatment) %>% + mutate(total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + 
select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.lip_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_lip, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + 
annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_tissue_treatment.jpg", + sep = "_") +) + + + +# Per metabolite comparisons ---------------------------------------------- + +norm_MM <- means %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, treatment, met, MM_mean = mean) + +fc <- means_tec_rep %>% + left_join(norm_MM) %>% + mutate(fc = mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(fc), + sd = sd(fc), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_lip, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() + +plotmets <- fc %>% distinct(met) %>% as_vector() +plottissues <- fc %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(lipid_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + rename(group1 = genotype.x, group2 = genotype.y) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(genotype 
= fct_relevel(genotype, c("MoneyMaker", "panK4-1", "log2-1", "transp1-1")), + treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_text(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 8)) + + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_lip_col_plots.pdf")) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + +# Per metabolite comparisons scaled 1---------------------------------------------- + +norm_MM_1 <- means %>% + filter(genotype == "MoneyMaker", treatment == 1) %>% + select(tissue, met, MM_mean = mean) + +fc_1_ind <- means_tec_rep %>% + left_join(norm_MM_1) %>% + mutate(fc = mean_tec_rep/MM_mean) + +fc_1 <- means_tec_rep %>% + left_join(norm_MM_1) %>% + mutate(fc = mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(fc), + sd = sd(fc), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_lip, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + #filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + 
arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_1 %>% distinct(met) %>% as_vector() +plottissues <- fc_1 %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(lipid_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc_1 %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_1 %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*")))) %>% + rename(group1 = genotype1, group2 = genotype) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + plot_out[[tiss]][[meta]] <- fc_1 %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 
1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = element_markdown()) + + ylab("Mean fold-change")+ + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_lip_col_plots_scaled_1.pdf"), + width = 15.8/2.54, + height = 8/2.54) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + + +# Heatmap scaled Wildtype----------------------------------------------------------------- + +heat.lip <- fc %>% + group_by(Compound_Name, met) %>% + mutate(log_norm = log2(mean_fc), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(Compound_Name, met), + names_from = group, + values_from = log_norm) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.lip_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") 
%>% + left_join(sig_lip, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = RColorBrewer::brewer.pal(7,"BuRd"), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-2.5,-1.5, -0.5 ,0.5,1.5,2.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_heatmap_rel_tissue_wt.jpg", + sep = "_") +) + + +# Levene analysis simple anova------------------------------------------------------------- + +miss_per_clust <- lip_long %>% + group_by(met, tissue) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + ungroup() %>% + mutate(percent_na_clust = na/n*100) %>% + select(met, tissue, percent_na_clust) + +miss_any_treat <- miss_per_treat %>% + group_by(met, tissue) %>% + summarise(miss_comp_treat = if_else(any(percent_na == 100), T,F)) + +means_tec_rep_lt <- means_tec_rep %>% + group_by(met, tissue, 
treatment, genotype, alias) %>% + mutate(lev_t = abs(log10(mean_tec_rep) - log10(median(mean_tec_rep)))) %>% + ungroup() + + +means_lt <- means_tec_rep_lt %>% + ungroup() %>% + group_by(met, tissue, treatment, genotype, alias) %>% + summarise(mean = mean(lev_t), + sd = sd(lev_t), + n = n()) %>% + ungroup() %>% + mutate(se = sd/sqrt(n)) + +lt_tidy <- means_tec_rep_lt %>% + filter(tissue == "fruits") %>% + ungroup() %>% + pivot_wider(id_cols = c(tissue, treatment, genotype, alias, LIMS_ID), + names_from = met, + values_from = lev_t) + +lt_tidy_numeric <- lt_tidy %>% + select(all_of(mets)) + +lt_aov <- map(.x = lt_tidy_numeric, .f = ~aov(.x ~ alias*treatment, data = lt_tidy)) + +lt_aov_tidy <- lt_aov %>% + map(tidy) %>% + map2(.y = names(lt_aov), .f = ~.x %>% mutate(lt_vars = .y)) %>% + purrr::reduce(bind_rows) + +lt_tuk <- map(.x = lt_aov, .f = ~TukeyHSD(.x)) %>% + map(.f = tidy) %>% + map2(.y = names(lt_aov), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_lip_lt_groups <- lt_tuk %>% + filter(term == "alias:treatment") %>% + separate(col = contrast, into = c("group1", "group2"), sep = "-") + +sig_lip_lt <- sig_lip_lt_groups %>% + separate(group1, into = c("alias1", "treatment1"), sep = ":") %>% + separate(group2, into = c("alias2", "treatment2"), sep = ":") %>% + filter(treatment1 == treatment2) %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT") %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means_lt, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias")) %>% + select(var, treatment1, treatment2, alias1, alias2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means_lt, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias")) %>% + select(var, treatment1, treatment2, alias1, alias2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 
1.1*(max(tot_val1, tot_val2))) %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment)) %>% + ungroup() + +sig_mets <- sig_lip_lt %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +# Levene with t-test ------------------------------------------------ +skip <- F + +if(skip == T) { + print("Levene t-test skipped") +} else { + lip_tidy <- means_tec_rep_lt %>% + pivot_wider(id_cols = c( genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = lev_t) %>% + mutate(group = str_c(tissue, treatment, alias, sep = "_")) + + lip_tidy_numeric <- lip_tidy %>% + select(all_of(mets)) + + lip_t <- map(.x = lip_tidy_numeric, + .f = ~pairwise.t.test(x = .x , + g = lip_tidy$group, + p.adjust.method = "none", + pool.sd = F)) %>% + map(.f = tidy) + + lip_t_tidy <- lip_t %>% + map2(.y = names(lip_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + + sig_lip_lt<- lip_t_tidy %>% + separate(group1, into = c("tissue1", "treatment1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "treatment2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT", + tissue1 == tissue2, treatment1 == treatment2) %>% + group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + #mutate(adj.p.value = p.value * 121) %>% + ungroup() %>% + mutate(p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + left_join(means_lt, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se, p.value, adj.p.value) %>% + left_join(means_lt, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, 
p.signif, mean1, mean2 = mean, se1, se2 = se, p.value, adj.p.value) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + left_join(miss_per_treat) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue), + p.signif = if_else(percent_na >= 60, "ns", p.signif)) + + + sig_mets <- sig_lip_lt %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +} +# Per metabolite comparisons unscaled levene---------------------------------------------- + +norm_MM_1_lt <- means_lt %>% + filter(genotype == "MoneyMaker", treatment == 1) %>% + select(tissue, met, MM_mean = mean) + +fc_1_lt <- means_tec_rep_lt %>% + #left_join(norm_MM_1) %>% + #mutate(fc = mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(lev_t), + sd = sd(lev_t), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_lip_lt, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + #filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_1_lt %>% distinct(met) %>% as_vector() +plottissues <- fc_1_lt %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(lipid_classes) %>% + 
select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc_1_lt %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_1_lt %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*")))) %>% + rename(group1 = genotype1, group2 = genotype) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc_1_lt %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = element_markdown()) + + ylab("Mean fold-change")+ + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[tiss]][[meta]] +plot_out[[1]][[2]] + +pdf(file = 
str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_scaled1_lip_lev_t_col_plots.pdf"), + width = 15.8/2.54, + height = 8/2.54) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + + + + +# CV analysis ------------------------------------------------------------- + +miss_per_clust <- lip_long %>% + group_by(met, tissue) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + ungroup() %>% + mutate(percent_na_clust = na/n*100) %>% + select(met, tissue, percent_na_clust) + +miss_any_treat <- miss_per_treat %>% + group_by(met, tissue) %>% + summarise(miss_comp_treat = if_else(any(percent_na == 100), T,F)) + +library(bootstrap) + +theta <- function(x){ + sd(x)/mean(x) +} + +cv <- means %>% + ungroup() %>% + group_by(genotype, tissue, alias, met) %>% + summarise(grand_mean = mean(mean), + grand_sd = sd(mean), + n = n()) %>% + mutate(cv = grand_sd/grand_mean) + +cv_jack <- means %>% + group_by(genotype, tissue, alias, met) %>% + summarise(cv = jackknife(mean, theta)$jack.values) %>% + mutate(jack_rep = row_number()) %>% + ungroup() + +cv_jack_mean <- cv_jack %>% + group_by(genotype,tissue, alias, met) %>% + summarise(mean_cv = mean(cv), + sd_cv = sd(cv), + n = n()) %>% + ungroup() %>% + mutate(se_cv = sd_cv/sqrt(n)) + +cv_jack_wide <- cv_jack %>% + pivot_wider(id_cols = c(genotype, tissue, alias, jack_rep), + names_from = met, + values_from = cv) %>% + mutate(group = str_c(tissue, alias, sep = "_")) + +cv_jack_numeric <- cv_jack_wide %>% + select(-c(genotype, tissue, alias, jack_rep, group)) + +lip_cv_jack <- map(.x = cv_jack_numeric, .f = ~aov(.x ~ alias, data = cv_jack_wide)) + + +lip_jack_t <- map(.x = cv_jack_numeric, .f = ~pairwise.t.test(x = .x , g = cv_jack_wide$group, p.adjust.method = "none")) %>% + map(.f = tidy) + +lip_jack_t_tidy <- lip_jack_t %>% + map2(.y = names(lip_jack_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_lip_cv_groups <- 
lip_jack_t_tidy %>% + separate(group1, into = c("tissue1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT", + tissue1 == tissue2) %>% + group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(cv_jack_mean, by = c("var" = "met", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(p.value, var, alias1, alias2, p.signif, mean_cv1 = mean_cv, se_cv1 = se_cv, tissue1, tissue2) %>% + left_join(cv_jack_mean, by = c("var" = "met", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(p.value, var, alias1, alias2, p.signif, mean_cv1, mean_cv2 = mean_cv, se_cv1, se_cv2 = se_cv, tissue1, tissue2) %>% + group_by(var, tissue1) %>% + mutate(tot_val1 = mean_cv1 + se_cv1, + tot_val2 = mean_cv2 + se_cv2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + left_join(genotypes, by = c("alias1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("alias2" = "alias")) %>% + rename(genotype2 = genotype) %>% + rename(met = var, + group1 = genotype1, + group2 = genotype2, + tissue = tissue1) %>% + left_join(miss_any_treat) %>% + left_join(miss_per_clust) %>% + group_by(met, tissue) %>% + mutate(p.signif = if_else(miss_comp_treat == T| percent_na_clust >= 50, "ns", p.signif), + met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2)) %>% + ungroup() + + +sig_mets_cv <- sig_lip_cv_groups %>% + filter(p.signif == "*") %>% + distinct(met, .keep_all = T) %>% + mutate(sig = T) %>% + select(met, sig) + +# CV Heatmap scaled per tissue----------------------------------------------------------------- + +heat.lip <- cv_jack_mean%>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - 
mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.lip_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_lip_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + 
+pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_heatmap_rel_tissue.jpg", + sep = "_") +) + + + +# CV Heatmap unscaled----------------------------------------------------------------- + +heat.lip <- cv_jack_mean%>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + #filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = mean_cv) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.lip_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + 
left_join(sig_lip_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + #filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + #display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_unscaled.jpg", + sep = "_") +) + + + +# CV dotplots ------------------------------------------------------------- + +cv %>% + filter(genotype == "MoneyMaker") %>% + mutate(`cv > 1` = if_else(cv > 1, "yes", "no")) %>% + ggplot(aes(x = tissue, y = cv)) + + geom_dotplot(aes(fill = `cv > 1`), stackdir = "center", binaxis = "y", + binwidth = 0.1, dotsize = 0.8) + + geom_hline(aes(yintercept = 1), color = "red") + + theme(panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + 
legend.position = "bottom", + legend.text = element_text(size = 8), + text = element_text(size = 10)) + + ylim(c(-0.15, 2)) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cv_dotplot.jpg", + sep = "_"), units = "cm", + width = 15.9, height = 6) + + + + +# CV Heatmap wildtype----------------------------------------------------------------- + +cv_jack_mean_wildtype <- cv_jack_mean %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, met, mean_cv_wt = mean_cv) + +heat.lip <- cv_jack_mean%>% + left_join(cv_jack_mean_wildtype) %>% + mutate(fc_cv = mean_cv/mean_cv_wt) %>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(fc_cv), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.lip_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_lip_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + 
mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.lip <- pheatmap(mat.heat.lip, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_heatmap_rel_wt.jpg", + sep = "_") +) + +# Per metabolite comparisons cv---------------------------------------------- + +norm_MM <- means %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, treatment, met, MM_mean = mean) + +fc_cv <- cv_jack%>% + left_join(cv_jack_mean_wildtype) %>% + mutate(fc_cv = cv/mean_cv_wt) %>% + group_by(tissue, alias, genotype, met) %>% + summarise(mean_fc = mean(fc_cv), + sd = sd(fc_cv), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + left_join(sig_lip_cv_groups, by = c("tissue", "met", "alias" = "alias2")) %>% + group_by(met) %>% + filter(any(p.signif == "*"), all(is.finite(mean_fc)), 
tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_cv %>% distinct(met) %>% as_vector() +plottissues <- fc_cv %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(lipid_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) + + + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_cv %>% + group_by(tissue, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + filter(!is.na(p.signif))%>% + mutate(group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc_cv %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + #mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "panK4-1", "log2-1", "transp1-1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = 
element_markdown()) + + ylab("Mean fold-change")+ + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_cv_lip_col_plots.pdf"), + width = 15.8/2.54, + height = 8/2.54) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + +# Files to remake figures ------------------------------------------------- + +write_csv(fc_1_ind, "individual_values.csv") +write_csv(fc_1, "mean_values_se_n.csv") +write_csv(sig_lip, "p_values.csv") + +write_csv(means_tec_rep_lt, "individual_values_levene.csv") +write_csv(fc_1_lt, "mean_values_se_n_levene.csv") +write_csv(sig_lip_lt, "p_values_levene.csv") + +#write_csv(cv_jack_ind, "individual_values_cv.csv") +#write_csv(cv_jack_ind_mean, "mean_values_se_n_cv.csv") +#write_csv(sig_cv_jack_ind, "p_values_cv.csv") + +write_csv(cv_jack, "individual_values_cv.csv") +write_csv(cv_jack_mean, "mean_values_se_n_cv.csv") +write_csv(sig_lip_cv_groups, "p_values_cv.csv") + +# Log used code ------------------------------------------------------------ + +file_name <- if (sys.nframe() > 0) sys.frame(1)$ofile else NULL # ofile is only available when the script runs via source(); sys.frame(1) errors at top level + +if (!is.null(file_name)) file.copy(file_name, # skip the copy when the script path is unknown + to = str_c(out_dir, str_remove(file_name, fixed(current)), "_", str_replace_all(Sys.Date(),"^.{2}|-",""), ".R"),overwrite = TRUE) diff --git a/workflows/apolar_LC_MS_figures/Figure maker_cmQTL1_paper.R b/workflows/apolar_LC_MS_figures/Figure maker_cmQTL1_paper.R new file mode 100644 index 0000000000000000000000000000000000000000..d0fb96a1de2bd448b32707867dc8d0a93af10fe3 --- /dev/null +++ b/workflows/apolar_LC_MS_figures/Figure maker_cmQTL1_paper.R @@ -0,0 +1,937 @@ +rm(list = ls()) +library(tidyverse) +library(ggpubr) +library(glue) +library(ggtext) +library(cowplot) +library(ggbeeswarm) +library(extrafont) + +# Directory setting ------------------------------------------------------- 
+ + +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "Figures", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(current, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +# File loading ------------------------------------------------------------ + +fc_1 <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +genotypes <- fc_1 %>% + distinct(alias, genotype) + +fc_1_ind <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC <- read_csv("p_values.csv") + +fc_1_lt <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_lt <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC_lt <- read_csv("p_values_levene.csv") + +fc_1_cv <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv <- read_csv("individual_values_cv.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = 
fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +setwd(out_dir) + + +# Data combination -------------------------------------------------------- + + +met_dat <- fc_1 %>% + distinct(met, Compound_Name, Compound_Class) + +genotypes <- fc_1 %>% + distinct(alias, genotype) + +per_comp_y <- fc_1 %>% + group_by(tissue, treatment, met) %>% + summarise(tot_val = max(mean_fc + se)) + +cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", + "#0072B2", "#D55E00","#000000") +bw_scale <- c("black", "black", "black", "black", "black", "black", "black") + + +# Used plots -------------------------------------------------------------- + +com_theme <- theme(axis.text.x = element_markdown(angle = 45, hjust = 1, size = 6, family = "sans"), + axis.text.y = element_text(size = 6, family = "sans"), + axis.title.x = element_blank(), + axis.title.y = element_text(size = 6, family = "sans"), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + strip.text = element_text(size = 8, family = "sans", margin = margin(t = 1, r = 1, b = 1, l = 1 , unit = "pt")), + text = element_text(size = 6, family = "sans"), + legend.title = element_blank(), + legend.text = element_markdown(size = 6), + plot.margin = unit(c(1,1,1,1), "mm"), + legend.margin = margin(t = 0, r = 2, b = 0, l = 2 , unit = "mm")) + +make_box_dot_plot <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y <- fc_1_ind %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, treatment, met) %>% + summarise(min_y = min(fc), + max_y = max(fc)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", 
glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + group_by(tissue, treatment, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 1.5) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(legend.position = plot_legend) + + com_theme + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + #ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_dot_plot_levene <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y <- fc_1_ind_lt %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, treatment, met) %>% + summarise(min_y = min(lev_t), + max_y = max(lev_t)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC_lt %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 
= genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + group_by(tissue, treatment, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- fc_1_ind_lt %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = lev_t)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 1.5) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(legend.position = plot_legend) + + com_theme + + ylab("Levene's transformed value")+ + #ylim(c(0, ylim_top))+ + xlab("") + + #ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + + +make_box_dot_plot_cv <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y_cv <- fc_1_ind_cv %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, met) %>% 
+ summarise(min_y = min(cv), + max_y = max(cv)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC_cv %>% + left_join(per_comp_y_cv) %>% + group_by(tissue, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + plot_out <- fc_1_ind_cv %>% + left_join(met_dat) %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = cv)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 2) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + com_theme + + theme(legend.position = plot_legend) + + ylab("CV")+ + #ylim(c(0, ylim_top))+ + xlab("") + + #ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +# PC 32:1 (1) fruits transp1-1------------------------------------------------------- + + + +p1 <- make_box_dot_plot("Cluster_06277", "fruits", "PC 32:1 (1) nominal", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 +saveRDS(last_plot(), "PC32_1_transp1-1_nominal_fruits.RDS") + +p2 <- make_box_dot_plot_cv("Cluster_06277", "fruits", "PC 32:1 (1) CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +saveRDS(last_plot(), "PC32_1_transp1-1_cv_fruits.RDS") + +leg <- get_legend(make_box_dot_plot("Cluster_06277", "fruits", "PC 32:1 (1)", "bottom", 
c("MoneyMaker","*transp1-1*"), c(1,4))) +leg +saveRDS(leg, "PC32_1_transp1-1_leg_fruits.RDS") + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("PC_32_1_1_fruits_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) + +p2 <- make_box_dot_plot_levene("Cluster_06277", "fruits", "PC 32:1 (1) levene transformed", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("PC 32_1 (1)_fruits_box_dot_plot_levene_transp1-1.png",width = 15.8, height = 8, units = "cm", dpi = 300) +saveRDS(last_plot(), "PC32_1_transp1-1_lt_fruits.RDS") + +# PC 32:1 (1) leaves transp1-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_06277", "leaves", "PC 32:1 (1) nominal", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 +saveRDS(last_plot(), "PC32_1_transp1-1_nominal_leaves.RDS") + +p2 <- make_box_dot_plot_cv("Cluster_06277", "leaves", "PC 32:1 (1) CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +leg <- get_legend(make_box_dot_plot("Cluster_06277", "leaves", "PC 32:1 (1)", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4))) +leg +saveRDS(leg, "PC32_1_transp1-1_leg_leaves.RDS") # saved only after the leaves legend is built; previously this wrote the fruits legend + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("PC 32_1 (1)_leaves_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_06277", "leaves", "PC 32:1 (1)", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("PC 32_1 (1)_leaves_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +saveRDS(last_plot(), "PC32_1_transp1-1_lt_leaves.RDS") + +# PC 34:5 fruits transp1-1------------------------------------------------------- + + + +p1 <- make_box_dot_plot("Cluster_06748", "fruits", 
"PC 34:5 nominal", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 + + +p2 <- make_box_dot_plot_cv("Cluster_06748", "fruits", "PC 34:5 CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_06748", "fruits", "PC 34:5", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("PC 34_5_fruits_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) # ":" replaced by "_" in the file name (not portable; matches the "PC 32_1" naming) + +p2 <- make_box_dot_plot_levene("Cluster_06748", "fruits", "PC 34:5 levene transformed", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("PC 34_5_fruits_box_dot_plot_levene_transp1-1.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +# PC 34:5 leaves transp1-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_06748", "leaves", "PC 34:5 nominal", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_06748", "leaves", "PC 34:5 CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_06748", "leaves", "PC 34:5", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("PC 34_5_leaves_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_06748", "leaves", "PC 34:5", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("PC 34_5_leaves_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + + +# Heatmap PC transp1-1----------------------------------------------------------------- +library(pheatmap) + +heat_base <- fc_1 %>% + filter(#Compound_Class %in% 
c("Phospholipid", "Phosphatidylcholine"), + genotype %in% c("MoneyMaker"), tissue == "fruits") + +heat.lip <- heat_base %>% + group_by(Compound_Name, met) %>% + mutate(log_norm = log2(mean_fc), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(Compound_Name, Compound_Class, met), + names_from = group, + values_from = log_norm) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.lip) <- heat.lip$met + +mat.heat.lip <- heat.lip %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.lip %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.lip$met + +annotation_col <- heat_base %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.lip_signif <- heat_base %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_GC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + #left_join(sig_mets) %>% + #filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.value >= 0.05| is.na(p.value), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + #left_join(lipid_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.lip) + +rownames(heat.lip_signif) <- heat.lip_signif$met + +mat.heat.lip_signif <- heat.lip_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = 
"red",leaves = "darkgreen"), + genotype = c(MoneyMaker = cb_scale[1], `panK4-1` = "brown", `log2-1` = "blue", `*transp1-1*` = cb_scale[4]), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + +pheatmap.lip <- pheatmap(mat.heat.lip, + color = colorRampPalette(c("blue", "white", "red"))(65), + #cellwidth = 16, + #cellheight = 4, + breaks = seq(-3.25, 3.25, 0.1), + cluster_rows = T, + cluster_cols = F, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.lip_signif, + number_color = "black", + fontsize_number = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_heatmap_rel_tissue_wt.jpg", + sep = "_") +) + +dev.off() + + + +# Line plot PC transp1-1 -------------------------------------------------- + +fc_1 %>% + mutate(treatment = as_factor(treatment)) %>% + filter(Compound_Class %in% c("Phospholipid"), + genotype %in% c("MoneyMaker", "*transp1-1*"), tissue == "fruits") %>% + group_by(genotype, treatment) %>% + summarise(grand_mean_fc = mean(mean_fc), + grand_sd = sd(mean_fc), + n = n()) %>% + ungroup() %>% + mutate(se = grand_sd/sqrt(n), + ymin = grand_mean_fc - se, + ymax = grand_mean_fc + se) %>% + ggplot(aes(x = treatment, y = grand_mean_fc, color = genotype)) + + geom_line(aes(group = genotype)) + + geom_point() + + geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.1) + + #geom_boxplot(aes(x = treatment, y = mean_fc, color = genotype),inherit.aes = F) + + #geom_violin(aes(fill = genotype)) + + #stat_summary() + + #geom_smooth(aes(group = genotype), method = "lm", se = F, formula = y ~ x) + + com_theme + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = "Phospholipids") + + scale_fill_manual(values = cb_scale[c(1,4)], aesthetics = "color") + +ggsave("Phospholipids_fruits_combined_transp1-1.png",width = 16.5, height = 8, units = "cm", dpi = 
300) + +stop(print("current stop")) +# Test correlation analysis ----------------------------------------------- + +library(corrplot) + +lip_wide <- fc_1_ind %>% + pivot_wider(names_from = met, + values_from = fc, + id_cols = c(tissue, treatment, genotype, LIMS_ID)) %>% + filter(tissue == "fruits", genotype == "*panK4-1*") + +lip_corr <- lip_wide %>% + select(starts_with("Cluster")) #%>% + t() + +lip_corr_mat <- cor(lip_corr) + + +annotation_row_lip_corr <- met_dat %>% + select(Compound_Class) %>% + as.data.frame() + +rownames(annotation_row_lip_corr) <- met_dat$met + +annotation_col_lip_corr <- met_dat %>% + select(Compound_Class) %>% + as.data.frame() + +rownames(annotation_col_lip_corr) <- met_dat$met + +ann_colors_lip_corr = list( + ChEBI_Ontology_dense = c(amino_acid = "red", + carbohydrate = "white", + carbohydrate_derivative = "grey", + carbohydrate_phosphate = "brown", + carboxylic_acid = "blue", + other = "black", + unannotated = "magenta") +) + +pheatmap <- pheatmap(lip_corr_mat, + color = colorRampPalette(c("blue","white", "red"))(21), + #cellwidth = 8, + #cellheight = 8, + breaks = seq(-1.05,1.05,0.1), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = T, + show_colnames = F, + annotation_row = annotation_row_lip_corr, + annotation_col = annotation_col_lip_corr, + display_numbers = F, + number_color = "black", + fontsize_number = 6, + #annotation_colors = ann_colors_lip_corr, + #filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + # "metabolite_lip_correlation_pank_heatmap.jpg", + # sep = "_") +) + + + +# Test combine ------------------------------------------------------------ + +p1 <- make_box_dot_plot("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p1 + +p2 <- make_box_dot_plot_levene("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p2 + +p3 <- make_box_dot_plot_cv("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p3 + +leg46 <- 
get_legend(make_box_dot_plot("m_81", "fruits", "Phenylalanine", "bottom", c("MoneyMaker","*log2-1*"))) +leg46 + +main_plot <- plot_grid(p1,p2, nrow = 2, labels = "AUTO",rel_heights = c(1,1)) +main_plot + +main_plot_2 <- plot_grid(main_plot, p3, ncol = 2, labels = list("", "C"),rel_widths = c(3,1)) +main_plot_2 + +comp_plot <- plot_grid(main_plot_2, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("Phenylalanine_combined.png",width = 15.8, height = 16, units = "cm", dpi = 300) + +#sugars? + +p1 <- make_box_dot_plot("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p1 + +p2 <- make_box_dot_plot_levene("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p2 + +p3 <- make_box_dot_plot_cv("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p3 + +leg46 <- get_legend(make_box_dot_plot("m_72", "fruits", "Maltose", "bottom", c("MoneyMaker","*transp1-1*"))) +leg46 + +main_plot <- plot_grid(p1,p2, nrow = 2, labels = "AUTO",rel_heights = c(1,1)) +main_plot + +main_plot_2 <- plot_grid(main_plot, p3, ncol = 2, labels = list("", "C"),rel_widths = c(3,1)) +main_plot_2 + + +ggsave("Maltose_combined.png",width = 15.8, height = 16, units = "cm", dpi = 300) + + + +# Recycling --------------------------------------------------------------- + +testtheme <- theme(axis.text.x = element_markdown(angle = 45, hjust = 1, margin = unit(c(1,0,0,0), "mm")), + axis.title.x = element_blank(), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 8, family = "Arial Narrow"), + legend.title = element_blank(), + legend.text = element_markdown(), + legend.position = "right", + legend.direction = "vertical", + plot.margin = unit(c(1,0,1,2), "mm"), + legend.margin = margin(t = 0, r = 2, b = 0, l = 2 , unit = "mm")) + + +make_box_point_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = 
c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_point(aes(fill = genotype, color = genotype),size = 2, shape = 21) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_beeswarm_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = 
as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_beeswarm(aes(fill = genotype, color = genotype),size = 2, shape = 21, cex = 2, priority = "density") + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_dot_line_plot <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val, + treatment = as_factor(treatment)) %>% + filter(met == 
plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_plot <- fc_1%>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + mutate(treatment = as_factor(treatment)) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + mutate(treatment = as_factor(treatment)) %>% + ggplot(position = "dodge", aes(x = treatment, y = fc)) + + geom_boxplot(aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), position = position_dodge(0.75), + binaxis = "y", stackdir = "center", binwidth = 0.1, dotsize = 1) + + geom_smooth(aes(x = treatment, y = fc, color = genotype, group = genotype),method = "lm", position = position_dodge(0.75)) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +p1 <- make_box_dot_line_plot("m_70", "fruits", "Malic acid", "none", c("MoneyMaker","*panK4-1*")) +p1 +ggsave("Malic_acid_box_dot_line_plot_nominal.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +make_col_plot_cv <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC_cv %>% + left_join(genotypes, by = c("alias1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("alias2" = "alias")) %>% + rename(genotype2 
= genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y_cv) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- fc_cv %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("CV fold-change")+ + ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + + plot_out + +} + +make_col_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) 
%>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out <- fc_1 %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_jitter(aes(fill = genotype, color = genotype, y = fc), size = 2, shape = 21, data = fc_1_ind_plot) + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +# Figure 46 --------------------------------------------------------------- + +p1 <- make_col_plot("m_70", "fruits", "Malic acid", "none") +p1 + + +p2 <- make_col_plot_cv("m_119", "fruits", "Malic acid CV", "none") +p2 + + +leg46 <- get_legend(make_col_plot("m_123", "fruits", "sucrose", "bottom")) +leg46 + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_46.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Figure 47 --------------------------------------------------------------- + +p1 
<- make_col_plot("m_74", "fruits", "Phenylalanine", "none") +p1 + +p2 <- make_col_plot_cv("m_74", "fruits", "Phe CV", "none") +p2 + + +leg46 <- get_legend(make_col_plot("m_74", "fruits", "sucrose", "bottom")) +leg46 + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_47.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Figure 48 --------------------------------------------------------------- + +p1 <- make_col_plot_cv("m_25", "fruits", "F6P CV", "none") +p1 + +p2 <- make_col_plot_cv("m_38", "fruits", "G6P CV", "none") +p2 + +p3 <- make_col_plot_cv("m_65", "fruits", "Maltose CV", "none") +p3 + +leg48 <- get_legend(make_col_plot("m_65", "fruits", "sucrose", "bottom")) +leg48 + +main_plot <- plot_grid(p1,p2,p3, ncol = 3, labels = "AUTO") +main_plot + +comp_plot <- plot_grid(main_plot, leg48, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_48.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) diff --git a/workflows/apolar_LC_MS_library_annotation/Lipid_mod.R b/workflows/apolar_LC_MS_library_annotation/Lipid_mod.R new file mode 100644 index 0000000000000000000000000000000000000000..8e886de7e519560724962f270c760903c3499b57 --- /dev/null +++ b/workflows/apolar_LC_MS_library_annotation/Lipid_mod.R @@ -0,0 +1,103 @@ +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +library(tidyverse) +#tmp.wd <- "E:/R/Lipid" +#setwd(tmp.wd) + +file1 <- list.files(pattern = "clusters.gda$") +file2 <- "210809_lip_lib_cmQTL_val_adapt.txt" +fileout <- "cmQTL_val1_selected_lipids.txt" + +options(stringsAsFactors=FALSE) +mat1 <- read_delim(file1, skip = 8, delim = "\t") %>% + filter(str_detect(Name, "Cluster")) %>% + select(Name, mz_mean = `m/z`, RT_mean = RT, everything()) %>% + as.data.frame() +mat2 <- read.delim(file2) + + +mat1[is.na(mat1)] <- 0 + + +rt.dev.range <- c(-0.2, 0.2) #rt.dev.range <- 
c(-0.2, 0.2) +mz.dev.range <- c(-0.005, 0.005) #mz.dev.range <- c(-0.005, 0.005) + +out <- NULL + +res <- vector("list",nrow(mat1)) +for(i in seq_len(nrow(mat1))) { # seq_len(): zero iterations when mat1 has no rows (1:0 would run i = 1, 0) + mz.diff <- mat2$mz_mean - mat1$mz_mean[i] + rt.diff <- mat2$RT_mean - mat1$RT_mean[i] + j <- which(rt.diff > rt.dev.range[1] & rt.diff < rt.dev.range[2] & + mz.diff > mz.dev.range[1] & mz.diff < mz.dev.range[2]) + res[[i]] <- if(length(j) == 0) NA else j # NA keeps an unmatched cluster as one all-NA library row in out +} + +i <- rep.int(seq_len(nrow(mat1)), lengths(res)) # lengths() always yields an integer vector; sapply(res, length) yields list() for empty res +j <- unlist(res) +out <- cbind(mat1[i, ], mat2[j, ], RTdiff=mat2$RT_mean[j] - mat1$RT_mean[i]) + +colnames(out)[c(2,3)] <- c("mz_mean_new", "RT_mean_new") # colnames were non-unique + + + + + + + + + +# density plots +n.breaks=4 #to be adjusted +ranges <- cut(out$RT_mean, n.breaks, labels=FALSE) + +dens <- lapply(seq_len(n.breaks), function(i) { + if(sum(!is.na(out$RTdiff[ranges == i]))) density(out$RTdiff[ranges == i], na.rm=TRUE) + else NULL}) + +m <- ceiling(sqrt(n.breaks)) +n <- ceiling(n.breaks/ m) +par(mfrow=c(n,m)) +mx2 <- numeric(n.breaks) + +RTrange <- matrix(NA, n.breaks, 2) + +for(i in seq_len(n.breaks)) { + if(is.null(dens[[i]])) + next + mx2[i] <- mx <- dens[[i]]$x[which.max(dens[[i]]$y)] + RTrange[i,] <- range(out$RT_mean[ranges == i], na.rm=TRUE) + plot(dens[[i]], main=sprintf("Range: %.2f - %.2f [min] | max RT dev: %.3f", RTrange[i,1], RTrange[i,2], mx), + xlim=c(-1,1)) + abline(v=mx + c(-0.1, -0.05,0,0.05, 0.1), col=c(3,2,1,2,3)) +} + + + + + + + + +rt2 <- sapply(seq_len(n.breaks), function(i) median(out$RT_mean[ranges == i], na.rm=TRUE)) +x11() +plot(rt2, mx2, main="RT time vs. RT diff", xlab="RT [min]", ylab="RT deviation [min]", pch=19) + + + + + + +# adjust the deviations parameters (a matrix nrow=n.breaks; ncol=2), for example +RTdevs <- cbind(mx2 - 0.1, mx2 + 0.1) # Adjust +# or edit the object RTdevs manually... 
+# RTdevs <- edit(RTdevs) + +indexes <- unlist(sapply(1:n.breaks, function(i) + which(ranges==i & out$RTdiff > RTdevs[i,1] & out$RTdiff < RTdevs[i,2]))) + +write.table(out[indexes,], file=fileout, sep="\t", quote=FALSE, row.names=FALSE) + + + diff --git a/workflows/apolar_LC_MS_normalization/210812_lipid_normalization.R b/workflows/apolar_LC_MS_normalization/210812_lipid_normalization.R new file mode 100644 index 0000000000000000000000000000000000000000..02ceee94b2ceef5fccef20bcd9df8e5ad87d5655 --- /dev/null +++ b/workflows/apolar_LC_MS_normalization/210812_lipid_normalization.R @@ -0,0 +1,811 @@ +rm(list = ls()) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() +current <- getwd() +source <- ".." +library(openxlsx) +library(tidyverse) +library(car) +library(pheatmap) +library(broom) +library(ggpubr) +library(viridisLite) +library(modelr) +library(dlookr) +library(imputeLCMD) + +# Directory setting ------------------------------------------------------- + + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "normalization", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + + + +# Data loading ------------------------------------------------------------ +setwd(source) + +sam_dat1 <- read_csv("210812_cmQTL_val1_samplelist.csv") +lip_run1 <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 3) + +sam_dat2 <- read_csv("210812_cmQTL_val2_samplelist.csv") +lip_run2 <- readxl::read_xlsx("210324_WIJESI-130121-15_cmQTL_validation2.xlsx", sheet = 4) + +genotypes <- readxl::read_xlsx("Genotype_names.xlsx") + +setwd(current) +area <- read_delim(file = "cmQTL_val1_selected_lipids.txt", delim = "\t") %>% + rename(met = Name) %>% + distinct(met, .keep_all = T) %>% + group_by(Compound_Name) %>% + mutate(peak_no = rank(RT_mean, ties.method 
= "first"), + Compound_Name = if_else(duplicated(Compound_Name), + str_c(Compound_Name, "peak", peak_no, sep = "_"), + Compound_Name)) + +met_dat = area %>% + select(met, mz_mean,RT_mean, Compound_Name, Compound_Class) + +setwd(out_dir) + +# Data combination -------------------------------------------------------- +sam_vars <- c("plantline", "alias", "LIMS_ID", + "treatment", "tissue", "batch_lip", "run_date_lip", + "extraction_num", "sample_num", + "class", "run_num_lip", "sample_weight", "exp", "genotype") + + + +sam_dat1_tidy <- lip_run1 %>% + mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc", "sample")), + extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0", extraction_num)), + exp = as_factor(1)) %>% + left_join(sam_dat1) %>% + left_join(genotypes) %>% + select(treatment = irrigation, everything()) %>% + select(all_of(sam_vars)) + +sam_dat2_tidy <- lip_run2 %>% + mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc", "sample")), + extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0", extraction_num)), + exp = as_factor(2)) %>% + left_join(sam_dat2) %>% + left_join(genotypes) %>% + select(treatment = irrigation, everything()) %>% + select(all_of(sam_vars)) + +sam_dat <- sam_dat1_tidy %>% + bind_rows(sam_dat2_tidy) %>% + group_by(batch_lip) %>% + mutate(daily_num = row_number()) %>% + fill(tissue, .direction = "updown") %>% + ungroup() %>% + left_join(genotypes) %>% + mutate(machine_num_lip = str_c(run_num_lip, "pos", sep = "_"), + treatment = as_factor(treatment)) + +area_long <- area %>% + pivot_longer(matches("^\\d+_pos$"), + names_to = "machine_num_lip", + values_to = "area") %>% + left_join(sam_dat) + +# Data filtering ---------------------------------------------------------- + + +# set_back_NA(): turn every exact zero in a vector into NA. +# x: atomic vector (used on the `area` column); the type of x is kept. +# Returns x with zeros replaced by NA and all other elements unchanged. +# Entries that are already NA stay NA; the former element-wise +# `if (x[[i]] == 0)` stopped with "missing value where TRUE/FALSE needed" +# on the first NA it met. +set_back_NA <- function(x){ + x[!is.na(x) & x == 0] <- NA + x +} + + +# Imputation 
-------------------------------------------------------------- + +features_na <- area_long %>% + group_by(met) %>% + mutate(area = set_back_NA(area)) %>% + ungroup() + +missingness <- features_na %>% + group_by(exp, met, tissue, treatment) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + mutate(percent_na = na/n*100) %>% + ungroup() + +features <- features_na %>% + left_join(missingness) %>% + group_by(met) %>% + #mutate(area = ifelse(percent_na >= 0 & is.na(area), rnorm(n = 1, mean = 0.5*min(area, na.rm = T)), area)) %>% + ungroup() + +#miss_for_imp <- features %>% +# pivot_wider(names_from = met, +# values_from = area, +# id_cols = c(exp, machine_num_lip, met, tissue, treatment)) %>% +# select(exp, machine_num_lip, tissue, treatment, matches("Cluster_")) %>% +# group_by(exp, tissue, treatment) %>% +# nest() %>% +# mutate(new = map(.x = data, .f = as.data.frame), +# new = map(.x = new, .f = ~`row.names<-`(.x, .x$machine_num_lip)), +# new = map(.x = new, .f = ~select(.x, !starts_with("machine"))), +# new = map(.x = new, .f = as.matrix), +# imp = map_dfc(.x = new, .f = impute.QRILC)[[1]]) + +#imp_out_loop <- vector(mode = "list", length = length(miss_for_imp$new)) + +#for (j in seq_along(miss_for_imp$new)) { +# imp_out_loop[[j]] <- impute.QRILC(miss_for_imp$new[[j]])[[1]] +#} + +#can currently not solve per tissue, treatment and exp +#proceed with qrilc on all + +imp <- features_na %>% + pivot_wider(names_from = met, + values_from = area, + id_cols = c(exp, machine_num_lip, tissue, treatment)) %>% + select(exp, machine_num_lip, tissue, treatment, matches("Cluster_")) %>% + as.data.frame() + +row.names(imp) <- imp$machine_num_lip +imp <- imp %>% + select(-exp, -tissue, -treatment, -machine_num_lip) %>% + as.matrix() + +imp <- impute.QRILC(imp) + + +features <- imp[[1]] %>% + as_tibble(rownames = "machine_num_lip") %>% + pivot_longer(cols = matches("Cluster_"), + names_to = "met", + values_to = "imp") %>% + left_join(features_na) + +adjust_imp <- features %>% + 
group_by(met) %>% + summarise(offset = if_else (any(imp <0), 1.001*abs(min(imp)),0)) + + +features_filtered <- features %>% + left_join(missingness) %>% + #filter(percent_na <= 60) + left_join(adjust_imp) %>% + mutate(imp = imp + offset) %>% + select(-offset) + +# Internal Standard Normalization ----------------------------------------- + +#isvec <- features %>% +# filter(met=="m_44") %>% +# select(sample_id, is = imp) + +features <- features_filtered %>% +# full_join(isvec) %>% + mutate(isnorm=imp) %>% #currently no int_stand +# filter(met != "m_44") %>% + mutate(sample_weight = if_else(sample_weight == 0|is.na(sample_weight), 50, sample_weight)) + + +# Fit linear model on QCs ------------------------------------------------- + +by_batch_lip <- features %>% + filter(class=="run_qc") %>% + group_by(batch_lip, met) %>% # need to change treatment_batch_lip_corr + nest() + +feature_model <- function(df) { + lm(isnorm ~ daily_num, data = df) +} + + +by_batch_lip <- by_batch_lip %>% + mutate(model = map(data, feature_model), + predicts = map2(data, model, add_predictions), + coefficients = map(model, tidy), + aug = map(model, augment), + glance = map (model, glance)) + +coefs_aug <- unnest(by_batch_lip, aug) %>% + select(batch_lip, met, .resid, daily_num) + +coefs_glance <- unnest(by_batch_lip,glance) %>% + select(batch_lip, met, adj.r.squared) + +coefs_term <- unnest(by_batch_lip, coefficients) %>% + pivot_wider(id_cols = c(batch_lip, met), + names_from = "term", + values_from = estimate) %>% + rename(x = daily_num, + intercept = `(Intercept)`) + + +coefs_pvalue <- by_batch_lip %>% + select(batch_lip, met, coefficients) %>% + ungroup() %>% + unnest(coefficients) %>% + pivot_wider(id_cols = c(batch_lip, met), + names_from = "term", + values_from = p.value) %>% + rename(intercept_pval = `(Intercept)`, + daily_num_pval = daily_num) %>% + full_join(features) %>% + full_join(coefs_term) %>% + #left_join(coefs_aug) %>% + full_join(coefs_glance) + +median <- features %>% + 
filter(class=="run_qc") %>% + group_by(batch_lip, met) %>% + summarise(median = median (isnorm)) + +features_lin <- unnest(by_batch_lip, predicts) %>% + full_join(coefs_pvalue) %>% + full_join(median) %>% + mutate(pred = intercept + x*daily_num, + predp = ifelse(daily_num_pval <=0.05 & adj.r.squared >= 0.75, pred, median), + predp = ifelse(is.na(predp), median, predp)) %>% + select(- c("data", "model", "coefficients")) + +adjust_lin <- features_lin %>% + ungroup() %>% + group_by(met) %>% + summarise(lin_offset = if_else(any(predp <0), 1.001*abs(min(predp)),0)) + +features_lin <- features_lin %>% + ungroup() %>% + full_join(adjust_lin) %>% + full_join(features) %>% + mutate(linnorm = (isnorm+lin_offset)/(predp+lin_offset), + linnorm_fw = linnorm/sample_weight, + linnorm_fw_log = log2(linnorm_fw)) + + + + +#Fit loess-model for QC-RLSC batch_lipwise#### +rejoin <- features +features <- rejoin + +QC_loess <- features %>% + ungroup() %>% + filter(class == "run_qc") %>% + #filter(met!="m_44") %>% + group_by(met, batch_lip) %>% + nest() + +loess_model <- function (df) { + loess(isnorm ~ daily_num, span = 1.5, data = df,control = loess.control(surface = "interpolate")) +} + +start <- Sys.time() +QC_loess <- QC_loess %>% + mutate(model = map (data, loess_model), + daily_num = list(seq(1,65,1)), + predict = map2(model,daily_num,stats::predict)) +end <- Sys.time() +end-start + +features_loess <- QC_loess %>% + unnest(c(predict, daily_num, batch_lip)) %>% + select(-model, -data) %>% + full_join(rejoin) %>% + filter(!is.na(machine_num_lip)) + +adjust <- features_loess %>% + ungroup() %>% + group_by(met) %>% + summarise(offset = if_else (any(predict <0), 1.001*abs(min(predict)),0)) + +features_loess <- features_loess %>% + left_join(adjust) %>% + mutate(loess_norm = (isnorm+offset)/(predict+offset), + loess_norm_fw = loess_norm/sample_weight, + loess_norm_fw_log = log2(loess_norm_fw))%>% + ungroup() + + + + +# Relative log abundance plots 
-------------------------------------------- + +features_all <- features_lin %>% + left_join(features_loess) + + +features_all %>% + filter(!is.na(genotype)) %>% + group_by(met, treatment) %>% + mutate(sub = loess_norm_fw, + rla = log2(sub) - median(log2(sub))) %>% + ggplot(aes(x = met, y = rla)) + + geom_boxplot() + + facet_grid(rows = vars(treatment), scales = "free")+ + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + +features_all %>% + #filter(!is.na(taxa)) %>% + filter(treatment == 1, class == "sample") %>% + mutate(run_num_lip = as_factor(run_num_lip), + batch_lip = as_factor(batch_lip)) %>% + group_by(met, treatment) %>% + mutate(rla_imp = log2(imp) - median(log2(imp)), + rla_isnorm = log2(isnorm) - median(log2(isnorm)), + rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>% + pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% + ggplot(aes(x = met, y = rla)) + + geom_boxplot() + + facet_grid(rows = vars(normalization), cols = vars(treatment), scales = "free")+ + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + ylim(c(-2,2)) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "Ath_Dark_0_3_6_GC_met_rla_plot.jpg", + sep = "_")) + +features_all %>% + #filter(!is.na(taxa)) %>% + filter(class == "sample") %>% + mutate(run_num_lip = as_factor(run_num_lip), + batch_lip = as_factor(batch_lip)) %>% + group_by(met, treatment) %>% + mutate(rla_imp = log2(imp) - median(log2(imp)), + rla_isnorm = log2(isnorm) - median(log2(isnorm)), + rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>% + pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% + ggplot(aes(x = run_num_lip, y = rla, color = batch_lip)) + + geom_boxplot() + + facet_grid(rows = vars(normalization), scales = "free")+ + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + ylim(c(-1,1)) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + 
"cmQTL_val1_2_samples_rla_plot.jpg", + sep = "_")) + + +# RSD estimation ---------------------------------------------------------- + + + + +RSD_bio_reps <- features_all %>% + filter(class == "sample") %>% + #filter(!is.na(taxa)) %>% + group_by(met) %>% + #filter((any(qc_imputed) == T)==F) %>% + group_by(met, treatment, genotype, tissue, exp) %>% + summarise(RSD_imp = sd(imp)/mean(imp), + RSD_isnorm = sd(isnorm) / mean(isnorm), + RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)), + RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw))) + + +RSD_bio_reps_mean <- RSD_bio_reps %>% + #mutate(RSD_diff = RSD_imp-RSD_loess) %>% + ungroup() %>% + #group_by(treatment, tissue, exp) %>% + summarise(#mean_RSD_loess = mean(RSD_loess), + mean_imp = mean(na.omit(RSD_imp)), + mean_loess_norm = mean(na.omit(RSD_loess_norm)), + mean_RSD_isnorm = mean(na.omit(RSD_isnorm )), + mean_linnorm = mean(na.omit(RSD_linnorm))) + +RSD_qcs <- features_all %>% + filter(class == "run_qc") %>% + group_by(met) %>% + #filter((any(qc_imputed) == T)==F) %>% + group_by(met, exp) %>% + summarise(RSD_imp = sd(imp)/mean(imp), + RSD_isnorm = sd(isnorm) / mean(isnorm), + RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)), + RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw))) + +RSD_qcs_mean <- RSD_qcs %>% + #mutate(RSD_diff = RSD_imp-RSD_loess) %>% + ungroup() %>% + group_by(exp) %>% + summarise(#mean_RSD_loess = mean(RSD_loess), + mean_imp = mean(na.omit(RSD_imp)), + mean_loess_norm = mean(na.omit(RSD_loess_norm)), + mean_RSD_isnorm = mean(na.omit(RSD_isnorm )), + mean_linnorm = mean(na.omit(RSD_linnorm))) + + + +#PCA testing chunk#### +##https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r +#exp1 +pca <- summary(prcomp( + features_all %>% + ungroup() %>% + filter(exp == 1) %>% + # group_by(treatment) %>% + arrange(run_num_lip) %>% + # filter(class=="sample") %>% + # filter(met %in% overlap_met$met) %>% 
+ mutate(log_imp = log2(imp)) %>% + pivot_wider(id_cols = c(run_num_lip, class), + names_from = met, + values_from = linnorm_fw_log) %>% + select(starts_with("Cluster_")))) + +sam_vars <- colnames(sam_dat) + +pca_plot <- as_tibble(pca$x) %>% + mutate(join_num = 1:nrow(pca$x)) %>% + full_join(sam_dat %>% + #group_by(treatment) %>% + arrange(run_num_lip) %>% + filter(exp == 1) %>% + mutate(join_num=1:nrow(pca$x))) %>% + left_join(sam_dat) %>% + mutate(batch_lip= as_factor(batch_lip), + class= as_factor(class)) %>% + select(all_of(sam_vars), everything()) +#filter(PC1 >-1000, PC2 <30) + +exp_var <- as_tibble(pca[["importance"]]) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = tissue)) + + stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_tissue_lin_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = treatment)) + + stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_treatment_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + + +#PCA after loess normalization + +pca <- summary(prcomp( + features_all %>% + ungroup() %>% + filter(exp == 1) %>% + # group_by(treatment) %>% + arrange(run_num_lip) %>% + # filter(class=="sample") %>% + # filter(met %in% overlap_met$met) %>% + mutate(log_imp = log2(imp)) %>% + pivot_wider(id_cols = c(run_num_lip, class), + names_from = met, + values_from = loess_norm_fw_log) %>% + select(starts_with("Cluster_")))) + +sam_vars <- colnames(sam_dat) + +pca_plot <- 
as_tibble(pca$x) %>% + mutate(join_num = 1:nrow(pca$x)) %>% + full_join(sam_dat %>% + #group_by(treatment) %>% + arrange(run_num_lip) %>% + filter(exp == 1) %>% + mutate(join_num=1:nrow(pca$x))) %>% + left_join(sam_dat) %>% + mutate(batch_lip= as_factor(batch_lip), + class= as_factor(class)) %>% + select(all_of(sam_vars), everything()) +#filter(PC1 >-1000, PC2 <30) + +exp_var <- as_tibble(pca[["importance"]]) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = tissue)) + + stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_tissue_loess_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = treatment)) + + stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_treatment_loess_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + +tissues <- c("leaves", "fruits") +for(tiss in seq_along(tissues)){ + + pca <- summary(prcomp( + features_all %>% + ungroup() %>% + filter(exp == 1, tissue == tissues[[tiss]]) %>% + # group_by(treatment) %>% + arrange(run_num_lip) %>% + # filter(class=="sample") %>% + # filter(met %in% overlap_met$met) %>% + mutate(log_imp = log2(imp)) %>% + pivot_wider(id_cols = c(run_num_lip, class), + names_from = met, + values_from = loess_norm_fw_log) %>% + select(starts_with("Cluster_")))) + + sam_vars <- colnames(sam_dat) + + pca_plot <- as_tibble(pca$x) %>% + mutate(join_num = 1:nrow(pca$x)) %>% + full_join(sam_dat %>% + #group_by(treatment) %>% + arrange(run_num_lip) %>% + 
filter(exp == 1, tissue == tissues[[tiss]]) %>% + mutate(join_num=1:nrow(pca$x))) %>% + left_join(sam_dat) %>% + mutate(batch_lip= as_factor(batch_lip), + class= as_factor(class)) %>% + select(all_of(sam_vars), everything()) + #filter(PC1 >-1000, PC2 <30) + + exp_var <- as_tibble(pca[["importance"]]) + + pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = treatment)) + + stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + + ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1", tissues[[tiss]], "treatment_loess_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) +} + + +# PCA before normalization + +pca <- summary(prcomp( + features_all %>% + ungroup() %>% + filter(exp == 1) %>% + # group_by(treatment) %>% + arrange(run_num_lip) %>% + # filter(class=="sample") %>% + # filter(met %in% overlap_met$met) %>% + mutate(log_imp = log2(imp)) %>% + pivot_wider(id_cols = c(run_num_lip, class), + names_from = met, + values_from = log_imp) %>% + select(starts_with("Cluster_")))) + +sam_vars <- colnames(sam_dat) + +pca_plot <- as_tibble(pca$x) %>% + mutate(join_num = 1:nrow(pca$x)) %>% + full_join(sam_dat %>% + #group_by(treatment) %>% + arrange(run_num_lip) %>% + filter(exp == 1) %>% + mutate(join_num=1:nrow(pca$x))) %>% + left_join(sam_dat) %>% + mutate(batch_lip= as_factor(batch_lip), + class= as_factor(class)) %>% + select(all_of(sam_vars), everything()) +#filter(PC1 >-1000, PC2 <30) + +exp_var <- as_tibble(pca[["importance"]]) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = tissue)) + + stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + 
"cmQTL_val1_tissue_imp_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + +pca_plot %>% + # filter(treatment == "HL") %>% + ggplot()+ + geom_jitter(aes(x=PC1, y=PC2, color = batch_lip)) + + stat_ellipse(aes(x=PC1, y=PC2, color = batch_lip)) + + ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) + + xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)")) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_batch_imp_PCA.jpg", + sep = "_"), + width = 183, + height = 100, + units = "mm", + dpi = 300) + + + + +# Plot all metabolites ---------------------------------------------------- + +plotmets <- features_all %>% distinct(met) %>% as_vector +plotmet_labs <- plotmets %>% as_tibble() %>% + left_join(met_dat, by= c("value" = "met")) %>% + #mutate(peak_num = base::rank(HMDB_clear_name, ties.method = "first"), + # dup = sum(peak_num), + # HMDB_clear_name_unique = if_else(dup>1, str_c(HMDB_clear_name, peak_num), HMDB_clear_name)) %>% + #ungroup() %>% + select(Compound_Name) %>% as_vector() + +plot_out <- vector("list", length = length(plotmets)) + +for (meta in seq_along(plotmets)) { + + plot_out [[meta]] <- features_all %>% + mutate(xint = if_else(daily_num == 4, run_num_lip-3.5, max(run_num_lip)), + is_miss = as_factor(if_else(is.na(area), T, F))) %>% + filter(met == plotmets[[meta]]) %>% + filter(class!="blank") %>% + ggplot(aes(x=run_num_lip, y=isnorm)) + + geom_point(aes(color = class, shape = is_miss)) + + geom_point(aes(y=predict), color="black", size=0.1) + + geom_vline(aes(xintercept = xint))+ + # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") + + ggtitle(label = plotmet_labs[[meta]]) + +} + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_fit.pdf")) + + +for (meta in seq_along(plotmets)) { + print(plot_out[[meta]]) +} + +dev.off() + +plot_out <- vector("list", length = length(plotmets)) + +for (meta in seq_along(plotmets)) { + + plot_out [[meta]] <- features_all %>% + 
mutate(xint = if_else(daily_num == 4, run_num_lip-3.5, max(run_num_lip))) %>% + filter(met == plotmets[[meta]]) %>% + filter(class!="blank") %>% + ggplot(aes(x=run_num_lip, y=loess_norm_fw_log)) + + geom_point(aes(color=class)) + + #geom_point(aes(y=predp), color="black", size=0.1) + + geom_vline(aes(xintercept = xint))+ + # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") + + ggtitle(label = plotmet_labs[[meta]]) + +} + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_norm_fw_log.pdf")) + + +for (meta in seq_along(plotmets)) { + print(plot_out[[meta]]) +} + +dev.off() + +for (meta in seq_along(plotmets)) { + + plot_out [[meta]] <- features_all %>% + mutate(xint = if_else(daily_num == 4, run_num_lip-3.5, max(run_num_lip))) %>% + filter(met == plotmets[[meta]]) %>% + filter(class!="blank") %>% + ggplot(aes(x=run_num_lip, y=linnorm_fw)) + + geom_point(aes(color=class)) + + geom_vline(aes(xintercept = xint))+ + facet_grid(rows = vars(treatment), scales = "free") + + ggtitle(label = plotmet_labs[[meta]]) + +} + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_linnorm.pdf")) + +for (meta in seq_along(plotmets)) { + print(plot_out[[meta]]) +} + +dev.off() + +rescale <- features_all %>% + group_by(met, tissue, exp) %>% + summarise(rescale = median(imp)) + +features_out <- features_all %>% + left_join(rescale) %>% + group_by(met, tissue, exp) %>% + mutate(loess_norm_med = loess_norm_fw/median(loess_norm_fw), + rescaled = loess_norm_med*rescale) %>% + select(all_of(sam_vars), met, Compound_Name, Compound_Class, loess_norm_fw, loess_norm_med, area, rescaled) + +features_out %>% + filter(met == "Cluster_02177", tissue == "fruits", exp == 1) %>% + ggplot(aes(x = run_num_lip, y = loess_norm_fw)) + + geom_point() + +# Write files ------------------------------------------------------------- + + + +write_csv(features_out, + str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_2_feat_dat_lip.csv", + sep = "_")) + 
+write_csv(met_dat, + str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_2_met_dat_lip.csv", + sep = "_")) + +write_csv(sam_dat, + str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_2_sam_dat_lip.csv", + sep = "_")) + +write_csv(missingness, + str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_2_miss_lip.csv", + sep = "_")) diff --git a/workflows/polar_LC_MS_analysis/210903_secondary_analysis_cmQTL_val_1.R b/workflows/polar_LC_MS_analysis/210903_secondary_analysis_cmQTL_val_1.R new file mode 100644 index 0000000000000000000000000000000000000000..c8fe67bfc84304e3ec983a42711b2d17ab5bf734 --- /dev/null +++ b/workflows/polar_LC_MS_analysis/210903_secondary_analysis_cmQTL_val_1.R @@ -0,0 +1,1535 @@ +rm(list = ls()) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +library(openxlsx) +library(tidyverse) +library(car) +library(pheatmap) +library(broom) +library(ggpubr) +library(viridisLite) +library(ggtext) +library(glue) + +# Directory setting ------------------------------------------------------- + + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "analysis", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +# Data loading ------------------------------------------------------------ + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"), + pattern = "^\\d{6}_normalization"), + decreasing = T)[[1]] + +latest_norm <- str_c(current, "/", latest) + +setwd(latest_norm) + +latest_norm_date <- str_extract(latest, pattern = "^\\d{6}") + +sam_dat <- read_csv(str_c(latest_norm_date, "_cmQTL_val_1_2_sam_dat_LC.csv")) +met_dat <- read_csv(str_c(latest_norm_date, "_cmQTL_val_1_2_met_dat_LC.csv"), col_types = "cddcf") %>% + filter(Compound_Name != "isovitexin") +feat_dat <- read_csv(str_c(latest_norm_date, 
"_cmQTL_val_1_2_feat_dat_LC.csv"), col_types = "fcffffTiifidfficccfdddd") + +setwd(out_dir) +# Data combination -------------------------------------------------------- + +mets <- met_dat$met +genotypes <- sam_dat %>% distinct(genotype,.keep_all = T) %>% + select(genotype, alias) + +LC_classes <- feat_dat %>% + distinct(Compound_Class, Compound_Name, met) + +LC_long <- feat_dat %>% + filter(exp == 1, class == "sample") + + +# Means ------------------------------------------------------------------- + +means_tec_rep <- LC_long %>% + group_by(plantline, alias, genotype, treatment, met, tissue, LIMS_ID) %>% + summarise(mean_tec_rep = mean(rescaled)) %>% + ungroup() + +means <- means_tec_rep %>% + group_by(met, tissue, treatment, genotype, alias) %>% + summarise(mean = mean(mean_tec_rep), + sd = sd(mean_tec_rep), + n = n()) %>% + ungroup() %>% + mutate(se = sd/sqrt(n)) + +miss_per_treat <- LC_long %>% + group_by(met, tissue, treatment) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + mutate(percent_na = na/n*100) %>% + ungroup() + + +# Significance analysis --------------------------------------------------- + + + +LC_tidy <- means_tec_rep %>% + pivot_wider(id_cols = c(genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = mean_tec_rep) + +LC_tidy_numeric <- LC_tidy %>% + select(all_of(mets)) + +LC_aov <- map(.x = LC_tidy_numeric, .f = ~aov(.x ~ alias * treatment*tissue, data = LC_tidy)) + + +LC_tuk <- map(.x = LC_aov, .f = ~TukeyHSD(.x)) %>% + map(.f = tidy) %>% + map2(.y = names(LC_aov), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_LC_groups <- LC_tuk %>% + filter(term == "alias:treatment:tissue") %>% + separate(col = contrast, into = c("group1", "group2"), sep = "-") + +sig_LC <- sig_LC_groups %>% + separate(group1, into = c("alias1", "treatment1", "tissue1"), sep = ":") %>% + separate(group2, into = c("alias2", "treatment2", "tissue2"), sep = ":") %>% + filter(treatment1 == treatment2, tissue1 == tissue2) 
%>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT") %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue)) %>% + ungroup() + +sig_mets <- sig_LC %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + + +# Significance with t-test ------------------------------------------------ + +LC_tidy <- means_tec_rep %>% + pivot_wider(id_cols = c(genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = mean_tec_rep) %>% + mutate(group = str_c(tissue, treatment, alias, sep = "_")) + +LC_tidy_numeric <- LC_tidy %>% + select(all_of(mets)) + +LC_t <- map(.x = LC_tidy_numeric, + .f = ~pairwise.t.test(x = .x , + g = LC_tidy$group, + p.adjust.method = "none", + pool.sd = F)) %>% + map(.f = tidy) + +LC_t_tidy <- LC_t %>% + map2(.y = names(LC_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_LC<- LC_t_tidy %>% + separate(group1, into = c("tissue1", "treatment1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "treatment2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == 
"967514 MM WT", + tissue1 == tissue2, treatment1 == treatment2) %>% + group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(p.value, var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(p.value, var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + left_join(miss_per_treat) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue), + p.signif = if_else(percent_na >= 60, "ns", p.signif)) + + +sig_mets <- sig_LC %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +# Heatmap scaled all----------------------------------------------------------------- + +heat.LC <- means %>% + group_by(met) %>% + mutate(se = sd/sqrt(n), + total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.na(log_norm)| is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm)), + log_norm_level = if_else(is.na(log_norm_level), 0, log_norm_level)) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) 
%>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.LC_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row 
= annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_all.jpg", + sep = "_") +) + +# Heatmap scaled per tissue----------------------------------------------------------------- + +heat.LC <- means%>% + group_by(tissue, met) %>% + mutate(total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.LC_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| 
is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_tissue.jpg", + sep = "_") +) + + +# Heatmap scaled per tissue and treatment----------------------------------------------------------------- + +heat.LC <- means%>% + group_by(tissue, met, treatment) %>% + mutate(total_norm = mean/mean(mean), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + 
as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.LC_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = 
mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_heatmap_rel_tissue_treatment.jpg", + sep = "_") +) + + + +# Per metabolite comparisons ---------------------------------------------- + +norm_MM <- means %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, treatment, met, MM_mean = mean) + +fc <- means_tec_rep %>% + left_join(norm_MM) %>% + mutate(fc = mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(fc), + sd = sd(fc), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_LC, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc %>% distinct(met) %>% as_vector() +plottissues <- fc %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(LC_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + rename(genotype1 = genotype) %>% + 
left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*")))) %>% + rename(group1 = genotype1, group2 = genotype) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = element_markdown()) + + ylab("Mean fold-change") + + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_LC_col_plots.pdf"), + width = 9, + height = 6) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + +# Per metabolite comparisons scaled 1---------------------------------------------- + +norm_MM_1 <- means %>% + filter(genotype == "MoneyMaker", treatment == 1) %>% + select(tissue, met, MM_mean = mean) + +fc_1_ind <- means_tec_rep %>% + left_join(norm_MM_1) %>% + mutate(fc = mean_tec_rep/MM_mean) + +fc_1 <- means_tec_rep %>% + left_join(norm_MM_1) %>% + mutate(fc = 
mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(fc), + sd = sd(fc), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_LC, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + #filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_1 %>% distinct(met) %>% as_vector() +plottissues <- fc_1 %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(LC_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc_1 %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_1 %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*")))) %>% + rename(group1 = genotype1, group2 = genotype) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc_1 %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + 
ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 12), + legend.text = element_markdown()) + + ylab("Mean fold-change") + + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_LC_scaled1_col_plots.pdf"), + width = 15.8/2.54, + height = 8/2.54) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + + +# Heatmap scaled Wildtype----------------------------------------------------------------- + +heat.LC <- fc %>% + group_by(Compound_Name, met) %>% + mutate(log_norm = log2(mean_fc), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + #mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(Compound_Name, met), + names_from = group, + values_from = log_norm) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + 
filter(exp == 1, class == "sample") %>% + distinct(tissue, treatment, genotype) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, treatment, tissue, genotype) + +heat.LC_signif <- means %>% + distinct(tissue, treatment, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC, by = c("met", "tissue", "treatment", "alias" = "group2")) %>% + left_join(sig_mets) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = RColorBrewer::brewer.pal(7,"BuRd"), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-2.5,-1.5, -0.5 ,0.5,1.5,2.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val_1_heatmap_rel_tissue_wt.jpg", + sep = "_") +) + +# Levene analysis simple 
anova------------------------------------------------------------- + +miss_per_clust <- LC_long %>% + group_by(met, tissue) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + ungroup() %>% + mutate(percent_na_clust = na/n*100) %>% + select(met, tissue, percent_na_clust) + +miss_any_treat <- miss_per_treat %>% + group_by(met, tissue) %>% + summarise(miss_comp_treat = if_else(any(percent_na == 100), T,F)) + +means_tec_rep_lt <- means_tec_rep %>% + group_by(met, tissue, treatment, genotype, alias) %>% + mutate(lev_t = abs(log10(mean_tec_rep) - log10(median(mean_tec_rep)))) %>% + ungroup() + + +means_lt <- means_tec_rep_lt %>% + ungroup() %>% + group_by(met, tissue, treatment, genotype, alias) %>% + summarise(mean = mean(lev_t), + sd = sd(lev_t), + n = n()) %>% + ungroup() %>% + mutate(se = sd/sqrt(n)) + +lt_tidy <- means_tec_rep_lt %>% + filter(tissue == "fruits") %>% + ungroup() %>% + pivot_wider(id_cols = c(tissue, treatment, genotype, alias, LIMS_ID), + names_from = met, + values_from = lev_t) + +lt_tidy_numeric <- lt_tidy %>% + select(all_of(mets)) + +lt_aov <- map(.x = lt_tidy_numeric, .f = ~aov(.x ~ alias*treatment, data = lt_tidy)) + +lt_aov_tidy <- lt_aov %>% + map(tidy) %>% + map2(.y = names(lt_aov), .f = ~.x %>% mutate(lt_vars = .y)) %>% + purrr::reduce(bind_rows) + +lt_tuk <- map(.x = lt_aov, .f = ~TukeyHSD(.x)) %>% + map(.f = tidy) %>% + map2(.y = names(lt_aov), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_LC_lt_groups <- lt_tuk %>% + filter(term == "alias:treatment") %>% + separate(col = contrast, into = c("group1", "group2"), sep = "-") + +sig_LC_lt <- sig_LC_lt_groups %>% + separate(group1, into = c("alias1", "treatment1"), sep = ":") %>% + separate(group2, into = c("alias2", "treatment2"), sep = ":") %>% + filter(treatment1 == treatment2) %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT") %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(means_lt, by = c("var" = 
"met", "treatment1" = "treatment", "alias1" = "alias")) %>% + select(var, treatment1, treatment2, alias1, alias2, p.signif, mean1 = mean, se1 = se) %>% + left_join(means_lt, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias")) %>% + select(var, treatment1, treatment2, alias1, alias2, p.signif, mean1, mean2 = mean, se1, se2 = se) %>% + group_by(treatment1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment)) %>% + ungroup() + +sig_mets <- sig_LC_lt %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +# Levene with t-test ------------------------------------------------ +skip <- F + +if(skip == T) { + print("Levene t-test skipped") +} else { + LC_tidy <- means_tec_rep_lt %>% + pivot_wider(id_cols = c( genotype, alias, treatment, tissue, LIMS_ID), + names_from = met, + values_from = lev_t) %>% + mutate(group = str_c(tissue, treatment, alias, sep = "_")) + + LC_tidy_numeric <- LC_tidy %>% + select(all_of(mets)) + + LC_t <- map(.x = LC_tidy_numeric, + .f = ~pairwise.t.test(x = .x , + g = LC_tidy$group, + p.adjust.method = "none", + pool.sd = F)) %>% + map(.f = tidy) + + LC_t_tidy <- LC_t %>% + map2(.y = names(LC_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + + sig_LC_lt<- LC_t_tidy %>% + separate(group1, into = c("tissue1", "treatment1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "treatment2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT", + tissue1 == tissue2, treatment1 == treatment2) %>% + group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + #mutate(adj.p.value = p.value * 121) %>% + ungroup() %>% + mutate(p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% 
+ left_join(means_lt, by = c("var" = "met", "treatment1" = "treatment", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1 = mean, se1 = se, p.value, adj.p.value) %>% + left_join(means_lt, by = c("var" = "met", "treatment2" = "treatment", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(var, treatment1, treatment2, alias1, alias2, tissue1, tissue2, p.signif, mean1, mean2 = mean, se1, se2 = se, p.value, adj.p.value) %>% + group_by(treatment1, tissue1, var) %>% + mutate(tot_val1 = mean1 + se1, + tot_val2 = mean2 + se2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + rename(met = var, + group1 = alias1, + group2 = alias2, + treatment = treatment1, + tissue = tissue1) %>% + left_join(miss_per_treat) %>% + mutate(met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2), + treatment = as_factor(treatment), + tissue = as_factor(tissue), + p.signif = if_else(percent_na >= 60, "ns", p.signif)) + + + sig_mets <- sig_LC_lt %>% + filter(p.signif == "*") %>% + distinct(met) %>% + mutate(sig = T) + +} +# Per metabolite comparisons unscaled levene---------------------------------------------- + +norm_MM_1_lt <- means_lt %>% + filter(genotype == "MoneyMaker", treatment == 1) %>% + select(tissue, met, MM_mean = mean) + +fc_1_lt <- means_tec_rep_lt %>% + #left_join(norm_MM_1) %>% + #mutate(fc = mean_tec_rep/MM_mean) %>% + group_by(tissue, treatment, alias, genotype, met) %>% + summarise(mean_fc = mean(lev_t), + sd = sd(lev_t), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% + left_join(sig_LC_lt, by = c("tissue", "treatment", "met", "alias" = "group2")) %>% + group_by(met) %>% + #filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = 
as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_1_lt %>% distinct(met) %>% as_vector() +plottissues <- fc_1_lt %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(LC_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) +per_comp_y <- fc_1_lt %>% + group_by(tissue, treatment, met) %>% + summarise(mean = max(mean_fc), + se = max(se)) + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_1_lt %>% + group_by(tissue, treatment, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + select(-group1) %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*")))) %>% + rename(group1 = genotype1, group2 = genotype) %>% + filter(!is.na(p.signif)) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc_1_lt %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + mutate(treatment = fct_relevel(treatment, c("0.4", "0.6", "0.8", "1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + 
panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = element_markdown()) + + ylab("Mean fold-change")+ + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[tiss]][[meta]] +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_scaled1_LC_lev_t_col_plots.pdf"), + width = 15.8/2.54, + height = 8/2.54) + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} + +dev.off() + + + +# CV analysis ------------------------------------------------------------- + +miss_per_clust <- LC_long %>% + group_by(met, tissue) %>% + summarise(na = sum(is.na(area)), + n = n()) %>% + ungroup() %>% + mutate(percent_na_clust = na/n*100) %>% + select(met, tissue, percent_na_clust) + +miss_any_treat <- miss_per_treat %>% + group_by(met, tissue) %>% + summarise(miss_comp_treat = if_else(any(percent_na == 100), T,F)) + +library(bootstrap) + +theta <- function(x){ + sd(x)/mean(x) +} + +cv <- means %>% + ungroup() %>% + group_by(genotype, tissue, alias, met) %>% + summarise(grand_mean = mean(mean), + grand_sd = sd(mean), + n = n()) %>% + mutate(cv = grand_sd/grand_mean) + +cv_jack <- means %>% + group_by(genotype, tissue, alias, met) %>% + summarise(cv = jackknife(mean, theta)$jack.values) %>% + mutate(jack_rep = row_number()) %>% + ungroup() + +cv_jack_mean <- cv_jack %>% + group_by(genotype,tissue, alias, met) %>% + summarise(mean_cv = mean(cv), + sd_cv = sd(cv), + n = n()) %>% + ungroup() %>% + mutate(se_cv = sd_cv/sqrt(n)) + +cv_jack_wide <- cv_jack %>% + pivot_wider(id_cols = c(genotype, tissue, alias, jack_rep), + names_from = met, + values_from = cv) %>% + mutate(group = str_c(tissue, alias, sep = "_")) + +cv_jack_numeric <- cv_jack_wide %>% + select(-c(genotype, tissue, alias, jack_rep, group)) + +LC_cv_jack <- map(.x = 
cv_jack_numeric, .f = ~aov(.x ~ alias, data = cv_jack_wide)) + + +LC_jack_t <- map(.x = cv_jack_numeric, .f = ~pairwise.t.test(x = .x , g = cv_jack_wide$group, p.adjust.method = "none")) %>% + map(.f = tidy) + +LC_jack_t_tidy <- LC_jack_t %>% + map2(.y = names(LC_jack_t), .f = ~.x %>% mutate(var = .y)) %>% + purrr::reduce(bind_rows) + +sig_LC_cv_groups <- LC_jack_t_tidy %>% + separate(group1, into = c("tissue1", "alias1"), sep = "_") %>% + separate(group2, into = c("tissue2", "alias2"), sep = "_") %>% + filter(alias1 == "967514 MM WT" | alias2 == "967514 MM WT", + tissue1 == tissue2) %>% + group_by(var) %>% + mutate(adj.p.value = p.adjust(p.value)) %>% + ungroup() %>% + mutate(p.signif = if_else(adj.p.value <= 0.05, "*", "ns")) %>% + left_join(cv_jack_mean, by = c("var" = "met", "alias1" = "alias", "tissue1" = "tissue")) %>% + select(p.value, var, alias1, alias2, p.signif, mean_cv1 = mean_cv, se_cv1 = se_cv, tissue1, tissue2) %>% + left_join(cv_jack_mean, by = c("var" = "met", "alias2" = "alias", "tissue2" = "tissue")) %>% + select(p.value, var, alias1, alias2, p.signif, mean_cv1, mean_cv2 = mean_cv, se_cv1, se_cv2 = se_cv, tissue1, tissue2) %>% + group_by(var, tissue1) %>% + mutate(tot_val1 = mean_cv1 + se_cv1, + tot_val2 = mean_cv2 + se_cv2, + y.position = 1.1*(max(tot_val1, tot_val2))) %>% + ungroup() %>% + left_join(genotypes, by = c("alias1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("alias2" = "alias")) %>% + rename(genotype2 = genotype) %>% + rename(met = var, + group1 = genotype1, + group2 = genotype2, + tissue = tissue1) %>% + left_join(miss_any_treat) %>% + left_join(miss_per_clust) %>% + group_by(met, tissue) %>% + mutate(p.signif = if_else(miss_comp_treat == T| percent_na_clust >= 50, "ns", p.signif), + met = as_factor(met), + group1 = as_factor(group1), + group2 = as_factor(group2)) %>% + ungroup() + + +sig_mets_cv <- sig_LC_cv_groups %>% + filter(p.signif == "*") %>% + distinct(met, .keep_all = T) %>% + mutate(sig 
= T) %>% + select(met, sig) + +# CV Heatmap scaled per tissue----------------------------------------------------------------- + +heat.LC <- cv_jack_mean%>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm_level) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.LC_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + 
+mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_heatmap_rel_tissue.jpg", + sep = "_") +) + + + +# CV Heatmap unscaled----------------------------------------------------------------- + +heat.LC <- cv_jack_mean%>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(total_norm), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + #filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = mean_cv) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, 
genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.LC_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + #filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + #display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_unscaled.jpg", + sep = "_") +) + + +# CV dotplots ------------------------------------------------------------- + +cv %>% + filter(genotype == "MoneyMaker") %>% 
+ mutate(`cv > 1` = if_else(cv > 1, "yes", "no")) %>% + ggplot(aes(x = tissue, y = cv)) + + geom_dotplot(aes(fill = `cv > 1`), stackdir = "center", binaxis = "y", + binwidth = 0.1, dotsize = 1) + + geom_hline(aes(yintercept = 1), color = "red") + + theme(panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + legend.position = "bottom", + legend.text = element_text(size = 8), + text = element_text(size = 10)) + + ylim(c(-0.15, 2)) + +ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cv_dotplot.jpg", + sep = "_"), units = "cm", + width = 15.9, height = 6) + + + +# CV Heatmap wildtype----------------------------------------------------------------- + +cv_jack_mean_wildtype <- cv_jack_mean %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, met, mean_cv_wt = mean_cv) + +heat.LC <- cv_jack_mean%>% + left_join(cv_jack_mean_wildtype) %>% + mutate(fc_cv = mean_cv/mean_cv_wt) %>% + group_by(tissue, met) %>% + mutate(total_norm = mean_cv/mean(mean_cv), + log_norm = log2(fc_cv), + log_norm = if_else(is.infinite(log_norm), 0, log_norm), + log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% + ungroup() %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = log_norm) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +rownames(heat.LC) <- heat.LC$met + +mat.heat.LC <- heat.LC %>% + select(contains("leaves"), contains("fruits")) %>% as.matrix() + +annotation_row <- heat.LC %>% + select(Compound_Class) + +rownames(annotation_row) <- heat.LC$met + +annotation_col <- sam_dat %>% + filter(exp == 1, class == "sample") %>% + distinct(tissue, genotype) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + as.data.frame() + +rownames(annotation_col) <- 
annotation_col$group + +annotation_col <- annotation_col %>% + select(-group, tissue, genotype) + +heat.LC_signif <- cv_jack_mean %>% + distinct(tissue, alias, genotype, met) %>% + #filter(genotype != "MoneyMaker") %>% + left_join(sig_LC_cv_groups, by = c("met", "tissue" = "tissue2", "alias" = "alias2")) %>% + left_join(sig_mets_cv) %>% + filter(sig == T) %>% + mutate(group = as_factor(str_c(tissue, genotype, sep = "_")), + signif = if_else(p.signif == "ns"| is.na(p.signif), "","X")) %>% + pivot_wider(id_cols = c(met), + names_from = group, + values_from = signif) %>% + left_join(met_dat) %>% + left_join(LC_classes) %>% + arrange(Compound_Class, Compound_Name) %>% + as.data.frame() + +heat_cols <- colnames(mat.heat.LC) + +rownames(heat.LC_signif) <- heat.LC_signif$met + +mat.heat.LC_signif <- heat.LC_signif %>% + select(all_of(heat_cols)) %>% as.matrix() + +ann_colors = list( + tissue = c(fruits = "red",leaves = "darkgreen"), + genotype = c(MoneyMaker = "yellow", `panK4-1` = "brown", `log2-1` = "blue", `transp1-1` = "grey"), + treatment = c("0.4" = "red", "0.6" = "orange", "0.8" = "yellow", "1" = "green")) + + +pheatmap.LC <- pheatmap(mat.heat.LC, + #color = plasma(14), + #cellwidth = 16, + #cellheight = 4, + #breaks = c(-6.5,-5.5,-4.5,-3.5,-2.5,-1.5, -0.5 ,0.5,1.5,2.5,3.5,4.5,5.5,6.5), + cluster_rows = T, + cluster_cols = T, + annotation_names_row = F, + show_rownames = F, + annotation_row = annotation_row, + annotation_col = annotation_col, + display_numbers = mat.heat.LC_signif, + number_color = "black", + fontsize_number = 6, + angle_col = 45, + fontsize_col = 6, + annotation_colors = ann_colors, + filename = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""), + "cmQTL_val1_cv_heatmap_rel_wt.jpg", + sep = "_") +) + +# Per metabolite comparisons cv---------------------------------------------- + +norm_MM <- means %>% + filter(genotype == "MoneyMaker") %>% + select(tissue, treatment, met, MM_mean = mean) + +fc_cv <- cv_jack%>% + left_join(cv_jack_mean_wildtype) %>% + 
mutate(fc_cv = cv/mean_cv_wt) %>% + group_by(tissue, alias, genotype, met) %>% + summarise(mean_fc = mean(fc_cv), + sd = sd(fc_cv), + n = n()) %>% + mutate(se = sd/sqrt(n), + group = as_factor(str_c(tissue, genotype, sep = "_"))) %>% + left_join(sig_LC_cv_groups, by = c("tissue", "met", "alias" = "alias2")) %>% + group_by(met) %>% + #filter(any(p.signif == "*"), all(is.finite(mean_fc)), tissue != "flowers") %>% + left_join(met_dat) %>% + arrange(Compound_Class, Compound_Name) %>% + ungroup() %>% + mutate(genotype = as_factor(if_else(genotype == "MoneyMaker", glue("{genotype}"),glue("*{genotype}*"))), + genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + + +plotmets <- fc_cv %>% distinct(met) %>% as_vector() +plottissues <- fc_cv %>% distinct(tissue) %>% as_vector() %>% as.character + +labelnames <- plotmets %>% as_tibble() %>% + rename(met = value) %>% + left_join(met_dat) %>% left_join(LC_classes) %>% + select(Compound_Name) %>% as_vector() + + +plot_out <- vector("list", length = length(plotmets)) + + + +for(tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + + + sig_bar <- fc_cv %>% + group_by(tissue, met) %>% + mutate(tot_val = max(mean_fc + se)) %>% + mutate(y.position = tot_val + 0.25*max(tot_val)) %>% + ungroup() %>% + filter(!is.na(p.signif)) %>% + mutate(group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>% + filter(met == plotmets[[meta]], tissue2 == plottissues[[tiss]]) + + + plot_out[[tiss]][[meta]] <- fc_cv %>% + filter(met == plotmets[[meta]], tissue == plottissues[[tiss]]) %>% + #mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "panK4-1", "log2-1", "transp1-1"))) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = 
position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 14), + legend.text = element_markdown()) + + ylab("Mean fold-change") + + ggtitle(label = str_c(labelnames[[meta]], "in", plottissues[[tiss]], sep = " ")) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + } + +} + +plot_out[[1]][[2]] + +pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"cmQTL_val1_cv_LC_col_plots.pdf"), + width = 15.8/2.54, + height = 8/2.54) + + +for (tiss in seq_along(plottissues)) { + for (meta in seq_along(plotmets)) { + print(plot_out[[tiss]][[meta]]) + } + +} +dev.off() + +# Files to remake figures ------------------------------------------------- + +write_csv(fc_1_ind, "individual_values.csv") +write_csv(fc_1, "mean_values_se_n.csv") +write_csv(sig_LC, "p_values.csv") + +write_csv(means_tec_rep_lt, "individual_values_levene.csv") +write_csv(fc_1_lt, "mean_values_se_n_levene.csv") +write_csv(sig_LC_lt, "p_values_levene.csv") + +#write_csv(cv_jack_ind, "individual_values_cv.csv") +#write_csv(cv_jack_ind_mean, "mean_values_se_n_cv.csv") +#write_csv(sig_cv_jack_ind, "p_values_cv.csv") + +write_csv(cv_jack, "individual_values_cv.csv") +write_csv(cv_jack_mean, "mean_values_se_n_cv.csv") +write_csv(sig_LC_cv_groups, "p_values_cv.csv") + +# Log used code ------------------------------------------------------------ + +file_name <- sys.frame(1)$ofile + +file.copy(file_name, + to = str_c(out_dir, str_remove(file_name, current), "_", str_replace_all(Sys.Date(),"^.{2}|-",""), ".R"),overwrite = T) diff --git a/workflows/polar_LC_MS_figures/Figure maker_cmQTL1_paper.R b/workflows/polar_LC_MS_figures/Figure maker_cmQTL1_paper.R new file mode 100644 index 
0000000000000000000000000000000000000000..fe02b33298118df8f85712dcf4efb0df12b39ed5 --- /dev/null +++ b/workflows/polar_LC_MS_figures/Figure maker_cmQTL1_paper.R @@ -0,0 +1,917 @@ +rm(list = ls()) +library(tidyverse) +library(ggpubr) +library(glue) +library(ggtext) +library(cowplot) +library(ggbeeswarm) +library(extrafont) + +# Directory setting ------------------------------------------------------- + + +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "Figures", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + +latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"), + pattern = "^\\d{6}_analysis"), + decreasing = T)[[1]] + +latest_analysis <- str_c(current, "/", latest) + +setwd(latest_analysis) + +latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}") + +# File loading ------------------------------------------------------------ + +fc_1 <- read_csv("mean_values_se_n.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +genotypes <- fc_1 %>% + distinct(alias, genotype) + +fc_1_ind <- read_csv("individual_values.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC <- read_csv("p_values.csv") + +fc_1_lt <- read_csv("mean_values_se_n_levene.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_lt <- read_csv("individual_values_levene.csv") %>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC_lt <- read_csv("p_values_levene.csv") + +fc_1_cv <- read_csv("mean_values_se_n_cv.csv") %>% + mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +fc_1_ind_cv <- read_csv("individual_values_cv.csv") 
%>% + select(-genotype) %>% + left_join(genotypes) + +sig_GC_cv <- read_csv("p_values_cv.csv") %>% + mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))), + group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")), + group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))), + group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) + +setwd(out_dir) + + +# Data combination -------------------------------------------------------- + + +met_dat <- fc_1 %>% + distinct(met, Compound_Name, Compound_Class) + +genotypes <- fc_1 %>% + distinct(alias, genotype) + +per_comp_y <- fc_1 %>% + group_by(tissue, treatment, met) %>% + summarise(tot_val = max(mean_fc + se)) + +cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", + "#0072B2", "#D55E00","#000000") +bw_scale <- c("black", "black", "black", "black", "black", "black", "black") + + +# Used plots -------------------------------------------------------------- + +com_theme <- theme(axis.text.x = element_markdown(angle = 45, hjust = 1, size = 6, family = "sans"), + axis.text.y = element_text(size = 6, family = "sans"), + axis.title.x = element_blank(), + axis.title.y = element_text(size = 6, family = "sans"), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + strip.text = element_text(size = 8, family = "sans", margin = margin(t = 1, r = 1, b = 1, l = 1 , unit = "pt")), + text = element_text(size = 6, family = "sans"), + legend.title = element_blank(), + legend.text = element_markdown(size = 6), + plot.margin = unit(c(1,1,1,1), "mm"), + legend.margin = margin(t = 0, r = 2, b = 0, l = 2 , unit = "mm")) + +make_box_dot_plot <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y <- fc_1_ind %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, treatment, met) %>% + 
summarise(min_y = min(fc), + max_y = max(fc)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + group_by(tissue, treatment, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 1.5) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(legend.position = plot_legend) + + com_theme + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_dot_plot_levene <- 
function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y <- fc_1_ind_lt %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, treatment, met) %>% + summarise(min_y = min(lev_t), + max_y = max(lev_t)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC_lt %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + group_by(tissue, treatment, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- fc_1_ind_lt %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = lev_t)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 1.5) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(legend.position = plot_legend) + + com_theme + + ylab("Levene's transformed value")+ + #ylim(c(0, ylim_top))+ + xlab("") + 
+ ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + + +make_box_dot_plot_cv <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes, plot_fill){ + + per_comp_y_cv <- fc_1_ind_cv %>% + filter(genotype %in% plot_genotypes) %>% + group_by(tissue, met) %>% + summarise(min_y = min(cv), + max_y = max(cv)) %>% + mutate(tot_val = 1.1*max_y) %>% + ungroup() + + sig_bar <- sig_GC_cv %>% + left_join(per_comp_y_cv) %>% + group_by(tissue, met) %>% + mutate(y.position = 1.1 * tot_val, + p.signif = if_else(p.value <= 0.05, "*", "ns")) %>% + ungroup() %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + binwidth <- sig_bar %>% + summarise(min_y_comp = min(min_y), + max_y_comp = max(max_y)) %>% + mutate(binwidth = (max_y_comp - min_y_comp)/50) %>% + select(binwidth) %>% + as_vector() %>% + as.numeric() + + plot_out <- fc_1_ind_cv %>% + left_join(met_dat) %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + ggplot(aes(x = genotype, y = cv)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), binaxis = "y", stackdir = "center", binwidth = binwidth, dotsize = 2) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + com_theme + + theme(legend.position = plot_legend) + + ylab("CV")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale[plot_fill], aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + + +# Pantothenic acid fruits PanK4-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_029173", "fruits", 
"Pantothenic acid nominal", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 + + +p2 <- make_box_dot_plot_cv("Cluster_029173", "fruits", "Pantothenic acid CV", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_029173", "fruits", "Pantothenic acid", "bottom", c("MoneyMaker","*panK4-1*"), c(1,2))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Pantothenic_acid_fruits_pank4-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + +p2 <- make_box_dot_plot_levene("Cluster_029173", "fruits", "Pantothenic acid levene transformed", "bottom", c("MoneyMaker","*panK4-1*"), c(1,2)) +p2 +ggsave("Pantothenic_acid_fruits_pank4-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Pantothenic acid leaves PanK4-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_029173", "leaves", "Pantothenic acid nominal", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_029173", "leaves", "Pantothenic acid CV", "none", c("MoneyMaker","*panK4-1*"), c(1,2)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_029173", "leaves", "Pantothenic acid", "bottom", c("MoneyMaker","*panK4-1*"), c(1,2))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Pantothenic_acid_leaves_panK4-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_029173", "leaves", "Pantothenic acid", "bottom", c("MoneyMaker","*panK4-1*"), c(1,2)) +p2 +ggsave("Pantothenic_acid_leaves-panK4-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Phenylalanine fruits 
log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_025242", "fruits", "Phenylalanine nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +saveRDS(last_plot(), "Phenylalanine_nominal_log2-1_fruits.RDS") + +p2 <- make_box_dot_plot_cv("Cluster_025242", "fruits", "Phenylalanine CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +saveRDS(last_plot(), "Phenylalanine_cv_log2-1_fruits.RDS") + +leg <- get_legend(make_box_dot_plot("Cluster_025242", "fruits", "Phenylalanine", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +saveRDS(leg, "Phenylalanine_legend_log2-1_fruits.RDS") + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Phenylalanine_fruits_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_025242", "fruits", "Phenylalanine levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +saveRDS(last_plot(), "Phenylalanine_levene_log2-1_fruits.RDS") + + +ggsave("Phenylalanine_fruits_log2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + + + +# Phenylalanine leaves log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_025242", "leaves", "Phenylalanine nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +saveRDS(last_plot(), "Phenylalanine_nominal_log2-1_leaves.RDS") + +p2 <- make_box_dot_plot_cv("Cluster_025242", "leaves", "Phenylalanine CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +saveRDS(last_plot(), "Phenylalanine_cv_log2-1_leaves.RDS") + +leg <- get_legend(make_box_dot_plot("Cluster_025242", "leaves", "Phenylalanine", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +saveRDS(last_plot(), "Phenylalanine_leg_log2-1_leaves.RDS") + + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths 
= c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Phenylalanine_leaves_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_025242", "leaves", "Phenylalanine levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 +ggsave("Phenylalanine_leaves_log-2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +saveRDS(last_plot(), "Phenylalanine_levene_log2-1_leaves.RDS") + + +# Trans-cinnamic acid fruits log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_024086", "fruits", "Trans-cinnamic acid nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_024086", "fruits", "Trans-cinnamic acid CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_024086", "fruits", "Trans-cinnamic acid", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Trans-cinnamic_acid_fruits_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_024086", "fruits", "Trans-cinnamic acid levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 +ggsave("Trans-cinnamic_acid_fruits_log2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + + + +# Trans-cinnamic acid leaves log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_024086", "leaves", "Trans-cinnamic acid nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_024086", "leaves", "Trans-cinnamic acid CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +leg <- 
get_legend(make_box_dot_plot("Cluster_024086", "leaves", "Trans-cinnamic acid", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Trans-cinnamic_acid_leaves_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_024086", "leaves", "Trans-cinnamic acid levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 +ggsave("Trans-cinnamic_acid_leaves_log-2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + + + +# p-Coumaric acid fruits log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_025174", "fruits", "p-Coumaric acid nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_025174", "fruits", "p-Coumaric acid CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_025174", "fruits", "p-Coumaric acid", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("p-Coumaric_acid_fruits_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_025174", "fruits", "p-Coumaric acid levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 +ggsave("p-Coumaric_acid_fruits_log2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + + + +# p-Coumaric acid leaves log2-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_025174", "leaves", "p-Coumaric acid nominal", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p1 + +p2 <- 
make_box_dot_plot_cv("Cluster_025174", "leaves", "p-Coumaric acid CV", "none", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_025174", "leaves", "p-Coumaric acid", "bottom", c("MoneyMaker","*log2-1*"), c(1,3))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("p-Coumaric_acid_leaves_log2-1_combined.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_025174", "leaves", "p-Coumaric acid levene transformed", "bottom", c("MoneyMaker","*log2-1*"), c(1,3)) +p2 +ggsave("p-Coumaric_acid_leaves_log-2-1_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Val-Trp fruits transp1-1------------------------------------------------------- + + + +p1 <- make_box_dot_plot("Cluster_035934", "fruits", "Val-Trp nominal", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 + + +p2 <- make_box_dot_plot_cv("Cluster_035934", "fruits", "Val-Trp CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_035934", "fruits", "Val-Trp", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Val-Trp_fruits_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) + +p2 <- make_box_dot_plot_levene("Cluster_035934", "fruits", "Val-Trp levene transformed", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("Val-Trp_fruits_box_dot_plot_levene_transp1-1.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Val-Trp leaves transp1-1------------------------------------------------------- + +p1 <- make_box_dot_plot("Cluster_035934", "leaves", "Val-Trp nominal", "none", 
c("MoneyMaker","*transp1-1*"), c(1,4)) +p1 + +p2 <- make_box_dot_plot_cv("Cluster_035934", "leaves", "Val-Trp CV", "none", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 + +leg <- get_legend(make_box_dot_plot("Cluster_035934", "leaves", "Val-Trp", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4))) +leg + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, ncol = 1, leg, rel_heights = c(10,1)) +comp_plot + +ggsave("Val-Trp_leaves_combined_transp1-1.png",width = 16.5, height = 10, units = "cm", dpi = 300) + + +p2 <- make_box_dot_plot_levene("Cluster_035934", "leaves", "Val-Trp", "bottom", c("MoneyMaker","*transp1-1*"), c(1,4)) +p2 +ggsave("Val-Trp_leaves_box_dot_plot_levene.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +stop(print("current end")) + +# Test combine ------------------------------------------------------------ + +p1 <- make_box_dot_plot("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p1 + +p2 <- make_box_dot_plot_levene("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p2 + +p3 <- make_box_dot_plot_cv("m_81", "fruits", "Phenylalanine", "none", c("MoneyMaker","*log2-1*")) +p3 + +leg46 <- get_legend(make_box_dot_plot("m_81", "fruits", "Phenylalanine", "bottom", c("MoneyMaker","*log2-1*"))) +leg46 + +main_plot <- plot_grid(p1,p2, nrow = 2, labels = "AUTO",rel_heights = c(1,1)) +main_plot + +main_plot_2 <- plot_grid(main_plot, p3, ncol = 2, labels = list("", "C"),rel_widths = c(3,1)) +main_plot_2 + +comp_plot <- plot_grid(main_plot_2, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("Phenylalanine_combined.png",width = 15.8, height = 16, units = "cm", dpi = 300) + +#sugars? 
+ +p1 <- make_box_dot_plot("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p1 + +p2 <- make_box_dot_plot_levene("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p2 + +p3 <- make_box_dot_plot_cv("m_72", "fruits", "Maltose", "none", c("MoneyMaker","*transp1-1*")) +p3 + +leg46 <- get_legend(make_box_dot_plot("m_72", "fruits", "Maltose", "bottom", c("MoneyMaker","*transp1-1*"))) +leg46 + +main_plot <- plot_grid(p1,p2, nrow = 2, labels = "AUTO",rel_heights = c(1,1)) +main_plot + +main_plot_2 <- plot_grid(main_plot, p3, ncol = 2, labels = list("", "C"),rel_widths = c(3,1)) +main_plot_2 + + +ggsave("Maltose_combined.png",width = 15.8, height = 16, units = "cm", dpi = 300) + + + +# Recycling --------------------------------------------------------------- + +testtheme <- theme(axis.text.x = element_markdown(angle = 45, hjust = 1, margin = unit(c(1,0,0,0), "mm")), + axis.title.x = element_blank(), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 8, family = "Arial Narrow"), + legend.title = element_blank(), + legend.text = element_markdown(), + legend.position = "right", + legend.direction = "vertical", + plot.margin = unit(c(1,0,1,2), "mm"), + legend.margin = margin(t = 0, r = 2, b = 0, l = 2 , unit = "mm")) + + +make_box_point_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 
1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_point(aes(fill = genotype, color = genotype),size = 2, shape = 21) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_beeswarm_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out 
<- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = fc)) + + geom_boxplot(position = "dodge", aes(fill = genotype), color = "black") + + geom_beeswarm(aes(fill = genotype, color = genotype),size = 2, shape = 21, cex = 2, priority = "density") + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +make_box_dot_line_plot <- function(plot_met, plot_tissue, plot_label, plot_legend, plot_genotypes){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val, + treatment = as_factor(treatment)) %>% + filter(met == plot_met, tissue == plot_tissue, + group1 %in% plot_genotypes, group2 %in% plot_genotypes) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_plot <- fc_1%>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + mutate(treatment = as_factor(treatment)) + + plot_out <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue, genotype %in% plot_genotypes) %>% + mutate(treatment = as_factor(treatment)) %>% + ggplot(position = "dodge", aes(x = treatment, y = fc)) + + 
geom_boxplot(aes(fill = genotype), color = "black") + + geom_dotplot(aes(fill = genotype, color = genotype), position = position_dodge(0.75), + binaxis = "y", stackdir = "center", binwidth = 0.1, dotsize = 1) + + geom_smooth(aes(x = treatment, y = fc, color = genotype, group = genotype),method = "lm", position = position_dodge(0.75)) + + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +p1 <- make_box_dot_line_plot("m_70", "fruits", "Malic acid", "none", c("MoneyMaker","*panK4-1*")) +p1 +ggsave("Malic_acid_box_dot_line_plot_nominal.png",width = 15.8, height = 8, units = "cm", dpi = 300) + +make_col_plot_cv <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC_cv %>% + left_join(genotypes, by = c("alias1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("alias2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y_cv) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + plot_out <- 
fc_cv %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + #facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("CV fold-change")+ + ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_grey(start = 1, end = 0, aesthetics = "fill") + + plot_out + +} + +make_col_plot <- function(plot_met, plot_tissue, plot_label, plot_legend){ + + sig_bar <- sig_GC %>% + left_join(genotypes, by = c("group1" = "alias")) %>% + rename(genotype1 = genotype) %>% + left_join(genotypes, by = c("group2" = "alias")) %>% + rename(genotype2 = genotype) %>% + select(-group1, -group2) %>% + #mutate(genotype1 = as_factor(if_else(genotype1 == "MoneyMaker", glue("{genotype1}"),glue("*{genotype1}*"))), + # genotype2 = as_factor(if_else(genotype2 == "MoneyMaker", glue("{genotype2}"),glue("*{genotype2}*")))) %>% + rename(group1 = genotype1, group2 = genotype2) %>% + left_join(per_comp_y) %>% + mutate(y.position = 1.1 * tot_val) %>% + filter(met == plot_met, tissue == plot_tissue) + + ylim_top <- 1.1*max(sig_bar$y.position) + + fc_1_ind_plot <- fc_1_ind %>% + filter(met == plot_met, tissue == plot_tissue) + + plot_out <- fc_1 %>% + filter(met == plot_met, tissue == plot_tissue) %>% + ggplot(aes(x = genotype, y = mean_fc)) + + geom_col(position = "dodge", aes(fill = genotype), color = "black") + + geom_jitter(aes(fill = genotype, color = genotype, y = fc), 
size = 2, shape = 21, data = fc_1_ind_plot) + + geom_errorbar(aes(ymin = (mean_fc-se), ymax = (mean_fc + se)), position = position_dodge(0.9), width = 0.25, size = 0.75)+ + stat_pvalue_manual(sig_bar, label = "p.signif", y.position = "y.position", + step.increase = 0.07, + hide.ns = T) + + facet_grid(cols = vars(treatment)) + + theme(axis.text.x = element_markdown(angle = 45, hjust = 1), + panel.background = element_rect(fill = "white"), + panel.border = element_rect(color = "black",fill = NA), + text = element_text(size = 10), + legend.text = element_markdown(), + legend.position = plot_legend) + + ylab("Mean fold-change")+ + #ylim(c(0, ylim_top))+ + xlab("") + + ggtitle(label = plot_label) + + scale_fill_manual(values = cb_scale, aesthetics = "fill") + + scale_color_manual(values = bw_scale, aesthetics = "color") + + plot_out + +} + +# Figure 46 --------------------------------------------------------------- + +p1 <- make_col_plot("m_70", "fruits", "Malic acid", "none") +p1 + + +p2 <- make_col_plot_cv("m_119", "fruits", "Malic acid CV", "none") +p2 + + +leg46 <- get_legend(make_col_plot("m_123", "fruits", "sucrose", "bottom")) +leg46 + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_46.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Figure 47 --------------------------------------------------------------- + +p1 <- make_col_plot("m_74", "fruits", "Phenylalanine", "none") +p1 + +p2 <- make_col_plot_cv("m_74", "fruits", "Phe CV", "none") +p2 + + +leg46 <- get_legend(make_col_plot("m_74", "fruits", "sucrose", "bottom")) +leg46 + +main_plot <- plot_grid(p1,p2, ncol = 2, labels = "AUTO",rel_widths = c(3,1)) +main_plot + +comp_plot <- plot_grid(main_plot, leg46, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_47.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) + +# Figure 48 
--------------------------------------------------------------- + +p1 <- make_col_plot_cv("m_25", "fruits", "F6P CV", "none") +p1 + +p2 <- make_col_plot_cv("m_38", "fruits", "G6P CV", "none") +p2 + +p3 <- make_col_plot_cv("m_65", "fruits", "Maltose CV", "none") +p3 + +leg48 <- get_legend(make_col_plot("m_65", "fruits", "sucrose", "bottom")) +leg48 + +main_plot <- plot_grid(p1,p2,p3, ncol = 3, labels = "AUTO") +main_plot + +comp_plot <- plot_grid(main_plot, leg48, ncol = 1, rel_heights = c(10,1)) +comp_plot + +ggsave("figure_48.wmf",width = 15.8, height = 8, units = "cm", dpi = 300) diff --git a/workflows/polar_LC_MS_normalization/210903_secondary_normalization.R b/workflows/polar_LC_MS_normalization/210903_secondary_normalization.R new file mode 100644 index 0000000000000000000000000000000000000000..41c41e5d3fd2c1f8f9496ee9e4dbaf3fc923729d --- /dev/null +++ b/workflows/polar_LC_MS_normalization/210903_secondary_normalization.R @@ -0,0 +1,823 @@ +rm(list = ls()) +setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) +getwd() +current <- getwd() +source <- ".." 
+library(openxlsx) +library(tidyverse) +library(car) +library(pheatmap) +library(broom) +library(ggpubr) +library(viridisLite) +library(modelr) +library(dlookr) +library(imputeLCMD) + +# Directory setting ------------------------------------------------------- + + +current <- getwd() +source <- str_c(current,"/..") + +cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-","")) + +out <- str_c(cur_date, "normalization", sep = "_") + +if (file.exists(out)) { + cat("The folder already exists") +} else { + dir.create(out) +} + +out_dir <- str_c(current, out, sep = "/") + + + +# Data loading ------------------------------------------------------------ +setwd(source) + +sam_dat1 <- read_csv("210812_cmQTL_val1_samplelist.csv") +LC_run1 <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 4) + +sam_dat2 <- read_csv("210812_cmQTL_val2_samplelist.csv") +LC_run2 <- readxl::read_xlsx("210324_WIJESI-130121-15_cmQTL_validation2.xlsx", sheet = 5) + +genotypes <- readxl::read_xlsx("Genotype_names.xlsx") + +setwd(current) +area <- read_delim(file = "cmQTL_val1_selected_secondary.txt", delim = "\t") %>% + rename(met = Name, + mz_mean_lib = mz_mean, + RT_mean_lib = RT_mean, + mz_mean = mz_mean_new, + RT_mean = RT_mean_new) %>% + group_by(met) %>% + mutate(rt_diff = RT_mean_lib - RT_mean, + rank_rt = rank(abs(rt_diff))) %>% + ungroup() %>% + filter(rank_rt == 1) %>% + arrange(RT_mean) %>% + group_by(Compound_Name) %>% + mutate(peak_no = rank(RT_mean, ties.method = "first"), + Compound_Name = if_else(duplicated(Compound_Name), + str_c(Compound_Name, "peak", peak_no, sep = "_"), + Compound_Name)) + +met_dat = area %>% + select(met, mz_mean,RT_mean, Compound_Name, Compound_Class) +setwd(out_dir) + +# Data combination -------------------------------------------------------- +sam_vars <- c("plantline", "alias", "LIMS_ID", + "treatment", "tissue", "batch_LC", "run_date_LC", + "extraction_num", "sample_num", + "class", "run_num_LC", "sample_weight", "exp", 
"genotype")

# Tidy LC-MS run lists --------------------------------------------------------

# Build the tidy sample table for one LC-MS experiment.
#
# lc_run       run list of the experiment; must contain `extraction_num`
#              (character, QC injections contain "run_qc").
# sample_sheet sample annotation joined onto the run list (natural join).
# exp_id       experiment number stored as factor column `exp`.
#
# Returns the run list annotated with class ("run_qc"/"sample"), genotype
# information and `treatment` (renamed from `irrigation`), restricted to the
# columns listed in `sam_vars`.
tidy_lc_run <- function(lc_run, sample_sheet, exp_id) {
  lc_run %>%
    mutate(
      class = as_factor(
        if_else(str_detect(extraction_num, "run_qc"), "run_qc", "sample")
      ),
      # QC injections carry no extraction number; they are coded as 0
      extraction_num = as.numeric(
        if_else(str_detect(extraction_num, "run_qc"), "0", extraction_num)
      ),
      exp = as_factor(exp_id)
    ) %>%
    left_join(sample_sheet) %>%
    left_join(genotypes) %>%
    select(treatment = irrigation, everything()) %>%
    select(all_of(sam_vars))
}

sam_dat1_tidy <- tidy_lc_run(LC_run1, sam_dat1, 1)
sam_dat2_tidy <- tidy_lc_run(LC_run2, sam_dat2, 2)

# daily_num is the injection order within one LC batch; it is the predictor of
# the drift models fitted on the QC injections further below.
sam_dat <- sam_dat1_tidy %>%
  bind_rows(sam_dat2_tidy) %>%
  group_by(batch_LC) %>%
  mutate(daily_num = row_number()) %>%
  fill(tissue, .direction = "updown") %>%
  ungroup() %>%
  left_join(genotypes) %>%
  mutate(machine_num_LC = str_c(run_num_LC, "neg", sep = "_"),
         treatment = as_factor(treatment))

area_long <- area %>%
  pivot_longer(matches("^\\d+_neg$"),
               names_to = "machine_num_LC",
               values_to = "area") %>%
  left_join(sam_dat)

# Data filtering ----------------------------------------------------------

# Replace zeros by NA so that they are treated as missing values.
#
# x  a vector of peak areas.
#
# Returns `x` with every element equal to 0 set to NA. Elements that are
# already NA are left untouched (the former element-wise `if` raised an error
# on NA input).
set_back_NA <- function(x) {
  x[!is.na(x) & x == 0] <- NA
  x
}


# Imputation --------------------------------------------------------------

features_na <- area_long %>%
  group_by(met) %>%
  mutate(area = set_back_NA(area)) %>%
  ungroup()

# Percentage of missing values per experiment, feature, tissue and treatment
missingness <- features_na %>%
  group_by(exp, met, tissue, treatment) %>%
  summarise(na = sum(is.na(area)),
            n = n(),
            .groups = "drop") %>%
  mutate(percent_na = na / n * 100)

# Placeholder: the half-minimum imputation below is disabled, `features` is
# overwritten by the QRILC result further down.
features <- features_na %>%
  left_join(missingness) %>%
  group_by(met) %>%
  #mutate(area = ifelse(percent_na >= 0 & is.na(area), rnorm(n = 1, mean = 0.5*min(area, na.rm = T)), area)) %>%
  ungroup()

#miss_for_imp <- features %>%
#  pivot_wider(names_from = met,
#              values_from = area,
#              id_cols = c(exp, machine_num_lip, met, tissue, treatment)) %>%
#  select(exp, machine_num_lip, tissue, treatment, matches("Cluster_")) %>%
#  group_by(exp, tissue, treatment) %>%
#  nest() %>%
#  mutate(new = map(.x = data, .f = as.data.frame),
#         new = map(.x = new, .f = ~`row.names<-`(.x, .x$machine_num_lip)),
#         new = map(.x = new, .f = ~select(.x, !starts_with("machine"))),
#         new = map(.x = new, .f = as.matrix),
#         imp = map_dfc(.x = new, .f = impute.QRILC)[[1]])

#imp_out_loop <- vector(mode = "list", length = length(miss_for_imp$new))

#for (j in seq_along(miss_for_imp$new)) {
#  imp_out_loop[[j]] <- impute.QRILC(miss_for_imp$new[[j]])[[1]]
#}

# Imputation per tissue, treatment and experiment could not be solved so far;
# QRILC is therefore run on the complete sample x feature matrix.
imp <- features_na %>%
  pivot_wider(names_from = met,
              values_from = area,
              id_cols = c(exp, machine_num_LC, tissue, treatment)) %>%
  select(exp, machine_num_LC, tissue, treatment, matches("Cluster_")) %>%
  as.data.frame()

row.names(imp) <- imp$machine_num_LC
imp <- imp %>%
  select(-exp, -tissue, -treatment, -machine_num_LC) %>%
  as.matrix()

# NOTE(review): the matrix has injections in rows and features in columns and
# holds raw (not log-transformed) areas - confirm that this is the layout and
# scale impute.QRILC is meant to receive.
# The function is namespace-qualified because imputeLCMD is not attached by
# the library() calls at the top of the script.
imp <- imputeLCMD::impute.QRILC(imp)

# First list element of the QRILC result is the imputed matrix
features <- imp[[1]] %>%
  as_tibble(rownames = "machine_num_LC") %>%
  pivot_longer(cols = matches("Cluster_"),
               names_to = "met",
               values_to = "imp") %>%
  left_join(features_na)

# QRILC can return negative values; shift such features so that their minimum
# becomes slightly positive and ratios / logarithms stay defined.
adjust_imp <- features %>%
  group_by(met) %>%
  summarise(offset = if_else(any(imp < 0), 1.001 * abs(min(imp)), 0))

features_filtered <- features %>%
  left_join(missingness) %>%
  #filter(percent_na <= 60)
  left_join(adjust_imp) %>%
  mutate(imp = imp + offset) %>%
  select(-offset)

# Internal Standard Normalization -----------------------------------------

# NOTE(review): the internal standard is taken from `features`, i.e. before
# the offset adjustment applied in `features_filtered` - confirm this is
# intended.
isvec <- features %>%
  filter(Compound_Name == "isovitexin") %>%
  select(machine_num_LC, is = imp)

features <- features_filtered %>%
  full_join(isvec) %>%
  mutate(isnorm = imp / is) %>%
  filter(Compound_Name != "isovitexin") %>%
  # missing or zero sample weights are replaced by 50
  mutate(sample_weight = if_else(sample_weight == 0 | is.na(sample_weight),
                                 50, sample_weight))


# Fit linear model on QCs -------------------------------------------------

by_batch_LC <- features %>%
  filter(class == "run_qc") %>%
  group_by(batch_LC, met) %>% # need to change treatment_batch_LC_corr
  nest()

# Linear drift model of the IS-normalised QC signal over injection order.
feature_model <- function(df) {
  lm(isnorm ~ daily_num, data = df)
}

# modelr / broom functions are namespace-qualified because these packages are
# not attached by library(tidyverse).
by_batch_LC <- by_batch_LC %>%
  mutate(model = map(data, feature_model),
         predicts = map2(data, model, modelr::add_predictions),
         coefficients = map(model, broom::tidy),
         aug = map(model, broom::augment),
         glance = map(model, broom::glance))

coefs_aug <- unnest(by_batch_LC, aug) %>%
  select(batch_LC, met, .resid, daily_num)

coefs_glance <- unnest(by_batch_LC, glance) %>%
  select(batch_LC, met, adj.r.squared)

coefs_term <- unnest(by_batch_LC, coefficients) %>%
  pivot_wider(id_cols = c(batch_LC, met),
              names_from = "term",
              values_from = estimate) %>%
  rename(x = daily_num,
         intercept = `(Intercept)`)

coefs_pvalue <- by_batch_LC %>%
  select(batch_LC, met, coefficients) %>%
  ungroup() %>%
  unnest(coefficients) %>%
  pivot_wider(id_cols = c(batch_LC, met),
              names_from = "term",
              values_from = p.value) %>%
  rename(intercept_pval = `(Intercept)`,
         daily_num_pval = daily_num) %>%
  full_join(features) %>%
  full_join(coefs_term) %>%
  #left_join(coefs_aug) %>%
  full_join(coefs_glance)

# QC median per batch and feature: fallback when the linear fit is rejected.
# (The object shares its name with stats::median(); calls to median() still
# resolve to the function.)
median <- features %>%
  filter(class == "run_qc") %>%
  group_by(batch_LC, met) %>%
  summarise(median = median(isnorm))

# The linear prediction is only used when the slope is significant and the
# fit is good; otherwise the QC median of the batch is used.
features_lin <- unnest(by_batch_LC, predicts) %>%
  full_join(coefs_pvalue) %>%
  full_join(median) %>%
  mutate(pred = intercept + x * daily_num,
         predp = ifelse(daily_num_pval <= 0.05 & adj.r.squared >= 0.75,
                        pred, median),
         predp = ifelse(is.na(predp), median, predp)) %>%
  select(-c("data", "model", "coefficients"))

adjust_lin <- features_lin %>%
  ungroup() %>%
  group_by(met) %>%
  summarise(lin_offset = if_else(any(predp < 0), 1.001 * abs(min(predp)), 0))

features_lin <- features_lin %>%
  ungroup() %>%
  full_join(adjust_lin) %>%
  full_join(features) %>%
  mutate(linnorm = (isnorm + lin_offset) / (predp + lin_offset),
         linnorm_fw = linnorm / sample_weight,
         linnorm_fw_log = log2(linnorm_fw))


# Fit loess-model for QC-RLSC batch-wise ----------------------------------

rejoin <- features

QC_loess <- features %>%
  ungroup() %>%
  filter(class == "run_qc") %>%
  #filter(met!="m_44") %>%
  group_by(met, batch_LC) %>%
  nest()

# LOESS drift model of the IS-normalised QC signal over injection order.
loess_model <- function(df) {
  loess(isnorm ~ daily_num, span = 1.5, data = df,
        control = loess.control(surface = "interpolate"))
}

# Prediction grid covering every injection position of a batch. It used to be
# hard-coded as 1:65, which left injections beyond position 65 without a
# fitted value; grid points without a matching injection are dropped below.
max_daily_num <- max(features$daily_num, na.rm = TRUE)

start <- Sys.time()
QC_loess <- QC_loess %>%
  mutate(model = map(data, loess_model),
         daily_num = list(seq(1, max_daily_num, 1)),
         predict = map2(model, daily_num, stats::predict))
end <- Sys.time()
end - start

features_loess <- QC_loess %>%
  unnest(c(predict, daily_num, batch_LC)) %>%
  select(-model, -data) %>%
  full_join(rejoin) %>%
  # keep only grid points that correspond to a measured injection
  filter(!is.na(machine_num_LC))

adjust <- features_loess %>%
  ungroup() %>%
  group_by(met) %>%
  summarise(offset = if_else(any(predict < 0), 1.001 * abs(min(predict)), 0))

features_loess <- features_loess %>%
  left_join(adjust) %>%
  mutate(loess_norm = (isnorm + offset) / (predict + offset),
         loess_norm_fw = loess_norm / sample_weight,
         loess_norm_fw_log = log2(loess_norm_fw)) %>%
  ungroup()


# Relative log abundance plots --------------------------------------------

features_all <- features_lin %>%
  left_join(features_loess)

features_all %>%
  filter(!is.na(genotype)) %>%
  group_by(met, treatment) %>%
  mutate(sub = loess_norm_fw,
         rla = log2(sub) - median(log2(sub))) %>%
  ggplot(aes(x = met, y = rla)) +
  geom_boxplot() +
  facet_grid(rows = vars(treatment), scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

features_all %>%
  #filter(!is.na(taxa)) %>%
  filter(treatment == 1, class == "sample") %>%
  mutate(run_num_LC = as_factor(run_num_LC),
         batch_LC = as_factor(batch_LC)) %>%
  group_by(met, treatment) %>%
  mutate(rla_imp = log2(imp) - median(log2(imp)),
         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
  pivot_longer(starts_with("rla"),
               names_to = "normalization",
               values_to = "rla") %>%
  ggplot(aes(x = met, y = rla)) +
  geom_boxplot() +
  facet_grid(rows = vars(normalization), cols = vars(treatment),
             scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(c(-2, 2))

# NOTE(review): this file name looks like a leftover from another project
# (Ath_Dark / GC) - confirm before relying on it.
ggsave(str_c(cur_date,
             "Ath_Dark_0_3_6_GC_met_rla_plot.jpg",
             sep = "_"))

features_all %>%
  #filter(!is.na(taxa)) %>%
  filter(class == "sample") %>%
  mutate(run_num_LC = as_factor(run_num_LC),
         batch_LC = as_factor(batch_LC)) %>%
  group_by(met, treatment) %>%
  mutate(rla_imp = log2(imp) - median(log2(imp)),
         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
  pivot_longer(starts_with("rla"),
               names_to = "normalization",
               values_to = "rla") %>%
  ggplot(aes(x = run_num_LC, y = rla, color = batch_LC)) +
  geom_boxplot() +
  facet_grid(rows = vars(normalization), scales = "free") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(c(-1, 1))

ggsave(str_c(cur_date,
             "cmQTL_val1_2_samples_rla_plot.jpg",
             sep = "_"))


# RSD estimation ----------------------------------------------------------

# Relative standard deviation of every normalisation stage for each group of
# an already grouped feature table.
summarise_rsd <- function(grouped_features) {
  grouped_features %>%
    summarise(RSD_imp = sd(imp) / mean(imp),
              RSD_isnorm = sd(isnorm) / mean(isnorm),
              RSD_linnorm = abs(sd(linnorm_fw) / mean(linnorm_fw)),
              RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw)))
}

# Mean RSD per normalisation stage, ignoring groups without a defined RSD.
summarise_mean_rsd <- function(rsd) {
  rsd %>%
    summarise(#mean_RSD_loess = mean(RSD_loess),
              mean_imp = mean(RSD_imp, na.rm = TRUE),
              mean_loess_norm = mean(RSD_loess_norm, na.rm = TRUE),
              mean_RSD_isnorm = mean(RSD_isnorm, na.rm = TRUE),
              mean_linnorm = mean(RSD_linnorm, na.rm = TRUE))
}

RSD_bio_reps <- features_all %>%
  filter(class == "sample") %>%
  #filter(!is.na(taxa)) %>%
  #filter((any(qc_imputed) == T)==F) %>%
  group_by(met, treatment, genotype, tissue, exp) %>%
  summarise_rsd()

RSD_bio_reps_mean <- RSD_bio_reps %>%
  #mutate(RSD_diff = RSD_imp-RSD_loess) %>%
  ungroup() %>%
  #group_by(treatment, tissue, exp) %>%
  summarise_mean_rsd()

RSD_qcs <- features_all %>%
  filter(class == "run_qc") %>%
  #filter((any(qc_imputed) == T)==F) %>%
  group_by(met, exp) %>%
  summarise_rsd()

RSD_qcs_mean <- RSD_qcs %>%
  #mutate(RSD_diff = RSD_imp-RSD_loess) %>%
  ungroup() %>%
  group_by(exp) %>%
  summarise_mean_rsd()


# PCA testing chunk -------------------------------------------------------
##https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r

sam_vars <- colnames(sam_dat)

# Run an (uncentred defaults of prcomp, unscaled) PCA on experiment 1.
#
# value_col    name of the column of `features_all` used as feature value
#              ("log_imp" is computed on the fly as log2(imp)).
# tissue_name  optional tissue to restrict the PCA to; NULL uses all tissues.
#
# Returns summary(prcomp(.)) of the injection x feature matrix, rows ordered
# by run_num_LC.
run_pca <- function(value_col, tissue_name = NULL) {
  pca_input <- features_all %>%
    ungroup() %>%
    filter(exp == 1)
  if (!is.null(tissue_name)) {
    pca_input <- pca_input %>%
      filter(tissue == tissue_name)
  }
  pca_input %>%
    arrange(run_num_LC) %>%
    # filter(class=="sample") %>%
    # filter(met %in% overlap_met$met) %>%
    mutate(log_imp = log2(imp)) %>%
    pivot_wider(id_cols = c(run_num_LC, class),
                names_from = met,
                values_from = all_of(value_col)) %>%
    select(starts_with("Cluster_")) %>%
    prcomp() %>%
    summary()
}

# Attach the sample annotation to the PCA scores.
#
# Scores and samples are matched by position (both ordered by run_num_LC), so
# the number of scores must equal the number of annotated samples.
annotate_pca <- function(pca, tissue_name = NULL) {
  samples <- sam_dat %>%
    arrange(run_num_LC) %>%
    filter(exp == 1)
  if (!is.null(tissue_name)) {
    samples <- samples %>%
      filter(tissue == tissue_name)
  }
  n_scores <- nrow(pca$x)
  stopifnot(nrow(samples) == n_scores)

  as_tibble(pca$x) %>%
    mutate(join_num = seq_len(n_scores)) %>%
    full_join(samples %>%
                mutate(join_num = seq_len(n_scores))) %>%
    left_join(sam_dat) %>%
    mutate(batch_LC = as_factor(batch_LC),
           class = as_factor(class)) %>%
    select(all_of(sam_vars), everything())
  #filter(PC1 >-1000, PC2 <30)
}

# Draw PC1 vs PC2 coloured by `color_var` and save it as
# "<cur_date>_<file_stub>". The plot is passed to ggsave() explicitly instead
# of relying on last_plot(). Returns the plot.
save_pca_plot <- function(pca_plot, exp_var, color_var, file_stub) {
  p <- pca_plot %>%
    ggplot() +
    geom_jitter(aes(x = PC1, y = PC2, color = .data[[color_var]])) +
    stat_ellipse(aes(x = PC1, y = PC2, color = .data[[color_var]])) +
    # second row of the importance table = proportion of variance
    ylab(str_c("PC2 ", "(", exp_var$PC2[[2]] * 100, "%)")) +
    xlab(str_c("PC1 ", "(", exp_var$PC1[[2]] * 100, "%)")) +
    labs(color = color_var)

  ggsave(str_c(cur_date, file_stub, sep = "_"),
         plot = p,
         width = 183,
         height = 100,
         units = "mm",
         dpi = 300)
  p
}

# PCA after linear normalization (exp 1)
pca <- run_pca("linnorm_fw_log")
pca_plot <- annotate_pca(pca)
exp_var <- as_tibble(pca[["importance"]])

save_pca_plot(pca_plot, exp_var, "tissue", "cmQTL_val1_tissue_lin_PCA.jpg")
save_pca_plot(pca_plot, exp_var, "treatment", "cmQTL_val1_treatment_PCA.jpg")

# PCA after loess normalization (exp 1)
pca <- run_pca("loess_norm_fw_log")
pca_plot <- annotate_pca(pca)
exp_var <- as_tibble(pca[["importance"]])

save_pca_plot(pca_plot, exp_var, "tissue", "cmQTL_val1_tissue_loess_PCA.jpg")
save_pca_plot(pca_plot, exp_var, "treatment",
              "cmQTL_val1_treatment_loess_PCA.jpg")

# PCA after loess normalization, separately per tissue (exp 1)
tissues <- c("leaves", "fruits")
for (tiss in seq_along(tissues)) {
  pca <- run_pca("loess_norm_fw_log", tissue_name = tissues[[tiss]])
  pca_plot <- annotate_pca(pca, tissue_name = tissues[[tiss]])
  exp_var <- as_tibble(pca[["importance"]])

  save_pca_plot(pca_plot, exp_var, "treatment",
                str_c("cmQTL_val1", tissues[[tiss]],
                      "treatment_loess_PCA.jpg",
                      sep = "_"))
}

# PCA before normalization (exp 1)
pca <- run_pca("log_imp")
pca_plot <- annotate_pca(pca)
exp_var <- as_tibble(pca[["importance"]])

save_pca_plot(pca_plot, exp_var, "tissue", "cmQTL_val1_tissue_imp_PCA.jpg")
save_pca_plot(pca_plot, exp_var, "batch_LC", "cmQTL_val1_batch_imp_PCA.jpg")


# Plot all metabolites ----------------------------------------------------

plotmets <- features_all %>%
  distinct(met) %>%
  as_vector()

plotmet_labs <- plotmets %>%
  as_tibble() %>%
  left_join(met_dat, by = c("value" = "met")) %>%
  #mutate(peak_num = base::rank(HMDB_clear_name, ties.method = "first"),
  #      dup = sum(peak_num),
  #     HMDB_clear_name_unique = if_else(dup>1, str_c(HMDB_clear_name, peak_num), HMDB_clear_name)) %>%
  #ungroup() %>%
  select(Compound_Name) %>%
  as_vector()

# Shared plotting table, computed once instead of once per metabolite.
# xint marks batch borders: the 4th injection of a batch draws a vertical line
# 3.5 runs earlier, every other injection draws it at the last run.
features_plot <- features_all %>%
  mutate(xint = if_else(daily_num == 4, run_num_LC - 3.5, max(run_num_LC)),
         is_miss = as_factor(is.na(area))) %>%
  filter(class != "blank")

# Print a list of plots into one PDF, one plot per page. The device is closed
# even if printing a plot fails.
print_plots_to_pdf <- function(plots, file) {
  pdf(file = file)
  on.exit(dev.off(), add = TRUE)
  for (i in seq_along(plots)) {
    print(plots[[i]])
  }
  invisible(file)
}

# IS-normalised signal with the loess fit (small black points)
plot_out <- vector("list", length = length(plotmets))

for (meta in seq_along(plotmets)) {
  plot_out[[meta]] <- features_plot %>%
    filter(met == plotmets[[meta]]) %>%
    ggplot(aes(x = run_num_LC, y = isnorm)) +
    geom_point(aes(color = class, shape = is_miss)) +
    geom_point(aes(y = predict), color = "black", size = 0.1) +
    geom_vline(aes(xintercept = xint)) +
    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
    ggtitle(label = plotmet_labs[[meta]])
}

print_plots_to_pdf(plot_out, str_c(cur_date, "_loess_fit.pdf"))

# Loess-normalised, weight-corrected log2 signal
plot_out <- vector("list", length = length(plotmets))

for (meta in seq_along(plotmets)) {
  plot_out[[meta]] <- features_plot %>%
    filter(met == plotmets[[meta]]) %>%
    ggplot(aes(x = run_num_LC, y = loess_norm_fw_log)) +
    geom_point(aes(color = class)) +
    #geom_point(aes(y=predp), color="black", size=0.1) +
    geom_vline(aes(xintercept = xint)) +
    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
    ggtitle(label = plotmet_labs[[meta]])
}

print_plots_to_pdf(plot_out, str_c(cur_date, "_loess_norm_fw_log.pdf"))

# Linear-normalised, weight-corrected signal, split by treatment
plot_out <- vector("list", length = length(plotmets))

for (meta in seq_along(plotmets)) {
  plot_out[[meta]] <- features_plot %>%
    filter(met == plotmets[[meta]]) %>%
    ggplot(aes(x = run_num_LC, y = linnorm_fw)) +
    geom_point(aes(color = class)) +
    geom_vline(aes(xintercept = xint)) +
    facet_grid(rows = vars(treatment), scales = "free") +
    ggtitle(label = plotmet_labs[[meta]])
}

print_plots_to_pdf(plot_out, str_c(cur_date, "_linnorm.pdf"))

# Rescale the normalised data back to the intensity range of the imputed raw
# data: median-centre per feature, tissue and experiment, then multiply by the
# median imputed area of the same group.
rescale <- features_all %>%
  group_by(met, tissue, exp) %>%
  summarise(rescale = median(imp))

features_out <- features_all %>%
  left_join(rescale) %>%
  group_by(met, tissue, exp) %>%
  mutate(loess_norm_med = loess_norm_fw / median(loess_norm_fw),
         rescaled = loess_norm_med * rescale) %>%
  ungroup() %>%
  select(all_of(sam_vars), met, Compound_Name, Compound_Class,
         loess_norm_fw, loess_norm_med, area, rescaled)

# Spot check of a single feature
features_out %>%
  filter(met == "Cluster_02177", tissue == "fruits", exp == 1) %>%
  ggplot(aes(x = run_num_LC, y = loess_norm_fw)) +
  geom_point()

# Write files -------------------------------------------------------------

write_csv(features_out,
          str_c(cur_date,
                "cmQTL_val_1_2_feat_dat_LC.csv",
                sep = "_"))

write_csv(met_dat,
          str_c(cur_date,
                "cmQTL_val_1_2_met_dat_LC.csv",
                sep = "_"))

write_csv(sam_dat,
          str_c(cur_date,
                "cmQTL_val_1_2_sam_dat_LC.csv",
                sep = "_"))

write_csv(missingness,
          str_c(cur_date,
                "cmQTL_val_1_2_miss_LC.csv",
                sep = "_"))

# Log used code ------------------------------------------------------------

# sys.frame(1)$ofile is only available when the script is run via source();
# otherwise the copy is skipped instead of aborting after all results were
# written.
file_name <- tryCatch(sys.frame(1)$ofile, error = function(e) NULL)

if (is.null(file_name)) {
  message("Script was not run via source(); the used code was not logged.")
} else {
  # `current` is a path, not a regular expression, hence fixed()
  file.copy(file_name,
            to = str_c(out_dir, str_remove(file_name, fixed(current)), "_",
                       cur_date, ".R"),
            overwrite = TRUE)
}