updated LC isa.assay.xlsx files

2b055261 · MWA_AG_Fernie_MPIMP · a0c57eeb · 2b055261 · 2b055261 · 2b055261
Commit 2b055261 authored 2 years ago by MWA_AG_Fernie_MPIMP
--- a/assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx
+++ b/assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx
--- a/assays/cmQTL_val1_GH_2020_apolar_LC_MS/isa.assay.xlsx
+++ b/assays/cmQTL_val1_GH_2020_apolar_LC_MS/isa.assay.xlsx
--- a/assays/cmQTL_val1_GH_2020_phenotype/README.md
+++ b/assays/cmQTL_val1_GH_2020_phenotype/README.md
--- a/assays/cmQTL_val1_GH_2020_phenotype/dataset/.gitkeep
+++ b/assays/cmQTL_val1_GH_2020_phenotype/dataset/.gitkeep
--- a/assays/cmQTL_val1_GH_2020_phenotype/dataset/Phenotype_data.xlsx
+++ b/assays/cmQTL_val1_GH_2020_phenotype/dataset/Phenotype_data.xlsx
--- a/assays/cmQTL_val1_GH_2020_phenotype/isa.assay.xlsx
+++ b/assays/cmQTL_val1_GH_2020_phenotype/isa.assay.xlsx
--- a/assays/cmQTL_val1_GH_2020_phenotype/protocols/.gitkeep
+++ b/assays/cmQTL_val1_GH_2020_phenotype/protocols/.gitkeep
--- a/assays/cmQTL_val1_GH_2020_polar_LC_MS/isa.assay.xlsx
+++ b/assays/cmQTL_val1_GH_2020_polar_LC_MS/isa.assay.xlsx
--- a/isa.investigation.xlsx
+++ b/isa.investigation.xlsx
--- a/studies/cmQTL_val1_GH_2020_phenotype/README.md
+++ b/studies/cmQTL_val1_GH_2020_phenotype/README.md
--- a/studies/cmQTL_val1_GH_2020_phenotype/isa.study.xlsx
+++ b/studies/cmQTL_val1_GH_2020_phenotype/isa.study.xlsx
--- a/studies/cmQTL_val1_GH_2020_phenotype/protocols/.gitkeep
+++ b/studies/cmQTL_val1_GH_2020_phenotype/protocols/.gitkeep
--- a/studies/cmQTL_val1_GH_2020_phenotype/resources/.gitkeep
+++ b/studies/cmQTL_val1_GH_2020_phenotype/resources/.gitkeep
--- a/workflows/GC_MS_correlation/220531_correlation_matrices.R
+++ b/workflows/GC_MS_correlation/220531_correlation_matrices.R
+library(tidyverse)
+library(glue)
+library(here)
+# Directory setting -------------------------------------------------------
+current <- getwd()
+source <- str_c(current,"/..")
+cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-",""))
+out <- str_c(cur_date, "correlation", sep = "_")
+if (file.exists(out)) {
+  cat("The folder already exists")
+} else {
+  dir.create(out)
+}
+out_dir <- str_c(current, out, sep = "/")
+latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"),
+                               pattern = "^\\d{6}_analysis"),
+                   decreasing = T)[[1]]
+latest_analysis <- str_c(current, "/", latest)
+setwd(latest_analysis)
+latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}")
+# File loading ------------------------------------------------------------
+fc_1 <- read_csv("mean_values_se_n.csv") %>% 
+  mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
+genotypes <- fc_1 %>% 
+  distinct(alias, genotype) %>% 
+  mutate(genotype_label = str_remove_all(genotype, "\\*"))
+fc_1_ind <- read_csv("individual_values.csv") %>% 
+  select(-genotype) %>% 
+  left_join(genotypes)
+sig_GC <- read_csv("p_values.csv")
+fc_1_lt <- read_csv("mean_values_se_n_levene.csv") %>% 
+  mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
+fc_1_ind_lt <- read_csv("individual_values_levene.csv") %>% 
+  select(-genotype) %>% 
+  left_join(genotypes)
+sig_GC_lt <- read_csv("p_values_levene.csv")
+fc_1_cv <- read_csv("mean_values_se_n_cv.csv") %>% 
+  mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
+fc_1_ind_cv <- read_csv("individual_values_cv.csv") %>% 
+  select(-genotype) %>% 
+  left_join(genotypes)
+sig_GC_cv <- read_csv("p_values_cv.csv") %>% 
+  mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))),
+         group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")),
+         group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))),
+         group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
+setwd(out_dir)
+# Data combination --------------------------------------------------------
+met_dat <- fc_1 %>% 
+  distinct(met, Compound_Name, Compound_Class)
+per_comp_y <- fc_1 %>%
+  group_by(tissue, treatment, met) %>% 
+  summarise(tot_val = max(mean_fc + se))
+cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
+              "#0072B2", "#D55E00","#000000")
+bw_scale <- c("black", "black", "black", "black", "black", "black", "black")
+# Correlation network -------------------------------------------------------------
+library(jmuOutlier)
+g_vec <- genotypes$genotype %>%
+  sort() %>%
+  as.character()
+g_vec_lab <- genotypes %>%
+  arrange(genotype) %>% 
+  select(genotype_label) %>% 
+  as_vector()
+## Loop start --------------------------------------------------------------
+for(g in seq_along(genotypes$genotype)) {
+  print(str_c("starting with", g_vec_lab[[g]], sep = " "))
+cor_base <- fc_1_ind %>% 
+  filter(#Compound_Class %in% c("Phospholipid", "Phosphatidylcholine"),
+    genotype %in% c(g_vec[[g]]),
+    tissue == "fruits")
+cor.GC <- cor_base %>% 
+  group_by(met) %>% 
+  mutate(log_norm = log2(fc),
+         log_norm = if_else(is.infinite(log_norm), 0, log_norm),
+         log_norm_level = (log_norm - mean(log_norm))/(max(log_norm)-min(log_norm))) %>% 
+  ungroup() %>% 
+  mutate(group = as_factor(str_c(tissue, treatment, genotype, sep = "_"))) %>% 
+  pivot_wider(id_cols = c(genotype, treatment, LIMS_ID),
+              names_from = met,
+              values_from = fc) %>% 
+  #arrange(Compound_Class, Compound_Name) %>% 
+  as.data.frame()
+cor.GC_numeric <- cor.GC %>% 
+  select(starts_with("m_"))
+corr_mat <- cor(cor.GC_numeric, method = "spearman")
+x <- vector(mode = "list", length = length(colnames(cor.GC_numeric)))
+names(x) <- colnames(cor.GC_numeric)
+start <- Sys.time()
+for (i in seq_along(colnames(cor.GC_numeric))) {
+  #names(x[[i]]) <- colnames(cor.GC_numeric)
+  for (n in seq_along(colnames(cor.GC_numeric))) {
+    if(i > n) {
+      x[[i]][[n]] <- NA
+    } else {
+    x[[i]][[n]] <- perm.cor.test(cor.GC_numeric[[i]], cor.GC_numeric[[n]],
+                                 method = "spearman",
+                                 num.sim = 20000
+                                 )$p.value
+    }
+  } 
+  print(i)
+}
+stop <- Sys.time()
+stop-start
+for (i in seq_along(colnames(cor.GC_numeric))) {
+  names(x[[i]]) <- colnames(cor.GC_numeric)
+}
+corr_p <- x %>% 
+  reduce(bind_rows)
+corr_p_long <- corr_p %>% 
+  mutate(met1 = colnames(cor.GC_numeric)) %>% 
+  pivot_longer(cols = starts_with("m_"),
+               values_to = "cor_p_val",
+               names_to = "met2") %>% 
+  mutate(met1_num = as.numeric(str_extract(met1,"(?<=m_)\\d+")),
+         met2_num = as.numeric(str_extract(met2,"(?<=m_)\\d+"))) %>% 
+  mutate(small = if_else(met1_num < met2_num, met1_num, met2_num),
+         big = if_else(met1_num > met2_num, met1_num, met2_num),
+         sorted = str_c(small, big, sep = "_")) %>% 
+  filter(!is.na(cor_p_val)) %>% 
+  mutate(p_adj = p.adjust(cor_p_val, method = "fdr")) %>% 
+  filter(!met1 == met2)
+corr_mat_long <- corr_mat %>% 
+  as_tibble(rownames = "met1") %>% 
+  pivot_longer(cols = starts_with("m_"),
+               values_to = "cor_val",
+               names_to = "met2") %>% 
+  mutate(met1_num = as.numeric(str_extract(met1,"(?<=m_)\\d+")),
+         met2_num = as.numeric(str_extract(met2,"(?<=m_)\\d+"))) %>% 
+  mutate(small = if_else(met1_num < met2_num, met1_num, met2_num),
+         big = if_else(met1_num > met2_num, met1_num, met2_num),
+         sorted = str_c(small, big, sep = "_")) %>% 
+  distinct(sorted, .keep_all = T) %>% 
+  filter(!met1 == met2) %>% 
+  mutate(pos_neg = as_factor(if_else(cor_val > 0, "pos", "neg")),
+         weight = cor_val)
+corr_mat_comb <- corr_mat_long %>% 
+  left_join(corr_p_long)
+write_csv(corr_mat_comb,
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "_corr_mat_",
+                g_vec_lab[[g]],
+                ".csv"))
+}
+## Loop end ----------------------------------------------------------------
+stop()
+library(viridis)
+library(igraph)
+library(ggnetwork)
+library(ggraph)
+classes_tidy <- met_dat %>% 
+  mutate(`ChEBI simplified` = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative",
+                                      if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative",
+                                              if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid",
+                                                      "other"))))
+annotation_col_corr <- classes_tidy %>% 
+  select(`ChEBI simplified`) %>% 
+  as.data.frame()
+rownames(annotation_col_corr) <- classes_tidy$met
+vir_scale <- plasma(4, begin = 0.2, end = 1)
+ann_colors_corr = list(
+  `ChEBI simplified`= c("amino acid or derivative"= vir_scale[1],
+                        "carbohydrate or derivative" = vir_scale[2],
+                        "carboxylic acid" = vir_scale[3],
+                        "other" = vir_scale[4])
+)
+ann_colors_graph = tibble(`ChEBI simplified`= names(ann_colors_corr$`ChEBI simplified`),
+                          color = ann_colors_corr$`ChEBI simplified`)
+node_cols <- tibble(met = colnames(cor.GC_numeric)) %>% 
+  left_join(classes_tidy) %>% 
+  left_join(ann_colors_graph)
+node_col_vec <- node_cols$color
+net_raw <- graph_from_data_frame(corr_mat_comb, vertices = node_cols, directed = F)
+set.seed(200)
+plot.igraph(net_raw, vertex.color = node_col_vec, vertex.label = "")
+set.seed(121)
+layout <- layout_with_graphopt(net_raw , charge = 0.2,
+                               spring.length = 1,
+                               spring.constant = 200,
+                               mass = 50)
+ggraph(net_raw, layout = layout) +
+  geom_edge_arc0(aes(color = pos_neg), strength = 0.1) +
+  geom_node_point(aes(color = `ChEBI simplified`), size = 3)
+#  geom_node_text(aes(label = name))
+ggnetwork(net_raw, layout = layout) %>% 
+    ggplot(aes(x = x, y = y, xend = xend, yend = yend)) + 
+    geom_edges(aes(color = pos_neg),curvature = 0.1) +
+    geom_nodes(aes(fill = `ChEBI simplified`), color = "black",  shape = 21, size = 3) +
+    theme_blank() +
+    scale_fill_manual(values = vir_scale, aesthetics = "fill")
+#other methods
+  corr_mat_plot <- corr_mat_long %>% 
+    filter(abs(cor_val) >= 0.75) %>% 
+    select(met1, met2) %>% 
+    as.matrix()
+  corr_mat_plot <- corr_mat
+  corr_mat_plot[abs(corr_mat_plot)<0.5] <- 0
+  net_raw <- graph_from_edgelist(corr_mat_plot, directed = F)
+  net_raw <- graph_from_adjacency_matrix(corr_mat_plot,
+                                         weighted=T,
+                                         mode="undirected",
+                                         diag=F)
--- a/workflows/GC_MS_figures/Figure maker_cmQTL1_paper.R
+++ b/workflows/GC_MS_figures/Figure maker_cmQTL1_paper.R
--- a/workflows/Phenotype_analysis/210114_cmQTL_val_phenotype_analysis.R
+++ b/workflows/Phenotype_analysis/210114_cmQTL_val_phenotype_analysis.R
--- a/workflows/Phenotype_figures/Figure maker_phenotypes.R
+++ b/workflows/Phenotype_figures/Figure maker_phenotypes.R