diff --git a/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/0AF9DF01 b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/0AF9DF01
new file mode 100644
index 0000000000000000000000000000000000000000..21b544991eb33a2d9039aeda0c4976b754140725
--- /dev/null
+++ b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/0AF9DF01
@@ -0,0 +1,6 @@
+{
+    "source_window_id": "",
+    "Source": "Source",
+    "cursorPosition": "41,34",
+    "scrollLine": "24"
+}
\ No newline at end of file
diff --git a/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/INDEX b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/INDEX
new file mode 100644
index 0000000000000000000000000000000000000000..87046b9700b78bd21f80a92c76154313238eb89e
--- /dev/null
+++ b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/prop/INDEX
@@ -0,0 +1 @@
+H%3A%2F1.%20cmQTL%20validation%2FcmQTL_val1_arc%2Fruns%2FGC_MS_normalization%2F210927_primary_normalization_with_split.R="0AF9DF01"
diff --git a/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07 b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07
new file mode 100644
index 0000000000000000000000000000000000000000..2098d4826030caeaf648863b57914f1e494462df
--- /dev/null
+++ b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07
@@ -0,0 +1,26 @@
+{
+    "id": "78975B07",
+    "path": "H:/1. cmQTL validation/cmQTL_val1_arc/runs/GC_MS_normalization/210927_primary_normalization_with_split.R",
+    "project_path": "210927_primary_normalization_with_split.R",
+    "type": "r_source",
+    "hash": "2490589321",
+    "contents": "",
+    "dirty": false,
+    "created": 1674232784622.0,
+    "source_on_save": false,
+    "relative_order": 1,
+    "properties": {
+        "source_window_id": "",
+        "Source": "Source",
+        "cursorPosition": "41,34",
+        "scrollLine": "24"
+    },
+    "folds": "",
+    "lastKnownWriteTime": 1674233264,
+    "encoding": "UTF-8",
+    "collab_server": "",
+    "source_window": "",
+    "last_content_update": 1674233264176,
+    "read_only": false,
+    "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07-contents b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07-contents
new file mode 100644
index 0000000000000000000000000000000000000000..1fa012bb31cc8cf61c020d075e5ebe0dc15072d2
--- /dev/null
+++ b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/78975B07-contents
@@ -0,0 +1,1123 @@
+rm(list = ls())
+setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+getwd()
+current <- getwd()
+source <- ".."
+
+
+library(openxlsx)
+library(tidyverse)
+library(car)
+library(pheatmap)
+library(broom)
+library(ggpubr)
+library(viridisLite)
+library(modelr)
+library(dlookr)
+library(imputeLCMD)
+library(ggrepel)
+library(here)
+
+# Directory setting -------------------------------------------------------
+
+
+current <- getwd()
+source <- str_c(current,"/..")
+
+cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-",""))
+
+out <- str_c(cur_date, "normalization", sep = "_")
+
+if (file.exists(out)) {
+  cat("The folder already exists")
+} else {
+  dir.create(out)
+}
+
+out_dir <- str_c(current, out, sep = "/")
+
+# Data loading ------------------------------------------------------------
+setwd(source)
+
+sam_dat1 <- readxl::read_xlsx(here)
+
+sam_dat1 <- read_csv("210812_cmQTL_val1_samplelist.csv", col_types = "ffidficcccfccccc")
+GC_run1 <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 5)
+
+sam_dat2 <- read_csv("210812_cmQTL_val2_samplelist.csv", col_types = "ccfdifiiiff")
+GC_run2 <- readxl::read_xlsx("210324_WIJESI-130121-15_cmQTL_validation2.xlsx", sheet = 6)
+
+genotypes <- readxl::read_xlsx("Genotype_names.xlsx") %>%
+  mutate(plantline = as_factor(plantline))
+
+GC_machine_nums <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 6)
+
+setwd(current)
+
+take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31",
+                "glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57",
+                "pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72",
+                "arginine_157_256_rt9.92")
+
+exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78")
+exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak
+
+area1 <- readxl::read_xls("210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+area2 <- readxl::read_xls("210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+area3 <- readxl::read_xls("210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+
+
+
+
+metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>% 
+  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
+  mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>% 
+  filter(!is.na(component)) %>% 
+  group_by(Compound_Name)  %>% 
+  mutate(peak_no = rank(RT_mean),
+         Compound_Name = if_else(duplicated(Compound_Name),
+                                 str_c(Compound_Name, "peak", peak_no, sep = "_"),
+                                 Compound_Name))
+
+area <- area1 %>% 
+  bind_rows(area2, area3) %>% 
+  select(component, area, machine_num_GC = machine_num,rt) %>% 
+  mutate(area = as.numeric(area),
+         rt = as.numeric(rt))
+
+rt_mean <- area %>%
+  group_by(component) %>% 
+  summarise(RT_mean = mean(rt, na.rm = T))
+
+metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>% 
+  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
+  left_join(rt_mean) %>% 
+  filter(!is.na(component)) %>% 
+  arrange(Compound_Name, RT_mean) %>% 
+  group_by(Compound_Name)  %>% 
+  mutate(peak_no = rank(RT_mean),
+         Compound_Name = if_else(duplicated(Compound_Name),
+                                 str_c(Compound_Name, "peak", peak_no, sep = "_"),
+                                 Compound_Name))
+
+
+# Data combination --------------------------------------------------------
+
+
+
+sam_vars <- c("plantline", "alias", "LIMS_ID",
+              "treatment", "tissue", "batch_GC", "run_date_GC",
+              "extraction_num", "sample_num", "machine_num_GC",
+              "class", "run_num_GC", "sample_weight", "exp", "genotype")
+
+sam_dat1_tidy <- GC_run1 %>% 
+  left_join(GC_machine_nums) %>% 
+  select(extraction_num = `Sample name`, everything())%>% 
+  mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
+                                   if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
+         extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
+                                             if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
+         exp = as_factor(1)) %>% 
+  left_join(sam_dat1) %>% 
+  left_join(genotypes) %>% 
+  select(treatment = irrigation, everything()) %>% 
+  select(all_of(sam_vars)) %>% 
+  arrange(run_num_GC)
+
+sam_dat2_tidy <- GC_run2 %>% 
+  left_join(GC_machine_nums) %>% 
+  select(extraction_num = `Sample name`, everything()) %>% 
+  mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
+                                   if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
+         extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
+                                             if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
+         exp = as_factor(2)) %>% 
+  left_join(sam_dat2) %>% 
+  left_join(genotypes) %>% 
+  select(treatment = irrigation, everything()) %>% 
+  select(all_of(sam_vars)) %>% 
+  arrange(run_num_GC)
+
+sam_dat <- sam_dat1_tidy %>% 
+  bind_rows(sam_dat2_tidy) %>% 
+  arrange(run_num_GC) %>% 
+  group_by(batch_GC) %>% 
+  mutate(daily_num = row_number()) %>% 
+  fill(tissue, .direction = "updown") %>% 
+  ungroup() %>% 
+  left_join(genotypes) %>% 
+  mutate(treatment = as_factor(treatment))
+
+area_long <- area %>% 
+  left_join(sam_dat) %>% 
+  filter(!is.na(exp))
+
+area <- area_long %>% 
+  left_join(metdat_GC_class) %>% 
+  filter(!str_detect(component, "^\\!|FAME|component|empty"), !is.na(component),
+         !component %in% exclude_mets)
+
+met_dat = area %>% 
+  distinct(RT_mean, Compound_Name, Compound_Class, component) %>% 
+  mutate(met = str_c("m", row_number(), sep = "_"))
+
+area <- area %>% 
+  left_join(met_dat) %>% 
+  mutate(area = as.numeric(area))
+
+setwd(out_dir)
+
+# PCA to justify filtering ------------------------------------------------
+
+features_pca <- area %>% 
+  group_by(met) %>% 
+  mutate(imp = if_else(is.na(area), 0.5*min(area, na.rm = T),area),
+         batch_pca = str_extract(batch_GC, "\\d+")) %>% 
+  ungroup() 
+
+pca_base <- features_pca %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(run_num_GC) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text_repel(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31"| machine_num_GC == "21107rA_54", machine_num_GC, NULL)), box.padding = 1) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp1_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+pca_base <- features_pca %>%
+  ungroup() %>% 
+  filter(exp == 2) %>% 
+  arrange(batch_GC, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              arrange(batch_GC, daily_num) %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_label_repel(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC %in% exclude_samples, machine_num_GC, NULL)),force_pull = -0.1) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp2_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Data filtering ----------------------------------------------------------
+
+
+set_back_NA <- function(x){
+  for (i in seq_along(x)) {
+    if (x[[i]] == 0) {
+      x[[i]] <- NA
+    } else {
+      x[[i]] <- x[[i]]
+    }
+  }
+  x
+}
+
+area_long <- area %>% 
+  filter(!machine_num_GC %in% exclude_samples)
+
+
+# Imputation --------------------------------------------------------------
+
+features_na <- area_long #%>% 
+  #group_by(met) %>% 
+  #mutate(area = set_back_NA(area)) %>% 
+  #ungroup()
+
+missingness <- features_na %>% 
+  group_by(exp, met, tissue, treatment) %>% 
+  summarise(na = sum(is.na(area)),
+            n = n()) %>% 
+  mutate(percent_na = na/n*100) %>% 
+  ungroup()
+
+features <- features_na %>% 
+  left_join(missingness) %>% 
+  group_by(met) %>% 
+  #mutate(area = ifelse(percent_na >= 0 & is.na(area), rnorm(n = 1, mean = 0.5*min(area, na.rm = T)), area)) %>% 
+  ungroup()
+
+
+features_filtered <- area_long %>% 
+  group_by(met) %>% 
+  mutate(imp = if_else(is.na(area), 0.5*min(area, na.rm = T),area),
+         batch_pca = str_extract(batch_GC, "\\d+")) %>% 
+  ungroup() %>% 
+  left_join(missingness) #%>%
+  #filter(percent_na <= 60)
+
+
+# Internal standard check -------------------------------------------------
+
+features_filtered %>% 
+  filter(str_detect(Compound_Name, "ribitol")) %>% 
+  mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+         is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+  filter(class!="blank") %>% 
+  ggplot(aes(x=run_num_GC, y = log2(imp))) +
+  geom_point(aes(color = class, shape = is_miss)) +
+  geom_vline(aes(xintercept = xint)) +
+  facet_grid(rows = vars(Compound_Name))
+
+# Internal Standard Normalization -----------------------------------------
+
+isvec <- features_filtered %>% 
+  filter(component == "ribitol_217_rt8.20") %>% 
+  select(machine_num_GC, is = imp)
+
+features <- features_filtered %>% 
+  full_join(isvec) %>% 
+  mutate(isnorm = imp/is) %>% 
+  filter(str_detect(Compound_Name, "ribitol", negate = T)) %>% 
+  mutate(sample_weight = if_else(sample_weight == 0|is.na(sample_weight), 50, sample_weight))
+
+
+# Fit linear model on QCs -------------------------------------------------
+
+by_batch_GC <- features %>% 
+  filter(class=="run_qc") %>% 
+  group_by(batch_GC, met) %>% # need to change treatment_batch_GC_corr
+  nest()
+
+feature_model <- function(df) {
+  lm(isnorm ~ daily_num, data = df)
+}
+
+
+by_batch_GC <- by_batch_GC %>% 
+  mutate(model = map(data, feature_model),
+         predicts = map2(data, model, add_predictions),
+         coefficients = map(model, tidy),
+         aug = map(model, augment),
+         glance = map (model, glance))
+
+coefs_aug <- unnest(by_batch_GC, aug) %>% 
+  select(batch_GC, met, .resid, daily_num)
+
+coefs_glance <- unnest(by_batch_GC,glance) %>% 
+  select(batch_GC, met, adj.r.squared)
+
+coefs_term <- unnest(by_batch_GC, coefficients) %>% 
+  pivot_wider(id_cols = c(batch_GC, met),
+              names_from = "term",
+              values_from = estimate) %>% 
+  rename(x = daily_num,
+         intercept = `(Intercept)`)
+
+
+coefs_pvalue <- by_batch_GC %>% 
+  select(batch_GC, met, coefficients) %>%
+  ungroup() %>% 
+  unnest(coefficients) %>% 
+  pivot_wider(id_cols = c(batch_GC, met),
+              names_from = "term",
+              values_from = p.value) %>% 
+  rename(intercept_pval = `(Intercept)`,
+         daily_num_pval = daily_num) %>% 
+  full_join(features) %>% 
+  full_join(coefs_term) %>% 
+  #left_join(coefs_aug) %>% 
+  full_join(coefs_glance) 
+
+median <- features %>% 
+  filter(class=="run_qc") %>% 
+  group_by(batch_GC, met) %>% 
+  summarise(median = median (isnorm))
+
+features_lin <- unnest(by_batch_GC, predicts) %>% 
+  full_join(coefs_pvalue) %>% 
+  full_join(median) %>% 
+  mutate(pred = intercept + x*daily_num,
+         predp = ifelse(daily_num_pval <=0.05 & adj.r.squared >= 0.75, pred, median),
+         predp = ifelse(is.na(predp), median, predp)) %>%
+  select(- c("data", "model", "coefficients")) 
+
+adjust_lin <- features_lin %>% 
+  ungroup() %>% 
+  group_by(met) %>% 
+  summarise(lin_offset = if_else(any(predp <0), 1.001*abs(min(predp)),0))
+
+features_lin <- features_lin %>%
+  ungroup() %>% 
+  full_join(adjust_lin) %>%
+  full_join(features) %>% 
+  mutate(linnorm = (isnorm+lin_offset)/(predp+lin_offset),
+         linnorm_fw = linnorm/sample_weight,
+         linnorm_fw_log = log2(linnorm_fw))
+
+
+
+
+#Fit loess-model for QC-RLSC batch_GCwise####
+rejoin <- features
+features <- rejoin
+
+QC_loess <- features %>% 
+  ungroup() %>% 
+  filter(class == "run_qc") %>%
+  #filter(met!="m_44") %>% 
+  group_by(met,  batch_GC) %>% 
+  nest()
+#revert span back to 1.5 later
+loess_model <- function (df) {
+  loess(isnorm ~ daily_num, span = 1.5, data = df,control = loess.control(surface = "direct"))
+}
+
+start <- Sys.time()
+QC_loess <- QC_loess %>% 
+  mutate(model = map (data, loess_model),
+         daily_num = list(seq(1,101,1)),
+         predict = map2(model,daily_num,stats::predict))
+end <- Sys.time()
+end-start
+
+features_loess <- QC_loess %>%
+  unnest(c(predict, daily_num, batch_GC)) %>% 
+  select(-model, -data) %>% 
+  full_join(rejoin) %>% 
+  filter(!is.na(machine_num_GC))
+
+adjust <- features_loess %>% 
+  ungroup() %>% 
+  group_by(met) %>% 
+  summarise(offset = if_else (any(predict <0), 1.001*abs(min(predict, na.rm = T)),0))
+
+features_loess <- features_loess %>% 
+  left_join(adjust) %>% 
+  mutate(loess_norm = (isnorm+offset)/(predict+offset),
+         loess_norm_fw = loess_norm/sample_weight,
+         loess_norm_fw_log = log2(loess_norm_fw))%>% 
+  ungroup()
+
+features_all <- features_lin %>% 
+  left_join(features_loess) %>% 
+  mutate(batch_pca = str_extract(batch_GC, "\\d+"))
+
+
+# Relative log abundance plots --------------------------------------------
+
+
+
+
+features_all %>% 
+  filter(!is.na(genotype)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(sub = loess_norm_fw,
+         rla = log2(sub) - median(log2(sub))) %>% 
+  ggplot(aes(x = met, y = rla)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(treatment), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+features_all %>% 
+  #filter(!is.na(taxa)) %>% 
+  filter(treatment == 1, class == "sample") %>% 
+  mutate(run_num_GC = as_factor(run_num_GC),
+         batch_GC = as_factor(batch_GC)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(rla_imp = log2(imp) - median(log2(imp)),
+         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
+         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
+  pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% 
+  ggplot(aes(x = met, y = rla)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(normalization), cols = vars(treatment), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  ylim(c(-2,2))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_2_GC_met_rla_plot.jpg",
+             sep = "_"))
+
+features_all %>% 
+  #filter(!is.na(taxa)) %>% 
+  filter(class == "sample") %>% 
+  mutate(run_num_GC = as_factor(run_num_GC),
+         batch_GC = as_factor(batch_GC)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(rla_imp = log2(imp) - median(log2(imp)),
+         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
+         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
+  pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% 
+  ggplot(aes(x = run_num_GC, y = rla, color = batch_GC)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(normalization), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  ylim(c(-1,1))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_2_samples_rla_plot.jpg",
+             sep = "_"))
+
+
+# RSD estimation ----------------------------------------------------------
+
+
+
+
+RSD_bio_reps <- features_all %>% 
+  filter(class == "sample") %>% 
+  #filter(!is.na(taxa)) %>% 
+  group_by(met) %>% 
+  #filter((any(qc_imputed) == T)==F) %>% 
+  group_by(met, treatment, genotype, tissue, exp) %>% 
+  summarise(RSD_imp = sd(imp)/mean(imp),
+            RSD_isnorm = sd(isnorm) / mean(isnorm),
+            RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)),
+            RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw)))
+            
+            
+RSD_bio_reps_mean <- RSD_bio_reps %>% 
+  #mutate(RSD_diff = RSD_imp-RSD_loess) %>% 
+  ungroup() %>% 
+  #group_by(treatment, tissue, exp) %>% 
+  summarise(#mean_RSD_loess = mean(RSD_loess),
+    mean_imp = mean(na.omit(RSD_imp)),
+    mean_loess_norm = mean(na.omit(RSD_loess_norm)),
+    mean_RSD_isnorm = mean(na.omit(RSD_isnorm )),
+    mean_linnorm = mean(na.omit(RSD_linnorm)))
+
+RSD_qcs <- features_all %>% 
+  filter(class == "run_qc") %>% 
+  group_by(met) %>% 
+  #filter((any(qc_imputed) == T)==F) %>% 
+  group_by(met, exp) %>% 
+  summarise(RSD_imp = sd(imp)/mean(imp),
+            RSD_isnorm = sd(isnorm) / mean(isnorm),
+            RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)),
+            RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw)))
+
+RSD_qcs_mean <- RSD_qcs %>% 
+  #mutate(RSD_diff = RSD_imp-RSD_loess) %>% 
+  ungroup() %>% 
+  group_by(exp) %>% 
+  summarise(#mean_RSD_loess = mean(RSD_loess),
+    mean_imp = mean(na.omit(RSD_imp)),
+    mean_loess_norm = mean(na.omit(RSD_loess_norm)),
+    mean_RSD_isnorm = mean(na.omit(RSD_isnorm )),
+    mean_linnorm = mean(na.omit(RSD_linnorm)))
+
+
+
+#PCA testing chunk####
+##https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r
+#exp1
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = linnorm_fw_log) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_lin_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+#PCA after loess normalization
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = loess_norm_fw_log) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_loess_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+
+
+#PCA after loess normalization exp2
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 2) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(isnorm)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC %in% exclude_samples, machine_num_GC, NULL))) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp2_loess_norm_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Per tissue loess pca ----------------------------------------------------
+
+
+tissues <- c("leaves", "fruits")
+for(tiss in seq_along(tissues)){
+  
+  pca_base <- features_all %>%
+    ungroup() %>% 
+    filter(exp == 1, tissue == tissues[[tiss]]) %>% 
+    arrange(batch_pca, daily_num) %>%
+    mutate(log_imp = log2(imp)) %>% 
+    pivot_wider(id_cols = c(tissue, treatment,batch_GC, daily_num, class, machine_num_GC),
+                names_from = met,
+                values_from = loess_norm_fw_log) %>% 
+    filter(!is.na(m_1))
+  
+  pca <- summary(prcomp(
+    pca_base%>%
+      select(starts_with("m_"))))
+  
+  sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+  
+  pca_plot <- as_tibble(pca$x) %>% 
+    mutate(join_num = 1:nrow(pca$x)) %>% 
+    full_join(pca_base %>% 
+                mutate(join_num=1:nrow(pca$x))) %>%
+    left_join(sam_dat) %>% 
+    mutate(batch_pca= as_factor(batch_GC),
+           class= as_factor(class)) %>% 
+    select(all_of(sam_vars), everything())
+  
+  exp_var <- as_tibble(pca[["importance"]])
+  
+  pca_plot %>% 
+    #  filter(treatment == "HL") %>% 
+    ggplot()+
+    geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+    stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+    ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+    xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+  
+  ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+               "cmQTL_val1", tissues[[tiss]], "treatment_loess_PCA.jpg",
+               sep = "_"),
+         width = 183,
+         height = 100,
+         units = "mm",
+         dpi = 300)
+}
+
+
+# PCA before normalization
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_pca, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)%>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_pca),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_batch_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_class_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+#cmQTLval2
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 2, batch_GC == "3_split") %>% 
+  arrange(batch_GC, daily_num) %>%
+  mutate(log_imp = log2(isnorm)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)%>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21109rA_59", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val2_class_isnorm_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Plot all metabolites ----------------------------------------------------
+
+plotmets <- features_all %>% distinct(met) %>% as_vector
+plotmet_labs <- plotmets %>% as_tibble() %>% 
+  left_join(met_dat, by= c("value" = "met")) %>% 
+  #mutate(peak_num = base::rank(HMDB_clear_name, ties.method = "first"),
+  #       dup = sum(peak_num),
+  #       HMDB_clear_name_unique = if_else(dup>1, str_c(HMDB_clear_name, peak_num), HMDB_clear_name)) %>% 
+  #ungroup() %>% 
+  select(Compound_Name) %>% as_vector()
+
+plot_out <- vector("list", length = length(plotmets))
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+           is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predict), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+   # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_fit.pdf"))
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+plot_out <- vector("list", length = length(plotmets))
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=loess_norm_fw_log)) +
+    geom_point(aes(color=class)) +
+    #geom_point(aes(y=predp), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_norm_fw_log.pdf"))
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=daily_num, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predict), color="black", size=0.1) +
+    facet_wrap(facets = vars(batch_GC), nrow = 1, scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]]) +
+    theme(axis.ticks.y = element_blank(),
+          axis.text.y = element_blank(),
+          panel.spacing = unit(-0.1,"lines"))
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_norm_no_scale.pdf"),
+    height = 3.5)
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+           is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predp), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_linnorm.pdf"))
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+rescale <- features_all %>% 
+  group_by(met, tissue, exp) %>% 
+  summarise(rescale = median(imp))
+
+sam_vars <- c("plantline", "alias", "LIMS_ID",
+              "treatment", "tissue", "batch_GC", "run_date_GC",
+              "extraction_num", "sample_num", "machine_num_GC",
+              "class", "run_num_GC", "sample_weight", "exp", "genotype")
+
+
+features_out <- features_all %>% 
+  left_join(rescale) %>% 
+  filter(class != "blank",
+         (component %in% take_split & str_detect(batch_GC, "split"))|
+           (!component %in% take_split & str_detect(batch_GC, "split", negate = T))|
+         tissue == "leaves") %>% 
+  group_by(met, tissue, exp) %>% 
+  mutate(loess_norm_med = loess_norm_fw/median(loess_norm_fw),
+         rescaled = loess_norm_med*rescale) %>% 
+  select(all_of(sam_vars), met, Compound_Name, Compound_Class, loess_norm_fw, loess_norm_med, area, rescaled)
+
+#features_out %>% 
+#  filter(met == "m_02177", tissue == "fruits", exp == 1) %>% 
+#  ggplot(aes(x = run_num_GC, y = loess_norm_fw)) +
+#  geom_point()
+
+# Write files -------------------------------------------------------------
+
+
+
+write_csv(features_out, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_feat_dat_GC.csv",
+                sep = "_"))
+
+write_csv(met_dat, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_met_dat_GC.csv",
+                sep = "_"))
+
+write_csv(sam_dat, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_sam_dat_GC.csv",
+                sep = "_"))
+
+write_csv(missingness, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_miss_GC.csv",
+                sep = "_"))
+
+# Log used code ------------------------------------------------------------
+
+file_name <- sys.frame(1)$ofile
+
+file.copy(file_name,
+          to = str_c(out_dir, str_remove(file_name, current), "_", str_replace_all(Sys.Date(),"^.{2}|-",""), ".R"),overwrite = T)
diff --git a/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/lock_file b/workflows/GC_MS_normalization/.Rproj.user/DF293CF6/sources/session-6e7b8359/lock_file
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/workflows/GC_MS_normalization/.Rproj.user/shared/notebooks/patch-chunk-names b/workflows/GC_MS_normalization/.Rproj.user/shared/notebooks/patch-chunk-names
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R
new file mode 100644
index 0000000000000000000000000000000000000000..fc4c856062e174becac027134122742dd9154af3
--- /dev/null
+++ b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R
@@ -0,0 +1,1124 @@
+rm(list = ls())
+#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
+#getwd()
+#current <- getwd()
+#source <- ".."
+
+
+library(openxlsx)
+library(tidyverse)
+library(car)
+library(pheatmap)
+library(broom)
+library(ggpubr)
+library(viridisLite)
+library(modelr)
+library(dlookr)
+library(imputeLCMD)
+library(ggrepel)
+here::i_am("workflows/GC_MS_normalization/210927_primary_normalization_with_split.R")
+library(here)
+
+# Directory setting -------------------------------------------------------
+
+
+current <- getwd()
+source <- str_c(current,"/..")
+
+cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-",""))
+
+out <- str_c(cur_date, "normalization", sep = "_")
+
+if (file.exists(out)) {
+  cat("The folder already exists")
+} else {
+  dir.create(out)
+}
+
+out_dir <- str_c(current, out, sep = "/")
+
+# Data loading ------------------------------------------------------------
+setwd(source)
+
+sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx"))
+
+sam_dat1 <- read_csv("210812_cmQTL_val1_samplelist.csv", col_types = "ffidficcccfccccc")
+GC_run1 <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 5)
+
+sam_dat2 <- read_csv("210812_cmQTL_val2_samplelist.csv", col_types = "ccfdifiiiff")
+GC_run2 <- readxl::read_xlsx("210324_WIJESI-130121-15_cmQTL_validation2.xlsx", sheet = 6)
+
+genotypes <- readxl::read_xlsx("Genotype_names.xlsx") %>%
+  mutate(plantline = as_factor(plantline))
+
+GC_machine_nums <- readxl::read_xlsx("200923_samplelist_WIJESI-030820-13_cmQTL_validation.xlsx", sheet = 6)
+
+setwd(current)
+
+take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31",
+                "glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57",
+                "pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72",
+                "arginine_157_256_rt9.92")
+
+exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78")
+exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak
+
+area1 <- readxl::read_xls("210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+area2 <- readxl::read_xls("210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+area3 <- readxl::read_xls("210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls", na = c("", "N/A"))
+
+
+
+
+metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>% 
+  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
+  mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>% 
+  filter(!is.na(component)) %>% 
+  group_by(Compound_Name)  %>% 
+  mutate(peak_no = rank(RT_mean),
+         Compound_Name = if_else(duplicated(Compound_Name),
+                                 str_c(Compound_Name, "peak", peak_no, sep = "_"),
+                                 Compound_Name))
+
+area <- area1 %>% 
+  bind_rows(area2, area3) %>% 
+  select(component, area, machine_num_GC = machine_num,rt) %>% 
+  mutate(area = as.numeric(area),
+         rt = as.numeric(rt))
+
+rt_mean <- area %>%
+  group_by(component) %>% 
+  summarise(RT_mean = mean(rt, na.rm = T))
+
+metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>% 
+  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
+  left_join(rt_mean) %>% 
+  filter(!is.na(component)) %>% 
+  arrange(Compound_Name, RT_mean) %>% 
+  group_by(Compound_Name)  %>% 
+  mutate(peak_no = rank(RT_mean),
+         Compound_Name = if_else(duplicated(Compound_Name),
+                                 str_c(Compound_Name, "peak", peak_no, sep = "_"),
+                                 Compound_Name))
+
+
+# Data combination --------------------------------------------------------
+
+
+
+sam_vars <- c("plantline", "alias", "LIMS_ID",
+              "treatment", "tissue", "batch_GC", "run_date_GC",
+              "extraction_num", "sample_num", "machine_num_GC",
+              "class", "run_num_GC", "sample_weight", "exp", "genotype")
+
+sam_dat1_tidy <- GC_run1 %>% 
+  left_join(GC_machine_nums) %>% 
+  select(extraction_num = `Sample name`, everything())%>% 
+  mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
+                                   if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
+         extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
+                                             if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
+         exp = as_factor(1)) %>% 
+  left_join(sam_dat1) %>% 
+  left_join(genotypes) %>% 
+  select(treatment = irrigation, everything()) %>% 
+  select(all_of(sam_vars)) %>% 
+  arrange(run_num_GC)
+
+sam_dat2_tidy <- GC_run2 %>% 
+  left_join(GC_machine_nums) %>% 
+  select(extraction_num = `Sample name`, everything()) %>% 
+  mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
+                                   if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
+         extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
+                                             if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
+         exp = as_factor(2)) %>% 
+  left_join(sam_dat2) %>% 
+  left_join(genotypes) %>% 
+  select(treatment = irrigation, everything()) %>% 
+  select(all_of(sam_vars)) %>% 
+  arrange(run_num_GC)
+
+sam_dat <- sam_dat1_tidy %>% 
+  bind_rows(sam_dat2_tidy) %>% 
+  arrange(run_num_GC) %>% 
+  group_by(batch_GC) %>% 
+  mutate(daily_num = row_number()) %>% 
+  fill(tissue, .direction = "updown") %>% 
+  ungroup() %>% 
+  left_join(genotypes) %>% 
+  mutate(treatment = as_factor(treatment))
+
+area_long <- area %>% 
+  left_join(sam_dat) %>% 
+  filter(!is.na(exp))
+
+area <- area_long %>% 
+  left_join(metdat_GC_class) %>% 
+  filter(!str_detect(component, "^\\!|FAME|component|empty"), !is.na(component),
+         !component %in% exclude_mets)
+
+met_dat = area %>% 
+  distinct(RT_mean, Compound_Name, Compound_Class, component) %>% 
+  mutate(met = str_c("m", row_number(), sep = "_"))
+
+area <- area %>% 
+  left_join(met_dat) %>% 
+  mutate(area = as.numeric(area))
+
+setwd(out_dir)
+
+# PCA to justify filtering ------------------------------------------------
+
+features_pca <- area %>% 
+  group_by(met) %>% 
+  mutate(imp = if_else(is.na(area), 0.5*min(area, na.rm = T),area),
+         batch_pca = str_extract(batch_GC, "\\d+")) %>% 
+  ungroup() 
+
+pca_base <- features_pca %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(run_num_GC) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text_repel(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31"| machine_num_GC == "21107rA_54", machine_num_GC, NULL)), box.padding = 1) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp1_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+pca_base <- features_pca %>%
+  ungroup() %>% 
+  filter(exp == 2) %>% 
+  arrange(batch_GC, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              arrange(batch_GC, daily_num) %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_label_repel(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC %in% exclude_samples, machine_num_GC, NULL)),force_pull = -0.1) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp2_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Data filtering ----------------------------------------------------------
+
+
+set_back_NA <- function(x){
+  for (i in seq_along(x)) {
+    if (x[[i]] == 0) {
+      x[[i]] <- NA
+    } else {
+      x[[i]] <- x[[i]]
+    }
+  }
+  x
+}
+
+area_long <- area %>% 
+  filter(!machine_num_GC %in% exclude_samples)
+
+
+# Imputation --------------------------------------------------------------
+
+features_na <- area_long #%>% 
+  #group_by(met) %>% 
+  #mutate(area = set_back_NA(area)) %>% 
+  #ungroup()
+
+missingness <- features_na %>% 
+  group_by(exp, met, tissue, treatment) %>% 
+  summarise(na = sum(is.na(area)),
+            n = n()) %>% 
+  mutate(percent_na = na/n*100) %>% 
+  ungroup()
+
+features <- features_na %>% 
+  left_join(missingness) %>% 
+  group_by(met) %>% 
+  #mutate(area = ifelse(percent_na >= 0 & is.na(area), rnorm(n = 1, mean = 0.5*min(area, na.rm = T)), area)) %>% 
+  ungroup()
+
+
+features_filtered <- area_long %>% 
+  group_by(met) %>% 
+  mutate(imp = if_else(is.na(area), 0.5*min(area, na.rm = T),area),
+         batch_pca = str_extract(batch_GC, "\\d+")) %>% 
+  ungroup() %>% 
+  left_join(missingness) #%>%
+  #filter(percent_na <= 60)
+
+
+# Internal standard check -------------------------------------------------
+
+features_filtered %>% 
+  filter(str_detect(Compound_Name, "ribitol")) %>% 
+  mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+         is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+  filter(class!="blank") %>% 
+  ggplot(aes(x=run_num_GC, y = log2(imp))) +
+  geom_point(aes(color = class, shape = is_miss)) +
+  geom_vline(aes(xintercept = xint)) +
+  facet_grid(rows = vars(Compound_Name))
+
+# Internal Standard Normalization -----------------------------------------
+
+isvec <- features_filtered %>% 
+  filter(component == "ribitol_217_rt8.20") %>% 
+  select(machine_num_GC, is = imp)
+
+features <- features_filtered %>% 
+  full_join(isvec) %>% 
+  mutate(isnorm = imp/is) %>% 
+  filter(str_detect(Compound_Name, "ribitol", negate = T)) %>% 
+  mutate(sample_weight = if_else(sample_weight == 0|is.na(sample_weight), 50, sample_weight))
+
+
+# Fit linear model on QCs -------------------------------------------------
+
+by_batch_GC <- features %>% 
+  filter(class=="run_qc") %>% 
+  group_by(batch_GC, met) %>% # need to change treatment_batch_GC_corr
+  nest()
+
+feature_model <- function(df) {
+  lm(isnorm ~ daily_num, data = df)
+}
+
+
+by_batch_GC <- by_batch_GC %>% 
+  mutate(model = map(data, feature_model),
+         predicts = map2(data, model, add_predictions),
+         coefficients = map(model, tidy),
+         aug = map(model, augment),
+         glance = map (model, glance))
+
+coefs_aug <- unnest(by_batch_GC, aug) %>% 
+  select(batch_GC, met, .resid, daily_num)
+
+coefs_glance <- unnest(by_batch_GC,glance) %>% 
+  select(batch_GC, met, adj.r.squared)
+
+coefs_term <- unnest(by_batch_GC, coefficients) %>% 
+  pivot_wider(id_cols = c(batch_GC, met),
+              names_from = "term",
+              values_from = estimate) %>% 
+  rename(x = daily_num,
+         intercept = `(Intercept)`)
+
+
+coefs_pvalue <- by_batch_GC %>% 
+  select(batch_GC, met, coefficients) %>%
+  ungroup() %>% 
+  unnest(coefficients) %>% 
+  pivot_wider(id_cols = c(batch_GC, met),
+              names_from = "term",
+              values_from = p.value) %>% 
+  rename(intercept_pval = `(Intercept)`,
+         daily_num_pval = daily_num) %>% 
+  full_join(features) %>% 
+  full_join(coefs_term) %>% 
+  #left_join(coefs_aug) %>% 
+  full_join(coefs_glance) 
+
+median <- features %>% 
+  filter(class=="run_qc") %>% 
+  group_by(batch_GC, met) %>% 
+  summarise(median = median (isnorm))
+
+features_lin <- unnest(by_batch_GC, predicts) %>% 
+  full_join(coefs_pvalue) %>% 
+  full_join(median) %>% 
+  mutate(pred = intercept + x*daily_num,
+         predp = ifelse(daily_num_pval <=0.05 & adj.r.squared >= 0.75, pred, median),
+         predp = ifelse(is.na(predp), median, predp)) %>%
+  select(- c("data", "model", "coefficients")) 
+
+adjust_lin <- features_lin %>% 
+  ungroup() %>% 
+  group_by(met) %>% 
+  summarise(lin_offset = if_else(any(predp <0), 1.001*abs(min(predp)),0))
+
+features_lin <- features_lin %>%
+  ungroup() %>% 
+  full_join(adjust_lin) %>%
+  full_join(features) %>% 
+  mutate(linnorm = (isnorm+lin_offset)/(predp+lin_offset),
+         linnorm_fw = linnorm/sample_weight,
+         linnorm_fw_log = log2(linnorm_fw))
+
+
+
+
+#Fit loess-model for QC-RLSC batch_GCwise####
+rejoin <- features
+features <- rejoin
+
+QC_loess <- features %>% 
+  ungroup() %>% 
+  filter(class == "run_qc") %>%
+  #filter(met!="m_44") %>% 
+  group_by(met,  batch_GC) %>% 
+  nest()
+#revert span back to 1.5 later
+loess_model <- function (df) {
+  loess(isnorm ~ daily_num, span = 1.5, data = df,control = loess.control(surface = "direct"))
+}
+
+start <- Sys.time()
+QC_loess <- QC_loess %>% 
+  mutate(model = map (data, loess_model),
+         daily_num = list(seq(1,101,1)),
+         predict = map2(model,daily_num,stats::predict))
+end <- Sys.time()
+end-start
+
+features_loess <- QC_loess %>%
+  unnest(c(predict, daily_num, batch_GC)) %>% 
+  select(-model, -data) %>% 
+  full_join(rejoin) %>% 
+  filter(!is.na(machine_num_GC))
+
+adjust <- features_loess %>% 
+  ungroup() %>% 
+  group_by(met) %>% 
+  summarise(offset = if_else (any(predict <0), 1.001*abs(min(predict, na.rm = T)),0))
+
+features_loess <- features_loess %>% 
+  left_join(adjust) %>% 
+  mutate(loess_norm = (isnorm+offset)/(predict+offset),
+         loess_norm_fw = loess_norm/sample_weight,
+         loess_norm_fw_log = log2(loess_norm_fw))%>% 
+  ungroup()
+
+features_all <- features_lin %>% 
+  left_join(features_loess) %>% 
+  mutate(batch_pca = str_extract(batch_GC, "\\d+"))
+
+
+# Relative log abundance plots --------------------------------------------
+
+
+
+
+features_all %>% 
+  filter(!is.na(genotype)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(sub = loess_norm_fw,
+         rla = log2(sub) - median(log2(sub))) %>% 
+  ggplot(aes(x = met, y = rla)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(treatment), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1))
+
+features_all %>% 
+  #filter(!is.na(taxa)) %>% 
+  filter(treatment == 1, class == "sample") %>% 
+  mutate(run_num_GC = as_factor(run_num_GC),
+         batch_GC = as_factor(batch_GC)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(rla_imp = log2(imp) - median(log2(imp)),
+         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
+         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
+  pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% 
+  ggplot(aes(x = met, y = rla)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(normalization), cols = vars(treatment), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  ylim(c(-2,2))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_2_GC_met_rla_plot.jpg",
+             sep = "_"))
+
+features_all %>% 
+  #filter(!is.na(taxa)) %>% 
+  filter(class == "sample") %>% 
+  mutate(run_num_GC = as_factor(run_num_GC),
+         batch_GC = as_factor(batch_GC)) %>% 
+  group_by(met,  treatment) %>% 
+  mutate(rla_imp = log2(imp) - median(log2(imp)),
+         rla_isnorm = log2(isnorm) - median(log2(isnorm)),
+         rla_loess_norm = log2(loess_norm_fw) - median(log2(loess_norm_fw))) %>%
+  pivot_longer(starts_with("rla"), names_to = "normalization", values_to = "rla") %>% 
+  ggplot(aes(x = run_num_GC, y = rla, color = batch_GC)) +
+  geom_boxplot() +
+  facet_grid(rows = vars(normalization), scales = "free")+
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  ylim(c(-1,1))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_2_samples_rla_plot.jpg",
+             sep = "_"))
+
+
+# RSD estimation ----------------------------------------------------------
+
+
+
+
+RSD_bio_reps <- features_all %>% 
+  filter(class == "sample") %>% 
+  #filter(!is.na(taxa)) %>% 
+  group_by(met) %>% 
+  #filter((any(qc_imputed) == T)==F) %>% 
+  group_by(met, treatment, genotype, tissue, exp) %>% 
+  summarise(RSD_imp = sd(imp)/mean(imp),
+            RSD_isnorm = sd(isnorm) / mean(isnorm),
+            RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)),
+            RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw)))
+            
+            
+RSD_bio_reps_mean <- RSD_bio_reps %>% 
+  #mutate(RSD_diff = RSD_imp-RSD_loess) %>% 
+  ungroup() %>% 
+  #group_by(treatment, tissue, exp) %>% 
+  summarise(#mean_RSD_loess = mean(RSD_loess),
+    mean_imp = mean(na.omit(RSD_imp)),
+    mean_loess_norm = mean(na.omit(RSD_loess_norm)),
+    mean_RSD_isnorm = mean(na.omit(RSD_isnorm )),
+    mean_linnorm = mean(na.omit(RSD_linnorm)))
+
+RSD_qcs <- features_all %>% 
+  filter(class == "run_qc") %>% 
+  group_by(met) %>% 
+  #filter((any(qc_imputed) == T)==F) %>% 
+  group_by(met, exp) %>% 
+  summarise(RSD_imp = sd(imp)/mean(imp),
+            RSD_isnorm = sd(isnorm) / mean(isnorm),
+            RSD_linnorm = abs(sd (linnorm_fw)/ mean(linnorm_fw)),
+            RSD_loess_norm = abs(sd(loess_norm_fw) / mean(loess_norm_fw)))
+
+RSD_qcs_mean <- RSD_qcs %>% 
+  #mutate(RSD_diff = RSD_imp-RSD_loess) %>% 
+  ungroup() %>% 
+  group_by(exp) %>% 
+  summarise(#mean_RSD_loess = mean(RSD_loess),
+    mean_imp = mean(na.omit(RSD_imp)),
+    mean_loess_norm = mean(na.omit(RSD_loess_norm)),
+    mean_RSD_isnorm = mean(na.omit(RSD_isnorm )),
+    mean_linnorm = mean(na.omit(RSD_linnorm)))
+
+
+
+#PCA testing chunk####
+##https://www.intechopen.com/books/metabolomics-fundamentals-and-applications/processing-and-visualization-of-metabolomics-data-using-r
+#exp1
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = linnorm_fw_log) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  left_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_GC= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_lin_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+#PCA after loess normalization
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = loess_norm_fw_log) %>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_loess_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+
+
+#PCA after loess normalization exp2
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 2) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(isnorm)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  #left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC %in% exclude_samples, machine_num_GC, NULL))) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_treatment_loess_outlier_exp2_loess_norm_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Per tissue loess pca ----------------------------------------------------
+
+
+tissues <- c("leaves", "fruits")
+for(tiss in seq_along(tissues)){
+  
+  pca_base <- features_all %>%
+    ungroup() %>% 
+    filter(exp == 1, tissue == tissues[[tiss]]) %>% 
+    arrange(batch_pca, daily_num) %>%
+    mutate(log_imp = log2(imp)) %>% 
+    pivot_wider(id_cols = c(tissue, treatment,batch_GC, daily_num, class, machine_num_GC),
+                names_from = met,
+                values_from = loess_norm_fw_log) %>% 
+    filter(!is.na(m_1))
+  
+  pca <- summary(prcomp(
+    pca_base%>%
+      select(starts_with("m_"))))
+  
+  sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+  
+  pca_plot <- as_tibble(pca$x) %>% 
+    mutate(join_num = 1:nrow(pca$x)) %>% 
+    full_join(pca_base %>% 
+                mutate(join_num=1:nrow(pca$x))) %>%
+    left_join(sam_dat) %>% 
+    mutate(batch_pca= as_factor(batch_GC),
+           class= as_factor(class)) %>% 
+    select(all_of(sam_vars), everything())
+  
+  exp_var <- as_tibble(pca[["importance"]])
+  
+  pca_plot %>% 
+    #  filter(treatment == "HL") %>% 
+    ggplot()+
+    geom_jitter(aes(x=PC1, y=PC2,  color = treatment)) +
+    stat_ellipse(aes(x=PC1, y=PC2, color = treatment)) +
+    ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+    xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+  
+  ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+               "cmQTL_val1", tissues[[tiss]], "treatment_loess_PCA.jpg",
+               sep = "_"),
+         width = 183,
+         height = 100,
+         units = "mm",
+         dpi = 300)
+}
+
+
+# PCA before normalization
+
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 1) %>% 
+  arrange(batch_pca, daily_num) %>%
+  mutate(log_imp = log2(imp)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_pca, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)%>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_pca),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = tissue)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = tissue)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_tissue_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = batch_GC)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = batch_GC)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_batch_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21106rA_31", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val1_class_imp_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+#cmQTLval2
+pca_base <- features_all %>%
+  ungroup() %>% 
+  filter(exp == 2, batch_GC == "3_split") %>% 
+  arrange(batch_GC, daily_num) %>%
+  mutate(log_imp = log2(isnorm)) %>% 
+  pivot_wider(id_cols = c(tissue, treatment, batch_GC, daily_num, class, machine_num_GC),
+              names_from = met,
+              values_from = log_imp)%>% 
+  filter(!is.na(m_1))
+
+pca <- summary(prcomp(
+  pca_base%>%
+    select(starts_with("m_"))))
+
+sam_vars <- pca_base %>% select(-starts_with("m_")) %>% colnames()
+
+pca_plot <- as_tibble(pca$x) %>% 
+  mutate(join_num = 1:nrow(pca$x)) %>% 
+  full_join(pca_base %>% 
+              mutate(join_num=1:nrow(pca$x))) %>%
+  left_join(sam_dat) %>% 
+  mutate(batch_pca= as_factor(batch_GC),
+         class= as_factor(class)) %>% 
+  select(all_of(sam_vars), everything())
+
+exp_var <- as_tibble(pca[["importance"]])
+
+
+pca_plot %>% 
+  #  filter(treatment == "HL") %>% 
+  ggplot()+
+  geom_jitter(aes(x=PC1, y=PC2,  color = class)) +
+  stat_ellipse(aes(x=PC1, y=PC2, color = class)) +
+  geom_text(aes(x = PC1, y = PC2,
+                label = if_else(machine_num_GC == "21109rA_59", machine_num_GC, NULL)),
+            nudge_y = -2,
+            nudge_x = 4) +
+  ylab(str_c("PC2 ", "(", exp_var$PC2[[2]]*100, "%)")) +
+  xlab(str_c("PC1 ", "(", exp_var$PC1[[2]]*100, "%)"))
+
+ggsave(str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+             "cmQTL_val2_class_isnorm_PCA.jpg",
+             sep = "_"),
+       width = 183,
+       height = 100,
+       units = "mm",
+       dpi = 300)
+
+
+# Plot all metabolites ----------------------------------------------------
+
+plotmets <- features_all %>% distinct(met) %>% as_vector
+plotmet_labs <- plotmets %>% as_tibble() %>% 
+  left_join(met_dat, by= c("value" = "met")) %>% 
+  #mutate(peak_num = base::rank(HMDB_clear_name, ties.method = "first"),
+  #       dup = sum(peak_num),
+  #       HMDB_clear_name_unique = if_else(dup>1, str_c(HMDB_clear_name, peak_num), HMDB_clear_name)) %>% 
+  #ungroup() %>% 
+  select(Compound_Name) %>% as_vector()
+
+plot_out <- vector("list", length = length(plotmets))
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+           is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predict), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+   # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_fit.pdf"))
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+plot_out <- vector("list", length = length(plotmets))
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=loess_norm_fw_log)) +
+    geom_point(aes(color=class)) +
+    #geom_point(aes(y=predp), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_norm_fw_log.pdf"))
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=daily_num, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predict), color="black", size=0.1) +
+    facet_wrap(facets = vars(batch_GC), nrow = 1, scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]]) +
+    theme(axis.ticks.y = element_blank(),
+          axis.text.y = element_blank(),
+          panel.spacing = unit(-0.1,"lines"))
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_loess_norm_no_scale.pdf"),
+    height = 3.5)
+
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+for (meta in seq_along(plotmets)) {
+  
+  plot_out [[meta]] <- features_all %>% 
+    mutate(xint = if_else(daily_num == 4, run_num_GC-3.5, max(run_num_GC)),
+           is_miss = as_factor(if_else(is.na(area), T, F))) %>% 
+    filter(met == plotmets[[meta]]) %>%
+    filter(class!="blank") %>% 
+    ggplot(aes(x=run_num_GC, y=isnorm)) +
+    geom_point(aes(color = class, shape = is_miss)) +
+    geom_point(aes(y=predp), color="black", size=0.1) +
+    geom_vline(aes(xintercept = xint))+
+    # facet_grid(rows = vars(treatment), cols = vars(rep), scales = "free") +
+    ggtitle(label = plotmet_labs[[meta]])
+  
+}
+
+pdf(file = str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),"_linnorm.pdf"))
+
+for (meta in seq_along(plotmets)) {
+  print(plot_out[[meta]])
+}
+
+dev.off()
+
+rescale <- features_all %>% 
+  group_by(met, tissue, exp) %>% 
+  summarise(rescale = median(imp))
+
+sam_vars <- c("plantline", "alias", "LIMS_ID",
+              "treatment", "tissue", "batch_GC", "run_date_GC",
+              "extraction_num", "sample_num", "machine_num_GC",
+              "class", "run_num_GC", "sample_weight", "exp", "genotype")
+
+
+features_out <- features_all %>% 
+  left_join(rescale) %>% 
+  filter(class != "blank",
+         (component %in% take_split & str_detect(batch_GC, "split"))|
+           (!component %in% take_split & str_detect(batch_GC, "split", negate = T))|
+         tissue == "leaves") %>% 
+  group_by(met, tissue, exp) %>% 
+  mutate(loess_norm_med = loess_norm_fw/median(loess_norm_fw),
+         rescaled = loess_norm_med*rescale) %>% 
+  select(all_of(sam_vars), met, Compound_Name, Compound_Class, loess_norm_fw, loess_norm_med, area, rescaled)
+
+#features_out %>% 
+#  filter(met == "m_02177", tissue == "fruits", exp == 1) %>% 
+#  ggplot(aes(x = run_num_GC, y = loess_norm_fw)) +
+#  geom_point()
+
+# Write files -------------------------------------------------------------
+
+
+
+write_csv(features_out, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_feat_dat_GC.csv",
+                sep = "_"))
+
+write_csv(met_dat, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_met_dat_GC.csv",
+                sep = "_"))
+
+write_csv(sam_dat, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_sam_dat_GC.csv",
+                sep = "_"))
+
+write_csv(missingness, 
+          str_c(str_replace_all(Sys.Date(),"^.{2}|-",""),
+                "cmQTL_val_1_2_miss_GC.csv",
+                sep = "_"))
+
+# Log used code ------------------------------------------------------------
+
+file_name <- sys.frame(1)$ofile
+
+file.copy(file_name,
+          to = str_c(out_dir, str_remove(file_name, current), "_", str_replace_all(Sys.Date(),"^.{2}|-",""), ".R"),overwrite = T)