diff --git a/.Rhistory b/.Rhistory
index 0a2eccedf5c0dbebc967f6220e55b81299e491b5..6015b4e3682cd11ee93cefac51ebe0f6a9b4f6bf 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -21,128 +21,108 @@ dir.create(out)
 setwd(here())# Not recommended but convenient in Rstudio to start from root
 sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx"))
 View(sam_dat1)
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "^\\d{2},\\d{2}"))
+isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1)
+isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2)
+isa_ms <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 3)
+take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31",
+"glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57",
+"pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72",
+"arginine_157_256_rt9.92")
+exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78")
+exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak
+area1 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
+area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
+area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
+#Add primary metabolite MAF
+metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx"))
+area <- area1 %>%
+bind_rows(area2, area3) %>%
+select(component, area, machine_num_GC = machine_num,rt) %>%
+mutate(area = as.numeric(area),
+rt = as.numeric(rt))
+rt_mean <- area %>%
+group_by(component) %>%
+summarise(RT_mean = mean(rt, na.rm = T))
+View(metdat_GC_class)
+metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) %>%
+select(component = Xcal_name_xreport, Compound_Name = PubChem_Name_mapped)%>%
+left_join(rt_mean) %>%
+filter(!is.na(component)) %>%
+arrange(Compound_Name, RT_mean) %>%
+group_by(Compound_Name)  %>%
+mutate(peak_no = rank(RT_mean),
+Compound_Name = if_else(duplicated(Compound_Name),
+str_c(Compound_Name, "peak", peak_no, sep = "_"),
+Compound_Name))
+View(metdat_GC_class)
+sam_vars <- c("plantline", "alias", "LIMS_ID",
+"treatment", "tissue", "batch_GC", "run_date_GC",
+"extraction_num", "sample_num", "machine_num_GC",
+"class", "run_num_GC", "sample_weight", "exp", "genotype")
+sam_dat1_tidy <- sam_dat1 %>%
+left_join(isa_ext)#
+colnames(sam_dat1)
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`,
+plantline = `Characteristic [plantline]`,
+alias = `Characteristic [alias]`,
+LIMS_ID = `Characteristic [LIMS aliquot]`,
+treatment = `Factor [Irrigation factor]`,
+tissue = `Characteristic [multi-tissue plant structure]`,
+genotype = `Characteristic [genotype]`,
+sample_num = `Characteristic [sample_name_non_unique]`,
+extraction_num = `Characteristic [extract number]`) %>%
+select(%in% sam_vars)
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`,
+plantline = `Characteristic [plantline]`,
+alias = `Characteristic [alias]`,
+LIMS_ID = `Characteristic [LIMS aliquot]`,
+treatment = `Factor [Irrigation factor]`,
+tissue = `Characteristic [multi-tissue plant structure]`,
+genotype = `Characteristic [genotype]`,
+sample_num = `Characteristic [sample_name_non_unique]`,
+extraction_num = `Characteristic [extract number]`) %>%
+select(any_of(sam_vars))
 View(sam_dat1_tidy)
 sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2},\\d{2}"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}")))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(str_remove(Parameter))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(str_remove("Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(str_remove(pattern = "Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(str_remove(string = .x, pattern = "Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), str_remove(string = .x, pattern = "Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = str_remove(string = .x, pattern = "Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter"))
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) #%>%
 View(sam_dat1_tidy)
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w"), replacement = "_")
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w", replacement = "_"))
-?stringr
+sam_dat1_tidy$sample_weight
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*")
+str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*\\d{0,2}")
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) #%>%
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) %>%
+rename(source_name = `Source Name`,
+plantline = `Characteristic [plantline]`,
+alias = `Characteristic [alias]`,
+LIMS_ID = `Characteristic [LIMS aliquot]`,
+treatment = `Factor [Irrigation factor]`,
+tissue = `Characteristic [multi-tissue plant structure]`,
+genotype = `Characteristic [genotype]`,
+sample_num = `Characteristic [sample_name_non_unique]`,
+extraction_num = `Characteristic [extract number]`) %>%
+select(any_of(sam_vars))
 View(sam_dat1_tidy)
 sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\s", replacement = "_"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-sam_dat1_tidy <- GC_run1 %>%
-left_join(GC_machine_nums) %>%
-select(extraction_num = `Sample name`, everything())%>%
-mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
-if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
-extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
-if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
-exp = as_factor(1)) %>%
-left_join(sam_dat1) %>%
-left_join(genotypes) %>%
-select(treatment = irrigation, everything()) %>%
-select(all_of(sam_vars)) %>%
-arrange(run_num_GC)
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-sam_dat1_tidy <- sam_dat1 %>%
-mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
-rename(source_name = `Source Name`) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>%
-rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>%
-rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-x <- c(1,2,3,4)
-y <- c("A", "B","C", "D")
-replace(x, c(3,2,1,4), y)
-colnames(sam_dat1_tidy)
-sam_dat1_tidy$`multi-tissue_plant_structure`
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) %>%
+rename(source_name = `Source Name`,
+plantline = `Characteristic [plantline]`,
+alias = `Characteristic [alias]`,
+LIMS_ID = `Characteristic [LIMS aliquot]`,
+treatment = `Factor [Irrigation factor]`,
+tissue = `Characteristic [multi-tissue plant structure]`,
+genotype = `Characteristic [genotype]`,
+sample_num = `Characteristic [sample_name_non_unique]`,
+extraction_num = `Characteristic [extract number]`,
+sample_name = `Sample Name`) %>%
+select(any_of(sam_vars), sample_name)
diff --git a/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R
index 4a0e92ba6dac7cefaa98bf14e2b6478c1e3da928..9b1830c521ca077c4493f4d83741048e1af2dc68 100644
--- a/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R
+++ b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R
@@ -39,6 +39,7 @@ sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx"))
 
 isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1)
 isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2)
+isa_ms <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 3)
 #genotypes <- readxl::read_xlsx("Genotype_names.xlsx") %>%
 #  mutate(plantline = as_factor(plantline))
 
@@ -59,16 +60,8 @@ area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_c
 area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
 
 #Add primary metabolite MAF
-metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>% 
-  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
-  mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>% 
-  filter(!is.na(component)) %>% 
-  group_by(Compound_Name)  %>% 
-  mutate(peak_no = rank(RT_mean),
-         Compound_Name = if_else(duplicated(Compound_Name),
-                                 str_c(Compound_Name, "peak", peak_no, sep = "_"),
-                                 Compound_Name))
-
+metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx"))
+  
 area <- area1 %>% 
   bind_rows(area2, area3) %>% 
   select(component, area, machine_num_GC = machine_num,rt) %>% 
@@ -79,8 +72,8 @@ rt_mean <- area %>%
   group_by(component) %>% 
   summarise(RT_mean = mean(rt, na.rm = T))
 
-metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>% 
-  select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% 
+metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) %>% 
+  select(component = Xcal_name_xreport, Compound_Name = PubChem_Name_mapped)%>% 
   left_join(rt_mean) %>% 
   filter(!is.na(component)) %>% 
   arrange(Compound_Name, RT_mean) %>% 
@@ -90,25 +83,32 @@ metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/21
                                  str_c(Compound_Name, "peak", peak_no, sep = "_"),
                                  Compound_Name))
 
-
 # Data combination --------------------------------------------------------
 
-
-
 sam_vars <- c("plantline", "alias", "LIMS_ID",
               "treatment", "tissue", "batch_GC", "run_date_GC",
               "extraction_num", "sample_num", "machine_num_GC",
               "class", "run_num_GC", "sample_weight", "exp", "genotype")
 
 sam_dat1_tidy <- sam_dat1 %>% 
-  mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% 
-  rename(source_name = `Source Name`) %>% 
-  rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% 
-  rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% 
-  rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% 
-  rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>% 
-  rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
-  
+  # Parse the first decimal number in the weight cell; the pattern needs >= 1 digit, so it never matches "".
+  mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d*\\.?\\d+"))) %>%
+  rename(source_name = `Source Name`,
+         plantline = `Characteristic [plantline]`, alias = `Characteristic [alias]`,
+         LIMS_ID = `Characteristic [LIMS aliquot]`,
+         treatment = `Factor [Irrigation factor]`,
+         tissue = `Characteristic [multi-tissue plant structure]`,
+         genotype = `Characteristic [genotype]`,
+         sample_num = `Characteristic [sample_name_non_unique]`,
+         extraction_num = `Characteristic [extract number]`,
+         sample_name = `Sample Name`) %>%
+  select(any_of(sam_vars), sample_name) # any_of(): sam_vars columns absent from the study sheet are skipped
+
+# TODO: give the assay sheets the same explicit rename()/select() treatment as sam_dat1_tidy.
+isa_ext_tidy <- isa_ext %>%
+  rename() # no columns are renamed yet, so this is currently an unchanged copy of isa_ext
+# isa_gc_tidy and isa_ms_tidy are not assigned in the visible part of this script; evaluating
+# the bare names would stop source() with "object not found", so they stay commented out.
 
 sam_dat1_tidy <- GC_run1 %>% 
   left_join(GC_machine_nums) %>% 
@@ -158,7 +158,7 @@ area <- area_long %>%
          !component %in% exclude_mets)
 
 met_dat = area %>% 
-  distinct(RT_mean, Compound_Name, Compound_Class, component) %>% 
+  distinct(RT_mean, Compound_Name,  component) %>% 
   mutate(met = str_c("m", row_number(), sep = "_"))
 
 area <- area %>% 
@@ -1084,7 +1084,7 @@ features_out <- features_all %>%
   group_by(met, tissue, exp) %>% 
   mutate(loess_norm_med = loess_norm_fw/median(loess_norm_fw),
          rescaled = loess_norm_med*rescale) %>% 
-  select(all_of(sam_vars), met, Compound_Name, Compound_Class, loess_norm_fw, loess_norm_med, area, rescaled)
+  select(all_of(sam_vars), met, Compound_Name,  loess_norm_fw, loess_norm_med, area, rescaled)
 
 #features_out %>% 
 #  filter(met == "m_02177", tissue == "fruits", exp == 1) %>%