diff --git a/.Rhistory b/.Rhistory
index ca96489870cdb82792ee9fd2a44711ee29488fae..0a2eccedf5c0dbebc967f6220e55b81299e491b5 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -20,50 +20,87 @@ dir.create(out)
 }
 setwd(here())# Not recommended but convenient in Rstudio to start from root
 sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx"))
-isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1)
-isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2)
-take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31",
-"glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57",
-"pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72",
-"arginine_157_256_rt9.92")
-exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78")
-exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak
-area1 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
-area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
-area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A"))
-#Add primary metabolite MAF
-metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>%
-select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>%
-mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>%
-filter(!is.na(component)) %>%
-group_by(Compound_Name)  %>%
-mutate(peak_no = rank(RT_mean),
-Compound_Name = if_else(duplicated(Compound_Name),
-str_c(Compound_Name, "peak", peak_no, sep = "_"),
-Compound_Name))
-area <- area1 %>%
-bind_rows(area2, area3) %>%
-select(component, area, machine_num_GC = machine_num,rt) %>%
-mutate(area = as.numeric(area),
-rt = as.numeric(rt))
-rt_mean <- area %>%
-group_by(component) %>%
-summarise(RT_mean = mean(rt, na.rm = T))
-metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>%
-select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>%
-left_join(rt_mean) %>%
-filter(!is.na(component)) %>%
-arrange(Compound_Name, RT_mean) %>%
-group_by(Compound_Name)  %>%
-mutate(peak_no = rank(RT_mean),
-Compound_Name = if_else(duplicated(Compound_Name),
-str_c(Compound_Name, "peak", peak_no, sep = "_"),
-Compound_Name))
-sam_vars <- c("plantline", "alias", "LIMS_ID",
-"treatment", "tissue", "batch_GC", "run_date_GC",
-"extraction_num", "sample_num", "machine_num_GC",
-"class", "run_num_GC", "sample_weight", "exp", "genotype")
+View(sam_dat1)
 sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "^\\d{2},\\d{2}"))
+View(sam_dat1_tidy)
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2},\\d{2}"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}")))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(str_remove(Parameter))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(str_remove("Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(str_remove(pattern = "Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(str_remove(string = .x, pattern = "Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), str_remove(string = .x, pattern = "Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = str_remove(string = .x, pattern = "Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter"))
+View(sam_dat1_tidy)
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w"), replacement = "_")
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w", replacement = "_"))
+?stringr
+View(sam_dat1_tidy)
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\s", replacement = "_"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
 sam_dat1_tidy <- GC_run1 %>%
 left_join(GC_machine_nums) %>%
 select(extraction_num = `Sample name`, everything())%>%
@@ -77,53 +114,35 @@ left_join(genotypes) %>%
 select(treatment = irrigation, everything()) %>%
 select(all_of(sam_vars)) %>%
 arrange(run_num_GC)
-sam_dat2_tidy <- GC_run2 %>%
-left_join(GC_machine_nums) %>%
-select(extraction_num = `Sample name`, everything()) %>%
-mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc",
-if_else(str_detect(extraction_num, "blank"), "blank", "sample"))),
-extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0",
-if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))),
-exp = as_factor(2)) %>%
-left_join(sam_dat2) %>%
-left_join(genotypes) %>%
-select(treatment = irrigation, everything()) %>%
-select(all_of(sam_vars)) %>%
-arrange(run_num_GC)
-sam_dat <- sam_dat1_tidy %>%
-bind_rows(sam_dat2_tidy) %>%
-arrange(run_num_GC) %>%
-group_by(batch_GC) %>%
-mutate(daily_num = row_number()) %>%
-fill(tissue, .direction = "updown") %>%
-ungroup() %>%
-left_join(genotypes) %>%
-mutate(treatment = as_factor(treatment))
-area_long <- area %>%
-left_join(sam_dat) %>%
-filter(!is.na(exp))
-area <- area_long %>%
-left_join(metdat_GC_class) %>%
-filter(!str_detect(component, "^\\!|FAME|component|empty"), !is.na(component),
-!component %in% exclude_mets)
-library(tidyverse)
-library(igraph)
-library(ggnetwork)
-#install.packages("igraph")
-#install.packages("ggnetwork")
-diamonds <- diamonds
-a <- ggplot(economics, aes(date, unemploy))
-a + geom_path()
-economics %>%
-arrange(unemploy) %>%
-ggplot(aes(date, unemploy)) +
-geom_path()
-met_path <- readxl::read_xlsx("KEGG_sly_pathways.xlsx")
-node_cols <- met_path %>%
-pivot_longer(cols = c(educt, product),
-names_to = "node",
-values_to = "met") %>%
-distinct(met) %>%
-select(met, everything()) %>%
-mutate(Compound_Name = met)
-met_path <- readxl::read_xlsx("KEGG_sly_pathways.xlsx")
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
+sam_dat1_tidy <- sam_dat1 %>%
+mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>%
+rename(source_name = `Source Name`) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>%
+rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>%
+rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_"))
+x <- c(1,2,3,4)
+y <- c("A", "B","C", "D")
+replace(x, c(3,2,1,4), y)
+colnames(sam_dat1_tidy)
+sam_dat1_tidy$`multi-tissue_plant_structure`