diff --git a/.Rhistory b/.Rhistory index ca96489870cdb82792ee9fd2a44711ee29488fae..0a2eccedf5c0dbebc967f6220e55b81299e491b5 100644 --- a/.Rhistory +++ b/.Rhistory @@ -20,50 +20,87 @@ dir.create(out) } setwd(here())# Not recommended but convenient in Rstudio to start from root sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx")) -isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1) -isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2) -take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31", -"glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57", -"pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72", -"arginine_157_256_rt9.92") -exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78") -exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak -area1 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) -area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) -area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) -#Add primary metabolite MAF -metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>% -select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% -mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>% -filter(!is.na(component)) %>% -group_by(Compound_Name) %>% -mutate(peak_no = rank(RT_mean), -Compound_Name = if_else(duplicated(Compound_Name), -str_c(Compound_Name, "peak", peak_no, sep = "_"), -Compound_Name)) -area <- area1 %>% -bind_rows(area2, area3) %>% -select(component, area, machine_num_GC = machine_num,rt) %>% -mutate(area = as.numeric(area), -rt = as.numeric(rt)) -rt_mean <- area %>% -group_by(component) %>% -summarise(RT_mean = mean(rt, na.rm = T)) -metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>% -select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% -left_join(rt_mean) %>% -filter(!is.na(component)) %>% -arrange(Compound_Name, RT_mean) %>% -group_by(Compound_Name) %>% -mutate(peak_no = rank(RT_mean), -Compound_Name = if_else(duplicated(Compound_Name), -str_c(Compound_Name, "peak", peak_no, sep = "_"), -Compound_Name)) -sam_vars <- c("plantline", "alias", "LIMS_ID", -"treatment", "tissue", "batch_GC", "run_date_GC", -"extraction_num", "sample_num", "machine_num_GC", -"class", "run_num_GC", "sample_weight", "exp", "genotype") +View(sam_dat1) sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "^\\d{2},\\d{2}")) +View(sam_dat1_tidy) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2},\\d{2}")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(str_remove(Parameter)) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(str_remove("Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(str_remove(pattern = "Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(str_remove(string = .x, pattern = "Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), str_remove(string = .x, pattern = "Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = str_remove(string = .x, pattern = "Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) +View(sam_dat1_tidy) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w"), replacement = "_") +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w", replacement = "_")) +?stringr +View(sam_dat1_tidy) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\s", replacement = "_")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) sam_dat1_tidy <- GC_run1 %>% left_join(GC_machine_nums) %>% select(extraction_num = `Sample name`, everything())%>% @@ -77,53 +114,35 @@ left_join(genotypes) %>% select(treatment = irrigation, everything()) %>% select(all_of(sam_vars)) %>% arrange(run_num_GC) -sam_dat2_tidy <- GC_run2 %>% -left_join(GC_machine_nums) %>% -select(extraction_num = `Sample name`, everything()) %>% -mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc", -if_else(str_detect(extraction_num, "blank"), "blank", "sample"))), -extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0", -if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))), -exp = as_factor(2)) %>% -left_join(sam_dat2) %>% -left_join(genotypes) %>% -select(treatment = irrigation, everything()) %>% -select(all_of(sam_vars)) %>% -arrange(run_num_GC) -sam_dat <- sam_dat1_tidy %>% -bind_rows(sam_dat2_tidy) %>% -arrange(run_num_GC) %>% -group_by(batch_GC) %>% -mutate(daily_num = row_number()) %>% -fill(tissue, .direction = "updown") %>% -ungroup() %>% -left_join(genotypes) %>% -mutate(treatment = as_factor(treatment)) -area_long <- area %>% -left_join(sam_dat) %>% -filter(!is.na(exp)) -area <- area_long %>% -left_join(metdat_GC_class) %>% -filter(!str_detect(component, "^\\!|FAME|component|empty"), !is.na(component), -!component %in% exclude_mets) -library(tidyverse) -library(igraph) -library(ggnetwork) -#install.packages("igraph") -#install.packages("ggnetwork") -diamonds <- diamonds -a <- ggplot(economics, aes(date, unemploy)) -a + geom_path() -economics %>% -arrange(unemploy) %>% -ggplot(aes(date, unemploy)) + -geom_path() -met_path <- readxl::read_xlsx("KEGG_sly_pathways.xlsx") -node_cols <- met_path %>% -pivot_longer(cols = c(educt, product), -names_to = "node", -values_to = "met") %>% -distinct(met) %>% -select(met, everything()) %>% -mutate(Compound_Name = met) -met_path <- readxl::read_xlsx("KEGG_sly_pathways.xlsx") +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% +rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>% +rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) +x <- c(1,2,3,4) +y <- c("A", "B","C", "D") +replace(x, c(3,2,1,4), y) +colnames(sam_dat1_tidy) +sam_dat1_tidy$`multi-tissue_plant_structure`