diff --git a/.Rhistory b/.Rhistory index 0a2eccedf5c0dbebc967f6220e55b81299e491b5..6015b4e3682cd11ee93cefac51ebe0f6a9b4f6bf 100644 --- a/.Rhistory +++ b/.Rhistory @@ -21,128 +21,108 @@ dir.create(out) setwd(here())# Not recommended but convenient in Rstudio to start from root sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx")) View(sam_dat1) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "^\\d{2},\\d{2}")) +isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1) +isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2) +isa_ms <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 3) +take_split <- c("fructose_307_217_rt9.48", "glucose_160_319_rt9.68","glucose_160_rt9.81", "glutamic_acid_246_363_rt8.31", +"glutamine_156_245_rt9.80", "malic_acid_233_245_rt7.22", "shikimic_acid_204_462_rt9.57", "shikimic_acid_204_462_rt9.57", +"pyroglutamic_acid_156_258_rt8.30", "sucrose_437_361_rt13.77", "sucrose2_204_361_rt13.79", "citric_acid_273_375_rt9.72", +"arginine_157_256_rt9.92") +exclude_samples <- c("21106rA_31", "21107rA_54", "21109rA_59", "21109rA_86", "21109rA_78") +exclude_mets <- c("psicose_103_217_rt9.38", "glutamic_acid_246_363_rt8.31", "lactic_acid_117_219_rt3.07")#glu wrong peak +area1 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_seq_file_20210914143103_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) +area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_fruits_split_seq_file_20210914164507_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) +area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) +#Add primary metabolite MAF +metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) +area <- area1 %>% +bind_rows(area2, area3) %>% +select(component, area, machine_num_GC = machine_num,rt) %>% +mutate(area = as.numeric(area), +rt = as.numeric(rt)) +rt_mean <- area %>% +group_by(component) %>% +summarise(RT_mean = mean(rt, na.rm = T)) +View(metdat_GC_class) +metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) %>% +select(component = Xcal_name_xreport, Compound_Name = PubChem_Name_mapped)%>% +left_join(rt_mean) %>% +filter(!is.na(component)) %>% +arrange(Compound_Name, RT_mean) %>% +group_by(Compound_Name) %>% +mutate(peak_no = rank(RT_mean), +Compound_Name = if_else(duplicated(Compound_Name), +str_c(Compound_Name, "peak", peak_no, sep = "_"), +Compound_Name)) +View(metdat_GC_class) +sam_vars <- c("plantline", "alias", "LIMS_ID", +"treatment", "tissue", "batch_GC", "run_date_GC", +"extraction_num", "sample_num", "machine_num_GC", +"class", "run_num_GC", "sample_weight", "exp", "genotype") +sam_dat1_tidy <- sam_dat1 %>% +left_join(isa_ext)# +colnames(sam_dat1) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`, +plantline = `Characteristic [plantline]`, +alias = `Characteristic [alias]`, +LIMS_ID = `Characteristic [LIMS aliquot]`, +treatment = `Factor [Irrigation factor]`, +tissue = `Characteristic [multi-tissue plant structure]`, +genotype = `Characteristic [genotype]`, +sample_num = `Characteristic [sample_name_non_unique]`, +extraction_num = `Characteristic [extract number]`) %>% +select(%in% sam_vars) +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% +rename(source_name = `Source Name`, +plantline = `Characteristic [plantline]`, +alias = `Characteristic [alias]`, +LIMS_ID = `Characteristic [LIMS aliquot]`, +treatment = `Factor [Irrigation factor]`, +tissue = `Characteristic [multi-tissue plant structure]`, +genotype = `Characteristic [genotype]`, +sample_num = `Characteristic [sample_name_non_unique]`, +extraction_num = `Characteristic [extract number]`) %>% +select(any_of(sam_vars)) View(sam_dat1_tidy) sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2},\\d{2}")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(str_remove(Parameter)) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(str_remove("Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(str_remove(pattern = "Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(str_remove(string = .x, pattern = "Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), str_remove(string = .x, pattern = "Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = str_remove(string = .x, pattern = "Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) #%>% View(sam_dat1_tidy) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w"), replacement = "_") -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\w", replacement = "_")) -?stringr +sam_dat1_tidy$sample_weight +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*") +str_extract(sam_dat1$`Factor [sample fresh weight]`, "\\d{2}\\.*\\d{0,2}") +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) #%>% +sam_dat1_tidy <- sam_dat1 %>% +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) %>% +rename(source_name = `Source Name`, +plantline = `Characteristic [plantline]`, +alias = `Characteristic [alias]`, +LIMS_ID = `Characteristic [LIMS aliquot]`, +treatment = `Factor [Irrigation factor]`, +tissue = `Characteristic [multi-tissue plant structure]`, +genotype = `Characteristic [genotype]`, +sample_num = `Characteristic [sample_name_non_unique]`, +extraction_num = `Characteristic [extract number]`) %>% +select(any_of(sam_vars)) View(sam_dat1_tidy) sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "\\s", replacement = "_")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -sam_dat1_tidy <- GC_run1 %>% -left_join(GC_machine_nums) %>% -select(extraction_num = `Sample name`, everything())%>% -mutate(class = as_factor(if_else(str_detect(extraction_num, "run_qc"), "run_qc", -if_else(str_detect(extraction_num, "blank"), "blank", "sample"))), -extraction_num = as.numeric(if_else(str_detect(extraction_num, "run_qc"), "0", -if_else(str_detect(extraction_num, "blank"), "-1",extraction_num))), -exp = as_factor(1)) %>% -left_join(sam_dat1) %>% -left_join(genotypes) %>% -select(treatment = irrigation, everything()) %>% -select(all_of(sam_vars)) %>% -arrange(run_num_GC) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter [")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic [")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -sam_dat1_tidy <- sam_dat1 %>% -mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% -rename(source_name = `Source Name`) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% -rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>% -rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) -x <- c(1,2,3,4) -y <- c("A", "B","C", "D") -replace(x, c(3,2,1,4), y) -colnames(sam_dat1_tidy) -sam_dat1_tidy$`multi-tissue_plant_structure` +mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) %>% +rename(source_name = `Source Name`, +plantline = `Characteristic [plantline]`, +alias = `Characteristic [alias]`, +LIMS_ID = `Characteristic [LIMS aliquot]`, +treatment = `Factor [Irrigation factor]`, +tissue = `Characteristic [multi-tissue plant structure]`, +genotype = `Characteristic [genotype]`, +sample_num = `Characteristic [sample_name_non_unique]`, +extraction_num = `Characteristic [extract number]`, +sample_name = `Sample Name`) %>% +select(any_of(sam_vars), sample_name) diff --git a/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R index 4a0e92ba6dac7cefaa98bf14e2b6478c1e3da928..9b1830c521ca077c4493f4d83741048e1af2dc68 100644 --- a/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R +++ b/workflows/GC_MS_normalization/210927_primary_normalization_with_split.R @@ -39,6 +39,7 @@ sam_dat1 <- readxl::read_xlsx(here("studies/cmQTL_val1_GH_2020/isa.study.xlsx")) isa_ext <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 1) isa_gc <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 2) +isa_ms <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/isa.assay.xlsx"), sheet = 3) #genotypes <- readxl::read_xlsx("Genotype_names.xlsx") %>% # mutate(plantline = as_factor(plantline)) @@ -59,16 +60,8 @@ area2 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_c area3 <- readxl::read_xls(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/210914_cmQTL_val_1_2_leaves_seq_file_20210914125126_comp_file_area_rt1.bkt.xls"), na = c("", "N/A")) #Add primary metabolite MAF -metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Ath_Dark_Light_GC_Xcal/current_source_files/210118_primary_metabolites_classification.xlsx") %>% - select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% - mutate(RT_mean = str_extract(component, "\\d+\\.\\d+$")) %>% - filter(!is.na(component)) %>% - group_by(Compound_Name) %>% - mutate(peak_no = rank(RT_mean), - Compound_Name = if_else(duplicated(Compound_Name), - str_c(Compound_Name, "peak", peak_no, sep = "_"), - Compound_Name)) - +metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) + area <- area1 %>% bind_rows(area2, area3) %>% select(component, area, machine_num_GC = machine_num,rt) %>% @@ -79,8 +72,8 @@ rt_mean <- area %>% group_by(component) %>% summarise(RT_mean = mean(rt, na.rm = T)) -metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx") %>% - select(component = Xcal_name_xreport, Compound_Name = HMDB_clear_name, Compound_Class = ChEBI_Ontology_dense)%>% +metdat_GC_class <- readxl::read_xlsx(here("assays/cmQTL_val1_GH_2020_GC_MS/dataset/MAF_GC_MS.xlsx")) %>% + select(component = Xcal_name_xreport, Compound_Name = PubChem_Name_mapped)%>% left_join(rt_mean) %>% filter(!is.na(component)) %>% arrange(Compound_Name, RT_mean) %>% @@ -90,25 +83,32 @@ metdat_GC_class <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/21 str_c(Compound_Name, "peak", peak_no, sep = "_"), Compound_Name)) - # Data combination -------------------------------------------------------- - - sam_vars <- c("plantline", "alias", "LIMS_ID", "treatment", "tissue", "batch_GC", "run_date_GC", "extraction_num", "sample_num", "machine_num_GC", "class", "run_num_GC", "sample_weight", "exp", "genotype") sam_dat1_tidy <- sam_dat1 %>% - mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{2}\\.\\d{2}"))) %>% - rename(source_name = `Source Name`) %>% - rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Parameter \\[")) %>% - rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Characteristic \\[")) %>% - rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "Factor \\[")) %>% - rename_with(.cols = everything(), .fn = ~str_remove(string = .x, pattern = "\\]")) %>% - rename_with(.cols = everything(), .fn = ~str_replace_all(string = .x, pattern = "[:blank:]", replacement = "_")) - + mutate(sample_weight = as.double(str_extract(`Factor [sample fresh weight]`, "\\d{0,3}\\.*\\d{0,2}"))) %>% + rename(source_name = `Source Name`, + plantline = `Characteristic [plantline]`, + alias = `Characteristic [alias]`, + LIMS_ID = `Characteristic [LIMS aliquot]`, + treatment = `Factor [Irrigation factor]`, + tissue = `Characteristic [multi-tissue plant structure]`, + genotype = `Characteristic [genotype]`, + sample_num = `Characteristic [sample_name_non_unique]`, + extraction_num = `Characteristic [extract number]`, + sample_name = `Sample Name`) %>% + select(any_of(sam_vars), sample_name) + +isa_ext_tidy <- isa_ext %>% + rename() + +isa_gc_tidy +isa_ms_tidy sam_dat1_tidy <- GC_run1 %>% left_join(GC_machine_nums) %>% @@ -158,7 +158,7 @@ area <- area_long %>% !component %in% exclude_mets) met_dat = area %>% - distinct(RT_mean, Compound_Name, Compound_Class, component) %>% + distinct(RT_mean, Compound_Name, component) %>% mutate(met = str_c("m", row_number(), sep = "_")) area <- area %>% @@ -1084,7 +1084,7 @@ features_out <- features_all %>% group_by(met, tissue, exp) %>% mutate(loess_norm_med = loess_norm_fw/median(loess_norm_fw), rescaled = loess_norm_med*rescale) %>% - select(all_of(sam_vars), met, Compound_Name, Compound_Class, loess_norm_fw, loess_norm_med, area, rescaled) + select(all_of(sam_vars), met, Compound_Name, loess_norm_fw, loess_norm_med, area, rescaled) #features_out %>% # filter(met == "m_02177", tissue == "fruits", exp == 1) %>%