Skip to content
Snippets Groups Projects
Commit 37f9befd authored by MWA_AG_Fernie_MPIMP's avatar MWA_AG_Fernie_MPIMP
Browse files

uploaded scripts and datasets

parent 6b92825f
No related branches found
No related tags found
No related merge requests found
Showing
with 9333 additions and 0 deletions
**/dataset/** filter=lfs diff=lfs merge=lfs -text
/assays/cmQTL_val1_GH_2020_polar_LC_MS/dataset/all_clusters_summed_intensity.gda filter=lfs diff=lfs merge=lfs -text
File deleted
source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
File added
source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
File added
No preview for this file type
This diff is collapsed.
rm(list = ls())
library(tidyverse)
library(ggpubr)
library(glue)
library(ggtext)
library(cowplot)
library(ggbeeswarm)
library(extrafont)
library(ggprism)
library(viridis)
# Directory setting -------------------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
getwd()
current <- getwd()
source <- str_c(current,"/..")
cur_date <- str_c(str_replace_all(Sys.Date(),"^.{2}|-",""))
out <- str_c(cur_date, "Metabolitedata", sep = "_")
if (file.exists(out)) {
cat("The folder already exists")
} else {
dir.create(out)
}
out_dir <- str_c(current, out, sep = "/")
# Primary loading ---------------------------------------------------------
setwd(source)
prim_source <- str_c(getwd(), "/1. Primary")
setwd(prim_source)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"),
pattern = "^\\d{6}_analysis"),
decreasing = T)[[1]]
latest_analysis <- str_c(prim_source, "/", latest)
setwd(latest_analysis)
latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}")
fc_1_prim <- read_csv("mean_values_se_n.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")),
platform = "polar GC-MS")
genotypes <- fc_1_prim %>%
distinct(alias, genotype) %>%
mutate(genotype_label = str_remove_all(genotype, "\\*"))
fc_1_ind_prim <- read_csv("individual_values.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_prim <- read_csv("p_values.csv")
fc_1_cv_prim <- read_csv("mean_values_se_n_cv.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_ind_cv_prim <- read_csv("individual_values_cv.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_prim_cv <- read_csv("p_values_cv.csv") %>%
mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))),
group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")),
group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))),
group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_lt_prim <- read_csv("mean_values_se_n_levene.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>%
mutate(platform = "polar GC-MS")
fc_1_ind_lt_prim <- read_csv("individual_values_levene.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_prim_lt <- read_csv("p_values_levene.csv")
met_prim <- fc_1_prim %>%
distinct(met, Compound_Name, Compound_Class)
cv_prim_out <- fc_1_cv_prim %>%
left_join(met_prim) %>%
left_join(sig_prim_cv, by = c("tissue", "met", "alias" = "alias2")) %>%
select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>%
mutate(platform = "polar GC-MS")
setwd(prim_source)
prim_met_lib <- readxl::read_xlsx("H:/3. cmQTL mapping/Shared_source_files/210118_primary_metabolites_classification.xlsx")
prim_met_lib_tidy <- prim_met_lib %>%
select(component = Xcal_name_xreport, mz_mean = mz)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"),
pattern = "^\\d{6}_normalization"),
decreasing = T)[[1]]
latest_normalization <- str_c(prim_source, "/", latest)
latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}")
setwd(latest_normalization)
prim_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_GC.csv")) %>%
select(Compound_Name, met, RT_mean, component)
prim_met_total <- met_prim %>%
left_join(prim_met_dat) %>%
left_join(prim_met_lib_tidy) %>%
mutate(platform = "polar GC-MS") %>%
mutate(Compound_Class = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative",
if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative",
if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid",
"other"))))
# Secondary loading ---------------------------------------------------------
setwd(source)
sec_source <- str_c(getwd(), "/2. Secondary")
setwd(sec_source)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"),
pattern = "^\\d{6}_analysis"),
decreasing = T)[[1]]
latest_analysis <- str_c(sec_source, "/", latest)
setwd(latest_analysis)
latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}")
fc_1_sec <- read_csv("mean_values_se_n.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>%
mutate(platform = "polar LC-MS") %>%
rename(n = n.x)
genotypes <- fc_1_sec %>%
distinct(alias, genotype) %>%
mutate(genotype_label = str_remove_all(genotype, "\\*"))
fc_1_ind_sec <- read_csv("individual_values.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_sec <- read_csv("p_values.csv")
fc_1_cv_sec <- read_csv("mean_values_se_n_cv.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_ind_cv_sec <- read_csv("individual_values_cv.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_sec_cv <- read_csv("p_values_cv.csv") %>%
mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))),
group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")),
group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))),
group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_lt_sec <- read_csv("mean_values_se_n_levene.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>%
mutate(platform = "polar LC-MS") %>%
rename(n = n.x)
fc_1_ind_lt_sec <- read_csv("individual_values_levene.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_sec_lt <- read_csv("p_values_levene.csv")
met_sec <- fc_1_sec %>%
distinct(met, Compound_Name, Compound_Class)
cv_sec_out <- fc_1_cv_sec %>%
left_join(met_sec) %>%
left_join(sig_sec_cv, by = c("tissue", "met", "alias" = "alias2")) %>%
select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>%
mutate(platform = "polar LC-MS")
setwd(sec_source)
sec_met_lib <- read_delim("cmQTL_val1_selected_secondary.txt", delim = "\t")
sec_met_lib_tidy <- sec_met_lib %>%
select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, mode, adduct_pos, adduct_neg,
Mol.formula, mz_lit_pos, mz_lit_neg,
RT_lit, adduct_lit, Species.detected.before, lit) %>%
distinct(mz_mean, RT_mean, .keep_all = T)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"),
pattern = "^\\d{6}_normalization"),
decreasing = T)[[1]]
latest_normalization <- str_c(sec_source, "/", latest)
latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}")
setwd(latest_normalization)
sec_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_LC.csv"))
sec_met_total <- met_sec %>%
left_join(sec_met_dat) %>%
left_join(sec_met_lib_tidy, by = c("mz_mean", "RT_mean")) %>%
mutate(platform = "polar LC-MS")
# Lipids loading ---------------------------------------------------------
setwd(source)
lip_source <- str_c(getwd(), "/3. Lipids")
setwd(lip_source)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_analysis$"),
pattern = "^\\d{6}_analysis"),
decreasing = T)[[1]]
latest_analysis <- str_c(lip_source, "/", latest)
setwd(latest_analysis)
latest_analysis_date <- str_extract(latest, pattern = "^\\d{6}")
fc_1_lip <- read_csv("mean_values_se_n.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))%>%
mutate(platform = "apolar LC-MS") %>%
rename(n = n.x)
genotypes <- fc_1_lip %>%
distinct(alias, genotype) %>%
mutate(genotype_label = str_remove_all(genotype, "\\*"))
fc_1_ind_lip <- read_csv("individual_values.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_lip <- read_csv("p_values.csv")
fc_1_cv_lip <- read_csv("mean_values_se_n_cv.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_ind_cv_lip <- read_csv("individual_values_cv.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_lip_cv <- read_csv("p_values_cv.csv") %>%
mutate(group1 = as_factor(if_else(group1 == "MoneyMaker", glue("{group1}"),glue("*{group1}*"))),
group1 = fct_relevel(group1, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")),
group2 = as_factor(if_else(group2 == "MoneyMaker", glue("{group2}"),glue("*{group2}*"))),
group2 = fct_relevel(group2, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*")))
fc_1_lt_lip <- read_csv("mean_values_se_n_levene.csv") %>%
mutate(genotype = fct_relevel(genotype, c("MoneyMaker", "*panK4-1*", "*log2-1*", "*transp1-1*"))) %>%
mutate(platform = "apolar LC-MS") %>%
rename(n = n.x)
fc_1_ind_lt_lip <- read_csv("individual_values_levene.csv") %>%
select(-genotype) %>%
left_join(genotypes)
sig_lip_lt <- read_csv("p_values_levene.csv")
met_lip <- fc_1_lip %>%
distinct(met, Compound_Name, Compound_Class)
cv_lip_out <- fc_1_cv_lip %>%
left_join(met_lip) %>%
left_join(sig_lip_cv, by = c("tissue", "met", "alias" = "alias2")) %>%
select(tissue, genotype, Compound_Name, mean_cv, sd_cv, se_cv, n, p.value) %>%
mutate(platform = "apolar LC-MS")
setwd(lip_source)
lip_met_lib <- read_delim("cmQTL_val1_selected_lipids.txt", delim = "\t")
lip_met_lib_tidy <- lip_met_lib %>%
select(Name, mz_mean = mz_mean_new, RT_mean = RT_mean_new, adduct_pos = Adduct,
Mol.formula = Chemical_Formula) %>%
mutate(mode = "pos",
mz_mean = round(mz_mean, 4),
RT_mean = round(RT_mean, 6)) %>%
distinct(mz_mean, RT_mean, .keep_all = T)
latest <- str_sort(str_extract(list.files(pattern = "^\\d{6}_normalization$"),
pattern = "^\\d{6}_normalization"),
decreasing = T)[[1]]
latest_normalization <- str_c(lip_source, "/", latest)
latest_normalization_date <- str_extract(latest, pattern = "^\\d{6}")
setwd(latest_normalization)
lip_met_dat <- read_csv(str_c(latest_normalization_date, "_cmQTL_val_1_2_met_dat_lip.csv")) %>%
mutate(mz_mean = round(mz_mean, 4),
RT_mean = round(RT_mean, 6))
lip_met_total <- met_lip %>%
left_join(lip_met_dat) %>%
left_join(lip_met_lib_tidy) %>%
mutate(platform = "apolar LC-MS")
# Data combination --------------------------------------------------------
setwd(out_dir)
fc_1 <- fc_1_prim %>%
bind_rows(fc_1_sec, fc_1_lip) %>%
select(Tissue = tissue, Irrigation = treatment, Genotype = genotype,
Compound = Compound_Name, `Relative fold change` = mean_fc,
SD = sd, SE = se, N = n, 'P-value' = p.value, Platform = platform) %>%
mutate(Platform = as_factor(Platform),
Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>%
arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>%
mutate(Genotype = str_remove_all(Genotype, "\\*"))
fc_1_lt <- fc_1_lt_prim %>%
bind_rows(fc_1_lt_sec, fc_1_lt_lip) %>%
select(Tissue = tissue, Irrigation = treatment, Genotype = genotype,
Compound = Compound_Name, `Relative fold change` = mean_fc,
SD = sd, SE = se, N = n, 'P-value' = p.value, Platform = platform) %>%
mutate(Platform = as_factor(Platform),
Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>%
arrange(Platform, Tissue, Irrigation, Compound, Genotype) %>%
mutate(Genotype = str_remove_all(Genotype, "\\*"))
fc_1_ind <- fc_1_ind_prim %>%
bind_rows(fc_1_ind_sec, fc_1_ind_lip)
fc_1_cv <- fc_1_cv_prim %>%
bind_rows(fc_1_cv_sec, fc_1_cv_lip)
met_dat <- fc_1 %>%
distinct(met, Compound_Name, Compound_Class)
classes_tidy <- met_dat %>%
mutate(`ChEBI simplified` = if_else(str_detect(Compound_Class,"carbohydrate"),"carbohydrate or derivative",
if_else(str_detect(Compound_Class,"amino_acid"), "amino acid or derivative",
if_else(str_detect(Compound_Class,"carboxylic_acid"), "carboxylic acid",
"other"))))
genotypes <- fc_1 %>%
distinct(alias, genotype) %>%
mutate(genotype_label = str_remove_all(genotype, "\\*"))
per_comp_y <- fc_1 %>%
group_by(tissue, treatment, met) %>%
summarise(tot_val = max(mean_fc + se))
sig_lt <- sig_prim_lt %>%
bind_rows(sig_sec_lt, sig_lip_lt)
sig <- sig_prim %>%
bind_rows(sig_sec, sig_lip)
sig_cv <- sig_prim_cv %>%
bind_rows(sig_sec_cv, sig_lip_cv)
cb_scale <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
"#0072B2", "#D55E00", "#CC79A7","#000000")
bw_scale <- c("black", "black", "black", "black", "black", "black", "black")
vir_scale <- plasma(4, begin = 0.2, end = 1)
cv_out <- bind_rows(cv_prim_out, cv_sec_out, cv_lip_out) %>%
select(Tissue = tissue, Genotype = genotype,
Compound = Compound_Name, `Mean CV` = mean_cv,
SD = sd_cv, SE = se_cv, N = n, 'P-value' = p.value, Platform = platform) %>%
mutate(Platform = as_factor(Platform),
Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>%
arrange(Platform, Tissue, Compound, Genotype) %>%
mutate(Genotype = str_remove_all(Genotype, "\\*"))
met_dat <- prim_met_total %>%
bind_rows(sec_met_total, lip_met_total) %>%
select(Platform = platform, everything()) %>%
mutate(Platform = as_factor(Platform),
Platform = fct_relevel(Platform, c("polar GC-MS", "polar LC-MS", "apolar LC-MS"))) %>%
arrange(Platform, RT_mean, Compound_Name) %>%
mutate('Peak no.' = str_c("M",row_number()),
'Identification level' = "") %>%
select('Peak no.', Compound = Compound_Name, 'Compound Class' = Compound_Class, 'm/z' = mz_mean,
'Ret. Time' = RT_mean, 'Identification level', Platform)
write_csv(fc_1, "FC1_supplementary.csv")
write_csv(fc_1_lt, "FC1_LT_supplementary.csv")
write_csv(cv_out, "CV_supplementary.csv")
write_csv(met_dat, "met_dat_supplementary.csv")
This diff is collapsed.
Compound_Class,Compound_Class_simple
purines,other
amino_acid,Amino acid or derivative
carboxylic_acid,Carboxylic acid
alcohol,other
no_chebi,other
organic_heterocyclic_compound,other
polyol,other
carbohydrate_phosphate,Carbohydrate or derivative
carbohydrate,Carbohydrate or derivative
carbohydrate_derivative,Carbohydrate or derivative
unannotated,other
one_carbon_compound,other
pyridines,other
phosphoric_acids,other
polyamine,other
amino_acid_derivative,Amino acid or derivative
primary_amino_compound,Amino acid or derivative
pyrimidines,other
Cinnamic acid,Cinnamic acid
Metabolism: Amino acid,Amino acid or derivative
Amino acid,Amino acid or derivative
Aromatic acid derivatives,other
Cofactor,other
Metabolism:TCA,other
Dipeptide,Dipeptide
Metabolism: Chlorophyll,other
Nucleotide,other
Steroidal Glycoalkaloids,Glycoalkaloid
Aliphatic acid glycosides,other
Aromatic alcohol glycosides,other
Flavonoid (glycosides),Flavonoid (glycosides)
lyso Phospholipid,Phospholipid
Lyso-Monogalactosyldiacylglycerol,Galactolipid
Lyso-Digalactosyl-Diacylglycerol,Galactolipid
Diacylglycerol,DAG
Phospholipid,Phospholipid
Phosphatidylcholine,Phospholipid
Sphingolipid,Sphingolipid
Monogalactosyldiacylglycerol,Galactolipid
Digalactosyl-Diacylglycerol,Galactolipid
Phosphatidylethanolamine,Phospholipid
Triacylglyceride,TAG
This diff is collapsed.
This diff is collapsed.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
getwd()
library(tidyverse)
#tmp.wd <- "E:/R/Lipid"
#setwd(tmp.wd)
file1 <- list.files(pattern = "clusters.gda$")
file2 <- "210809_lip_lib_cmQTL_val_adapt.txt"
fileout <- "cmQTL_val1_selected_lipids.txt"
options(stringsAsFactors=FALSE)
mat1 <- read_delim(file1, skip = 8, delim = "\t") %>%
filter(str_detect(Name, "Cluster")) %>%
select(Name, mz_mean = `m/z`, RT_mean = RT, everything()) %>%
as.data.frame()
mat2 <- read.delim(file2)
mat1[is.na(mat1)] <- 0
rt.dev.range <- c(-0.2, 0.2) #rt.dev.range <- c(-0.2, 0.2)
mz.dev.range <- c(-0.005, 0.005) #mz.dev.range <- c(-0.005, 0.005)
out <- NULL
res <- vector("list",nrow(mat1))
for(i in 1:nrow(mat1)) {
mz.diff <- mat2$mz_mean - mat1$mz_mean[i]
rt.diff <- mat2$RT_mean - mat1$RT_mean[i]
j <- which(rt.diff > rt.dev.range[1] & rt.diff < rt.dev.range[2] &
mz.diff > mz.dev.range[1] & mz.diff < mz.dev.range[2])
res[[i]] <- if(length(j) == 0) NA else j
}
i <- rep.int(1:nrow(mat1), sapply(res, length))
j <- unlist(res)
out <- cbind(mat1[i, ], mat2[j, ], RTdiff=mat2$RT_mean[j] - mat1$RT_mean[i])
colnames(out)[c(2,3)] <- c("mz_mean_new", "RT_mean_new") # colnames were non-unique
# density plots
n.breaks=4 #to be adjusted
ranges <- cut(out$RT_mean, n.breaks, labels=F)
dens <- lapply(1:n.breaks, function(i) {
if(sum(!is.na(out$RTdiff[ranges == i]))) density(out$RTdiff[ranges == i], na.rm=T)
else NULL})
m <- ceiling(sqrt(n.breaks))
n <- ceiling(n.breaks/ m)
par(mfrow=c(n,m))
mx2 <- numeric(n.breaks)
RTrange <- matrix(NA, n.breaks, 2)
for(i in 1:n.breaks) {
if(is.null(dens[[i]]))
next
mx2[i] <- mx <- dens[[i]]$x[which.max(dens[[i]]$y)]
RTrange[i,] <- range(out$RT_mean[ranges == i], na.rm=TRUE)
plot(dens[[i]], main=sprintf("Range: %.2f - %.2f [min] | max RT dev: %.3f", RTrange[i,1], RTrange[i,2], mx),
xlim=c(-1,1))
abline(v=mx + c(-0.1, -0.05,0,0.05, 0.1), col=c(3,2,1,2,3))
}
rt2 <- sapply(1:n.breaks, function(i) median(out$RT_mean[ranges == i], na.rm=T))
x11()
plot(rt2, mx2, main="RT time vs. RT diff", xlab="RT [min]", ylab="RT deviation [min]", pch=19)
# adjust the deviations parameters (a matrix nrow=n.breaks; ncol=2), for example
RTdevs <- cbind(mx2 - 0.1, mx2 + 0.1) # Adjust
# or edit the object RTdevs manually...
# RTdevs <- edit(RTdevs)
indexes <- unlist(sapply(1:n.breaks, function(i)
which(ranges==i & out$RTdiff > RTdevs[i,1] & out$RTdiff < RTdevs[i,2])))
write.table(out[indexes,], file=fileout, sep="\t", quote=FALSE, row.names=FALSE)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment