diff --git a/workflows/phenotype_pca_and_hc/scripts/pheno_and_climate_pca_in_r.R b/workflows/phenotype_pca_and_hc/scripts/pheno_and_climate_pca_in_r.R index bf3fca11685b762a82c36ddfc80d81f4e2742479..ea96d4aee39a212fdef6ee04571bc3cdc7119f55 100644 --- a/workflows/phenotype_pca_and_hc/scripts/pheno_and_climate_pca_in_r.R +++ b/workflows/phenotype_pca_and_hc/scripts/pheno_and_climate_pca_in_r.R @@ -1,665 +1,319 @@ -# Quick PCA Analysis on phenotype data - +# Description: +# This R script performs a Principal Component Analysis (PCA) on teosinte individuals' phenotype and environmental data. +# The PCA is calculated using the prcomp() function, +# with data scaling to standardize variables for better analysis. The script generates visualizations to help +# interpret the PCA results, including: +# 1. A scree plot to show the proportion of variance explained by each principal component. +# 2. A plot of individuals (samples) in the PCA space, with points colored according to a grouping variable (taxon) +# 3. A biplot that simultaneously displays both individuals and the contributing variables + +# libraries library("factoextra") library("RColorBrewer") - -phen.data.raw <- read.csv("../downstream-data-and-outputs/data/phenotype_and_env_data.csv") -phen.data.raw.full.labs <- read.csv("../downstream-data-and-outputs/data/phenotype_and_env_data_vars_labled.csv") -#phen.data.full.labs <- read.csv("../downstream-data-and-outputs/data/phenotype_and_env_data_vars_labled.csv", header = T) +phen.data.raw <- read.csv("../data/phenotype_and_env_data.csv") +phen.data.raw.full.labs <- read.csv("../data/phenotype_and_env_data_vars_labled.csv") +# phen.data.full.labs <- read.csv("../data/phenotype_and_env_data_vars_labled.csv", header = T) phen.data.raw$classification <- gsub("Zea mays", "Z. m. ssp.", phen.data.raw$classification) phen.data.raw$classification <- gsub("\\bZea\\b", "Z.", phen.data.raw$classification) - -#phen.data <- phen.data.raw[,c(10:264)] -phen.data <- phen.data.raw.full.labs[,c(2:256)] - +# phen.data <- phen.data.raw[,c(10:264)] +phen.data <- phen.data.raw.full.labs[, c(2:256)] rownames(phen.data) <- phen.data.raw$POBL -#rownames(phen.data2) <- phen.data.raw.full.labs$population - - +# rownames(phen.data2) <- phen.data.raw.full.labs$population phen.pca <- prcomp(phen.data, scale = TRUE) - -# extarct results of the variables - -#var$coord: coordinates of variables to create a scatter plot -#var$cos2: represents the quality of representation for variables on the factor map. It’s calculated as the squared coordinates: var.cos2 = var.coord * var.coord. -#var$contrib: contains the contributions (in percentage) of the variables to the principal components. The contribution of a variable (var) to a given principal component is (in percentage) : (var.cos2 * 100) / (total cos2 of the component). - -#var <- get_pca_var(phen.pca) -#var - -#library("corrplot") -#corrplot(var$contrib, is.corr=FALSE) - # scree plot -png("../downstream-data-and-outputs/outputs/R_screeplot_transparent.png", width = 6, height = 6, units = 'in', res = 300, bg = 'transparent') +png("../results/R_screeplot_transparent.png", width = 6, height = 6, units = "in", res = 300, bg = "transparent") -fviz_eig(phen.pca, main = "Screeplot - Eigenvalues", linecolor = "black", ncp = 10, - barcolor = heat.colors(10), barfill = heat.colors(10), addlabels = T, - #ggtheme = theme_classic() - )+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) +fviz_eig(phen.pca, + main = "Screeplot - Eigenvalues", linecolor = "black", ncp = 10, + barcolor = heat.colors(10), barfill = heat.colors(10), addlabels = T, + # ggtheme = theme_classic() +) + + theme(text = element_text(color = "white")) + + theme(legend.text = element_text(color = "white", size = 10)) + + theme(legend.title = element_text(color = "white", size = 10, face = "bold")) dev.off() # Contributions of variables to PC1 -png("../downstream-data-and-outputs/outputs/PC1_PC2_conribution_variable_contributions.png", width = 12, height = 5, units = 'in', res = 360)#, bg = 'transparent') -fviz_contrib(phen.pca, choice = "var", axes = 1:2)+#, top = 150 )+ - theme( text = element_text(color = 'black'))+ - theme(legend.text=element_text(color="black",size=10))+ - theme(legend.title = element_text(color ='black', size = 10, face='bold')) +png("../results/PC1_PC2_conribution_variable_contributions.png", width = 12, height = 5, units = "in", res = 360) # , bg = 'transparent') +fviz_contrib(phen.pca, choice = "var", axes = 1:2) + # , top = 150 )+ + theme(text = element_text(color = "black")) + + theme(legend.text = element_text(color = "black", size = 10)) + + theme(legend.title = element_text(color = "black", size = 10, face = "bold")) dev.off() - # Calculate contributions to variation var_contrib <- get_pca_var(phen.pca)$contrib # Save the contributions as a CSV file -write.csv(var_contrib, "../downstream-data-and-outputs/outputs/PCA_contributions.csv", row.names = TRUE) +write.csv(var_contrib, "../results/PCA_contributions.csv", row.names = TRUE) # Save the plot of the contribution to variation for the first two components as a PNG file -png("../downstream-data-and-outputs/outputs/PC1_PC2_variable_contributions.png", - width = 14, height = 5, units = 'in', res = 360) +png("../results/PC1_PC2_variable_contributions.png", + width = 14, height = 5, units = "in", res = 360 +) fviz_contrib(phen.pca, choice = "var", axes = 1:2) + ggtitle("Contribution of Variables to PCA Axes 1 and 2") + theme( - axis.text.x = element_text(color = 'black', size = 5, angle = 90), + axis.text.x = element_text(color = "black", size = 5, angle = 90), legend.text = element_text(color = "black", size = 10), - legend.title = element_text(color = 'black', size = 10, face = 'bold') + legend.title = element_text(color = "black", size = 10, face = "bold") ) dev.off() # Save the plot as an SVG file -svg("../downstream-data-and-outputs/outputs/PC1_PC2_variable_contributions.svg", - width = 14, height = 5) +svg("../results/PC1_PC2_variable_contributions.svg", + width = 14, height = 5 +) fviz_contrib(phen.pca, choice = "var", axes = 1:2) + ggtitle("Contribution of Variables to PCA Axes 1 and 2") + theme( - axis.text.x = element_text(color = 'black', size = 5, angle = 90), + axis.text.x = element_text(color = "black", size = 5, angle = 90), legend.text = element_text(color = "black", size = 10), - legend.title = element_text(color = 'black', size = 10, face = 'bold') + legend.title = element_text(color = "black", size = 10, face = "bold") ) dev.off() # Save the plot as a PDF file -pdf("../downstream-data-and-outputs/outputs/PC1_PC2_variable_contributions.pdf", - width = 14, height = 5) +pdf("../results/PC1_PC2_variable_contributions.pdf", + width = 14, height = 5 +) fviz_contrib(phen.pca, choice = "var", axes = 1:2) + ggtitle("Contribution of Variables to PCA Axes 1 and 2") + theme( - axis.text.x = element_text(color = 'black', size = 5, angle = 90), + axis.text.x = element_text(color = "black", size = 5, angle = 90), legend.text = element_text(color = "black", size = 10), - legend.title = element_text(color = 'black', size = 10, face = 'bold') + legend.title = element_text(color = "black", size = 10, face = "bold") ) dev.off() -# top 100 -fviz_contrib(phen.pca, choice = "var", axes = 1, top = 100) - -# Contributions of variables to PC2 -fviz_contrib(phen.pca, choice = "var", axes = 2,) - -# Contributions of variables to PC3 -fviz_contrib(phen.pca, choice = "var", axes = 3,) - -# Contributions of variables to PC4 -fviz_contrib(phen.pca, choice = "var", axes = 4,) - -# Contributions of variables to PC1 to 3 -fviz_contrib(phen.pca, choice = "var", axes = 1:2,) - - - # individuals plot groups <- as.factor(phen.data.raw$classification) -# display.brewer.all() - -# cols <- brewer.pal(n=11, name = "Paired") -#cols <- c('#FF8C00','#7B68EE','#4169E1','#778899','#006400','#32CD32','#808000','#8FBC8F','#800000','#00FFFF','#FFD700') # Colors corresponding to the ecogeograph of teosinte published Jesus paper -cols <- c('#E515A1','#2E7312','#8FBC8F','#79DDFC','#E11D20','#FCAB2A','#5EFF27','#FDBDE7','#4286FC','#686868','#A800E2') +cols <- c("#E515A1", "#2E7312", "#8FBC8F", "#79DDFC", "#E11D20", "#FCAB2A", "#5EFF27", "#FDBDE7", "#4286FC", "#686868", "#A800E2") # biplot with 18 morphological and ovearll representative variables based on correlation -png("../downstream-data-and-outputs/outputs/PCA_Biplot_pub.png", width = 17, height = 11, units = 'in', res = 320)#, bg = 'transparent') -fviz_pca_biplot(phen.pca, - repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#414a4c",#"black", - geom="point", - #gradient.cols = cols, - palette = cols, - legend.title = "Teosinte Taxa", - # invisible = "ind", - #title = "PCA Biplot of Morphological and Environmental Data" , - title = '', - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', - 'plant_surface_area', - 'days_to_silk_emergence', - 'days_to_pollen_shed', - 'number_of_tillers', - 'total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', - 'spikelet_length','Weight_of_100_kernels', - 'peduncle_length', 'total_number_of_tassel', - 'number_of_lateral_branches', 'heat_unit_to_silk', - 'heat_units_to_pollen_shed', 'altitude', - 'average_photoperiod_may_to_oct', - 'min_year_photoperiod', - 'annual_average_max_temp', - 'average_max_temp_may_to_oct', - 'average_min_temp_annual', - #'average_min_temp_may_to_oct', - 'average_temp_annual', - 'average_temp_may_to_oct', - 'accum_heat_units_annual', - 'accum_heat_units_may_to_oct', - 'accum_thermal_sum_annual', - 'accum_thermal_sum_may_to_oct', - 'thermal_oscillation_annual', - 'thermal_oscillation_may_to_oct', - 'accum_average_precipitation_annual', - 'accum_average_precipitation_may_to_oct', - 'accum_average_potential_evapotranspiration_average', - 'accum_average_potential_evapotranspiration_may_to_oct', - 'humidity_index_annual', - 'humidity_index_may_to_oct', - 'average_relative_humidity_annual', - 'average_relative_humidity_may_to_oct', - "average_solar_radiation_annual", - "average_solar_radiation_may_to_oct", - "annual_number_wet_months", - "growing_season_min_photoperiod", - "growing_season_average_max_temp", - "EC_average_accumulated_precipitation", - #"EC_monthly_average_max_accumulated_precipitation", - #"EC_monthly_average_min_accumulated_precipitation", - "max_annual_precipitation", - "min_precipitation_driest_month", - #"EC_average_humidity_index", - #"EC_min_monthly_humidity_index", - #"EC_max_humidity_index", - "precipitation_seasonality") ) -)+ theme(#plot.title = element_text(size=25, face = 'bold'), - text = element_text(size = 18),# face = 'bold'), - axis.title = element_text(size = 18), - axis.text = element_text(size = 18), - legend.title=element_text(size=20, face = 'bold'), - legend.text=element_text(size=18)#, face = 'italic',) - ) -dev.off() - -pdf("../downstream-data-and-outputs/outputs/PCA_Biplot_pub.pdf", width = 17, height = 11) -fviz_pca_biplot(phen.pca, - repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#414a4c",#"black", - geom="point", - #gradient.cols = cols, - palette = cols, - legend.title = "Teosinte Taxa", - # invisible = "ind", - #title = "PCA Biplot of Morphological and Environmental Data" , - title = '', - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', - 'plant_surface_area', - 'days_to_silk_emergence', - 'days_to_pollen_shed', - 'number_of_tillers', - 'total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', - 'spikelet_length','Weight_of_100_kernels', - 'peduncle_length', 'total_number_of_tassel', - 'number_of_lateral_branches', 'heat_unit_to_silk', - 'heat_units_to_pollen_shed', 'altitude', - 'average_photoperiod_may_to_oct', - 'min_year_photoperiod', - 'annual_average_max_temp', - 'average_max_temp_may_to_oct', - 'average_min_temp_annual', - #'average_min_temp_may_to_oct', - 'average_temp_annual', - 'average_temp_may_to_oct', - 'accum_heat_units_annual', - 'accum_heat_units_may_to_oct', - 'accum_thermal_sum_annual', - 'accum_thermal_sum_may_to_oct', - 'thermal_oscillation_annual', - 'thermal_oscillation_may_to_oct', - 'accum_average_precipitation_annual', - 'accum_average_precipitation_may_to_oct', - 'accum_average_potential_evapotranspiration_average', - 'accum_average_potential_evapotranspiration_may_to_oct', - 'humidity_index_annual', - 'humidity_index_may_to_oct', - 'average_relative_humidity_annual', - 'average_relative_humidity_may_to_oct', - "average_solar_radiation_annual", - "average_solar_radiation_may_to_oct", - "annual_number_wet_months", - "growing_season_min_photoperiod", - "growing_season_average_max_temp", - "EC_average_accumulated_precipitation", - #"EC_monthly_average_max_accumulated_precipitation", - #"EC_monthly_average_min_accumulated_precipitation", - "max_annual_precipitation", - "min_precipitation_driest_month", - #"EC_average_humidity_index", - #"EC_min_monthly_humidity_index", - #"EC_max_humidity_index", - "precipitation_seasonality") ) -)+ theme(#plot.title = element_text(size=25, face = 'bold'), - text = element_text(size = 18),# face = 'bold'), +png("../results/PCA_Biplot_pub.png", width = 17, height = 11, units = "in", res = 320) # , bg = 'transparent') +fviz_pca_biplot(phen.pca, + repel = TRUE, + lable = "var", + habillage = groups, + col.var = "#414a4c", # "black", + geom = "point", + # gradient.cols = cols, + palette = cols, + legend.title = "Teosinte Taxa", + # invisible = "ind", + # title = "PCA Biplot of Morphological and Environmental Data" , + title = "", + select.var = list(name = c( + "plant_height", "leaf_width", "spikelet_width", + "plant_surface_area", + "days_to_silk_emergence", + "days_to_pollen_shed", + "number_of_tillers", + "total_number_of_leaves_per_plant", "tassel_length", + "leaf_length", "central_spike_length", + "spikelet_length", "Weight_of_100_kernels", + "peduncle_length", "total_number_of_tassel", + "number_of_lateral_branches", "heat_unit_to_silk", + "heat_units_to_pollen_shed", "altitude", + "average_photoperiod_may_to_oct", + "min_year_photoperiod", + "annual_average_max_temp", + "average_max_temp_may_to_oct", + "average_min_temp_annual", + #' average_min_temp_may_to_oct', + "average_temp_annual", + "average_temp_may_to_oct", + "accum_heat_units_annual", + "accum_heat_units_may_to_oct", + "accum_thermal_sum_annual", + "accum_thermal_sum_may_to_oct", + "thermal_oscillation_annual", + "thermal_oscillation_may_to_oct", + "accum_average_precipitation_annual", + "accum_average_precipitation_may_to_oct", + "accum_average_potential_evapotranspiration_average", + "accum_average_potential_evapotranspiration_may_to_oct", + "humidity_index_annual", + "humidity_index_may_to_oct", + "average_relative_humidity_annual", + "average_relative_humidity_may_to_oct", + "average_solar_radiation_annual", + "average_solar_radiation_may_to_oct", + "annual_number_wet_months", + "growing_season_min_photoperiod", + "growing_season_average_max_temp", + "EC_average_accumulated_precipitation", + # "EC_monthly_average_max_accumulated_precipitation", + # "EC_monthly_average_min_accumulated_precipitation", + "max_annual_precipitation", + "min_precipitation_driest_month", + # "EC_average_humidity_index", + # "EC_min_monthly_humidity_index", + # "EC_max_humidity_index", + "precipitation_seasonality" + )) +) + theme( # plot.title = element_text(size=25, face = 'bold'), + text = element_text(size = 18), # face = 'bold'), axis.title = element_text(size = 18), axis.text = element_text(size = 18), - legend.title=element_text(size=20, face = 'bold'), - legend.text=element_text(size=18)#, face = 'italic',) + legend.title = element_text(size = 20, face = "bold"), + legend.text = element_text(size = 18) # , face = 'italic',) ) dev.off() - -svg("../downstream-data-and-outputs/outputs/PCA_Biplot_pub.svg", width = 17, height = 11) -fviz_pca_biplot(phen.pca, - repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#414a4c",#"black", - geom="point", - #gradient.cols = cols, - palette = cols, - legend.title = "Teosinte Taxa", - # invisible = "ind", - #title = "PCA Biplot of Morphological and Environmental Data" , - title = '', - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', - 'plant_surface_area', - 'days_to_silk_emergence', - 'days_to_pollen_shed', - 'number_of_tillers', - 'total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', - 'spikelet_length','Weight_of_100_kernels', - 'peduncle_length', 'total_number_of_tassel', - 'number_of_lateral_branches', 'heat_unit_to_silk', - 'heat_units_to_pollen_shed', 'altitude', - 'average_photoperiod_may_to_oct', - 'min_year_photoperiod', - 'annual_average_max_temp', - 'average_max_temp_may_to_oct', - 'average_min_temp_annual', - #'average_min_temp_may_to_oct', - 'average_temp_annual', - 'average_temp_may_to_oct', - 'accum_heat_units_annual', - 'accum_heat_units_may_to_oct', - 'accum_thermal_sum_annual', - 'accum_thermal_sum_may_to_oct', - 'thermal_oscillation_annual', - 'thermal_oscillation_may_to_oct', - 'accum_average_precipitation_annual', - 'accum_average_precipitation_may_to_oct', - 'accum_average_potential_evapotranspiration_average', - 'accum_average_potential_evapotranspiration_may_to_oct', - 'humidity_index_annual', - 'humidity_index_may_to_oct', - 'average_relative_humidity_annual', - 'average_relative_humidity_may_to_oct', - "average_solar_radiation_annual", - "average_solar_radiation_may_to_oct", - "annual_number_wet_months", - "growing_season_min_photoperiod", - "growing_season_average_max_temp", - "EC_average_accumulated_precipitation", - #"EC_monthly_average_max_accumulated_precipitation", - #"EC_monthly_average_min_accumulated_precipitation", - "max_annual_precipitation", - "min_precipitation_driest_month", - #"EC_average_humidity_index", - #"EC_min_monthly_humidity_index", - #"EC_max_humidity_index", - "precipitation_seasonality") ) -)+ theme(#plot.title = element_text(size=25, face = 'bold'), - text = element_text(size = 18),# face = 'bold'), +pdf("../results/PCA_Biplot_pub.pdf", width = 17, height = 11) +fviz_pca_biplot(phen.pca, + repel = TRUE, + lable = "var", + habillage = groups, + col.var = "#414a4c", # "black", + geom = "point", + # gradient.cols = cols, + palette = cols, + legend.title = "Teosinte Taxa", + # invisible = "ind", + # title = "PCA Biplot of Morphological and Environmental Data" , + title = "", + select.var = list(name = c( + "plant_height", "leaf_width", "spikelet_width", + "plant_surface_area", + "days_to_silk_emergence", + "days_to_pollen_shed", + "number_of_tillers", + "total_number_of_leaves_per_plant", "tassel_length", + "leaf_length", "central_spike_length", + "spikelet_length", "Weight_of_100_kernels", + "peduncle_length", "total_number_of_tassel", + "number_of_lateral_branches", "heat_unit_to_silk", + "heat_units_to_pollen_shed", "altitude", + "average_photoperiod_may_to_oct", + "min_year_photoperiod", + "annual_average_max_temp", + "average_max_temp_may_to_oct", + "average_min_temp_annual", + #' average_min_temp_may_to_oct', + "average_temp_annual", + "average_temp_may_to_oct", + "accum_heat_units_annual", + "accum_heat_units_may_to_oct", + "accum_thermal_sum_annual", + "accum_thermal_sum_may_to_oct", + "thermal_oscillation_annual", + "thermal_oscillation_may_to_oct", + "accum_average_precipitation_annual", + "accum_average_precipitation_may_to_oct", + "accum_average_potential_evapotranspiration_average", + "accum_average_potential_evapotranspiration_may_to_oct", + "humidity_index_annual", + "humidity_index_may_to_oct", + "average_relative_humidity_annual", + "average_relative_humidity_may_to_oct", + "average_solar_radiation_annual", + "average_solar_radiation_may_to_oct", + "annual_number_wet_months", + "growing_season_min_photoperiod", + "growing_season_average_max_temp", + "EC_average_accumulated_precipitation", + # "EC_monthly_average_max_accumulated_precipitation", + # "EC_monthly_average_min_accumulated_precipitation", + "max_annual_precipitation", + "min_precipitation_driest_month", + # "EC_average_humidity_index", + # "EC_min_monthly_humidity_index", + # "EC_max_humidity_index", + "precipitation_seasonality" + )) +) + theme( # plot.title = element_text(size=25, face = 'bold'), + text = element_text(size = 18), # face = 'bold'), axis.title = element_text(size = 18), axis.text = element_text(size = 18), - legend.title=element_text(size=20, face = 'bold'), - legend.text=element_text(size=18)#, face = 'italic',) -) -dev.off() - -##### -png("../downstream-data-and-outputs/outputs/pca_plot_individuals.png", width = 10, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_ind(phen.pca, - axes = c(1, 2), - col.ind = groups, # color by groups - palette = cols, - #addEllipses = TRUE, # Concentration ellipses - #ellipse.type = "confidence", - legend.title = "Taxa", - title = "Principal Component Analysis of Morphological and Environmental Data", - geom="point", # use points only - repel = TRUE, -)+ - theme( text = element_text(color = 'black'))+ - theme(legend.text=element_text(color="black",size=10))+ - theme(legend.title = element_text(color ='black', size = 10, face='bold')) - -dev.off() - -# Graph of individuals. Individuals with a similar profile are grouped together. - -png("../downstream-data-and-outputs/outputs/R_individuals_representation_pca_plot_transparents.png", width = 8, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_ind(phen.pca, - col.ind = "cos2", # Color by the quality of representation - gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), - repel = TRUE, # Avoid text overlapping - geom="point", - title = "Individuals colored by quality of their representation" + legend.title = element_text(size = 20, face = "bold"), + legend.text = element_text(size = 18) # , face = 'italic',) ) dev.off() -# biplot - -png("../downstream-data-and-outputs/outputs/R_biplot_pca_plot_transparent.png", width = 8, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_biplot(phen.pca, repel = TRUE, - col.var = "#2E9FDF", # Variables color - col.ind = "#696969", # Individuals color - palette = cols, - legend.title = "Taxa", - -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) - -dev.off() - - -# biplot - variable colored by contribution -png("../downstream-data-and-outputs/outputs/R_biplot_var_col_contrib_pca_plot_transparent.png", width = 8, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_biplot(phen.pca, repel = TRUE, - col.var = "contrib", # Variables color - gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), - col.ind = "#696969", # Individuals color - #palette = cols, - legend.title = "Contribution", - -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) - -dev.off() - - -png("../downstream-data-and-outputs/outputs/R_biplot2_pca_plot_transparent.png", width = 10, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_biplot(phen.pca, repel = TRUE, - # select.var = list(contrib = 50), - # col.var = "#2E9FDF", # Variables color - # col.ind = "#696969" # Individuals color - lable = "var", - habillage = groups, - col.var = "#dcbeff", - palette = cols, - legend.title = "Taxa", - # invisible = "ind" -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) -dev.off() - - -# biplot with 23 variables used in in the ecogeograpy of teosinte paper -png("../downstream-data-and-outputs/outputs/R_biplot23_vars_ecogeo_paper_pca_plot.png", width = 10, height = 6, units = 'in', res = 300) -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#332288", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - title = "PCA biplot of 23 climatic variables used in the 'Ecogeography of teosinte' paper" , - select.var = list(name = c( 'x3', 'x187', 'x178', 'x179', 'x58', 'x30', 'x208', 'x181', 'x210', 'x177', 'x176', 'x175', - 'x211', 'x44', 'x209', 'x182', 'x180', 'x156', 'x218', 'x201', 'x212', 'x170', 'x162') ) -) -dev.off() - - - -# biplot with 18 morphological and 23 variables in ecogeograpy of teosinte paper -png("../downstream-data-and-outputs/outputs/R_biplot23_vars_ecogeo_paper_pca_plot_transparent.png", width = 10, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#dcbeff", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - #title = "PCA biplot of 23 climatic variables used in the 'Ecogeography of teosinte' paper" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x3', 'x187', 'x178', 'x179', 'x58', 'x30', 'x208', 'x181', 'x210', 'x177', 'x176', 'x175', - 'x211', 'x44', 'x209', 'x182', 'x180', 'x156', 'x218', 'x201', 'x212', 'x170', 'x162') ) -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) -dev.off() - - - -# var_plot with 18 morphological and 23 variables in ecogeograpy of teosinte paper -png("../downstream-data-and-outputs/outputs/R_23_vars_plot_ecogeo_paper_pca_plot_transparent.png", width = 10, height = 6, units = 'in', res = 300, bg = 'transparent') -fviz_pca_var(phen.pca, - col.var = "contrib", # Color by contributions to the PC - gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), - repel = TRUE,# Avoid text overlapping - - # invisible = "ind", - #title = "PCA biplot of 23 climatic variables used in the 'Ecogeography of teosinte' paper" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x3', 'x187', 'x178', 'x179', 'x58', 'x30', 'x208', 'x181', 'x210', 'x177', 'x176', 'x175', - 'x211', 'x44', 'x209', 'x182', 'x180', 'x156', 'x218', 'x201', 'x212', 'x170', 'x162') ) -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) -dev.off() - - - - - -## plotting variables of interest - -# morphological and precipitation variables -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#332288", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - title = "PCA biplot of morphological and precipitation variables" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x101', 'x102', 'x103', 'x104', 'x105', 'x106', 'x107', 'x108', 'x111', 'x109', 'x110', 'x112', - 'x113', 'x114') ) -) - -# morphological and humidity variables - -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#332288", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - title = "PCA biplot of morphological and precipitation variables" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x101', 'x102', 'x103', 'x104', 'x105', 'x106', 'x107', 'x108', 'x111', 'x109', 'x110', 'x112', - 'x113', 'x114') ) -) -# morphological and temperature variables -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#332288", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - title = "PCA biplot of morphological and precipitation variables" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x101', 'x102', 'x103', 'x104', 'x105', 'x106', 'x107', 'x108', 'x111', 'x109', 'x110', 'x112', - 'x113', 'x114') ) -) - -# morphological and solar radiation variables - -fviz_pca_biplot(phen.pca, repel = TRUE, - lable = "var", - habillage = groups, - col.var = "#332288", - #gradient.cols = cols, - palette = cols, - legend.title = "Taxa", - # invisible = "ind", - title = "PCA biplot of morphological and precipitation variables" , - select.var = list(name = c( 'plant_height','leaf_width','spikelet_width', 'plant_surface_area', 'days_to_silk_emergence', 'days_to_pollen_shed', 'number_of_tillers','total_number_of_leaves_per_plant', 'tassel_length', - 'leaf_length', 'central_spike_length', 'spikelet_length','Weight_of_100_kernels', 'peduncle_length', 'total_number_of_tassel', 'number_of_lateral_branches', 'heat_unit_to_silk', 'heat_units_to_pollen_shed', - 'x101', 'x102', 'x103', 'x104', 'x105', 'x106', 'x107', 'x108', 'x111', 'x109', 'x110', 'x112', - 'x113', 'x114') ) -) - - -# variables plot - -# Graph of variables. Positive correlated variables point to the same side of the plot. -# Negative correlated variables point to opposite sides of the graph. - -png("../downstream-data-and-outputs/outputs/R_23variable_pca_plot.png", width = 8, height = 6, units = 'in', res = 300) -fviz_pca_var(phen.pca, - col.var = "contrib", # Color by contributions to the PC - gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), - repel = TRUE,# Avoid text overlapping - max.overlaps = Inf, - select.var = list(name = c( 'x3', 'x187', 'x178', 'x179', 'x58', 'x30', 'x208', 'x181', 'x210', 'x177', 'x176', 'x175', - 'x211', 'x44', 'x209', 'x182', 'x180', 'x156', 'x218', 'x201', 'x212', 'x170', 'x162')) -) -dev.off() - - -# all variables pca -png("../downstream-data-and-outputs/outputs/R_variable_pca_plot_transparent.png", width = 8, height = 6, units = 'in', res = 300,bg = 'transparent') -fviz_pca_var(phen.pca, - col.var = "contrib", # Color by contributions to the PC - gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), - repel = T,# Avoid text overlapping - max.overlaps = getOption("ggrepel.max.overlaps", default = 10000), - title = 'Variables - PCA' -)+ - theme( text = element_text(color = 'white'))+ - theme(legend.text=element_text(color="white",size=10))+ - theme(legend.title = element_text(color ='white', size = 10, face='bold')) - -dev.off() - - - - -# variables plot from biplot -fviz_pca_biplot(phen.pca, repel = TRUE, - # select.var = list(contrib = 50), - col.var = "#2E9FDF", # Variables color - # col.ind = "#696969" # Individuals color - lable = "var", - habillage = groups, - # col.var = "#696969", - invisible = "ind", - -) - - - -# PC 3 -fviz_pca_ind(phen.pca, - axes = c(1, 3), - col.ind = groups, # color by groups - palette = cols, - addEllipses = TRUE, # Concentration ellipses - ellipse.type = "confidence", - legend.title = "Taxa", - title = "", - repel = TRUE, +svg("../results/PCA_Biplot_pub.svg", width = 17, height = 11) +fviz_pca_biplot(phen.pca, + repel = TRUE, + lable = "var", + habillage = groups, + col.var = "#414a4c", # "black", + geom = "point", + # gradient.cols = cols, + palette = cols, + legend.title = "Teosinte Taxa", + # invisible = "ind", + # title = "PCA Biplot of Morphological and Environmental Data" , + title = "", + select.var = list(name = c( + "plant_height", "leaf_width", "spikelet_width", + "plant_surface_area", + "days_to_silk_emergence", + "days_to_pollen_shed", + "number_of_tillers", + "total_number_of_leaves_per_plant", "tassel_length", + "leaf_length", "central_spike_length", + "spikelet_length", "Weight_of_100_kernels", + "peduncle_length", "total_number_of_tassel", + "number_of_lateral_branches", "heat_unit_to_silk", + "heat_units_to_pollen_shed", "altitude", + "average_photoperiod_may_to_oct", + "min_year_photoperiod", + "annual_average_max_temp", + "average_max_temp_may_to_oct", + "average_min_temp_annual", + #' average_min_temp_may_to_oct', + "average_temp_annual", + "average_temp_may_to_oct", + "accum_heat_units_annual", + "accum_heat_units_may_to_oct", + "accum_thermal_sum_annual", + "accum_thermal_sum_may_to_oct", + "thermal_oscillation_annual", + "thermal_oscillation_may_to_oct", + "accum_average_precipitation_annual", + "accum_average_precipitation_may_to_oct", + "accum_average_potential_evapotranspiration_average", + "accum_average_potential_evapotranspiration_may_to_oct", + "humidity_index_annual", + "humidity_index_may_to_oct", + "average_relative_humidity_annual", + "average_relative_humidity_may_to_oct", + "average_solar_radiation_annual", + "average_solar_radiation_may_to_oct", + "annual_number_wet_months", + "growing_season_min_photoperiod", + "growing_season_average_max_temp", + "EC_average_accumulated_precipitation", + # "EC_monthly_average_max_accumulated_precipitation", + # "EC_monthly_average_min_accumulated_precipitation", + "max_annual_precipitation", + "min_precipitation_driest_month", + # "EC_average_humidity_index", + # "EC_min_monthly_humidity_index", + # "EC_max_humidity_index", + "precipitation_seasonality" + )) +) + theme( # plot.title = element_text(size=25, face = 'bold'), + text = element_text(size = 18), # face = 'bold'), + axis.title = element_text(size = 18), + axis.text = element_text(size = 18), + legend.title = element_text(size = 20, face = "bold"), + legend.text = element_text(size = 18) # , face = 'italic',) ) - -png("../downstream-data-and-outputs/outputs/R_biplot2_pca_pc1_pc3_plot.png", width = 10, height = 6, units = 'in', res = 300) -fviz_pca_biplot(phen.pca, repel = TRUE, - axes = c(1, 3), - # select.var = list(contrib = 50), - # col.var = "#2E9FDF", # Variables color - # col.ind = "#696969" # Individuals color - lable = "var", - habillage = groups, - col.var = "#696969", - palette = cols, - legend.title = "Taxa", - # invisible = "ind" -) dev.off() - -png("../downstream-data-and-outputs/outputs/R_biplot_23vars_pca_pc1_pc3_plot.png", width = 10, height = 6, units = 'in', res = 300) -fviz_pca_biplot(phen.pca, repel = TRUE, - axes = c(1, 3), - lable = "var", - habillage = groups, - col.var = "#332288", - palette = cols, - legend.title = "Taxa", - select.var = list(name = c( 'x3', 'x187', 'x178', 'x179', 'x58', 'x30', 'x208', 'x181', 'x210', 'x177', 'x176', 'x175', - 'x211', 'x44', 'x209', 'x182', 'x180', 'x156', 'x218', 'x201', 'x212', 'x170', 'x162')) -) -dev.off() - - -# Biplot of PC1 vs PC4 -png("../downstream-data-and-outputs/outputs/R_biplot2_pca_pc1_pc4_plot.png", width = 10, height = 6, units = 'in', res = 300) -fviz_pca_biplot(phen.pca, repel = TRUE, - axes = c(1, 4), - # select.var = list(contrib = 50), - # col.var = "#2E9FDF", # Variables color - # col.ind = "#696969" # Individuals color - lable = "var", - habillage = groups, - col.var = "#696969", - palette = cols, - legend.title = "Taxa", - - # invisible = "ind" -) -dev.off() -