Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
Loading items

Target

Select target project
  • hhu-plant-biochemistry/Wrobel-2023-CastorBeanEndospermProteome
  • ceplas/Wrobel-2023-CastorBeanEndospermProteome
2 results
Select Git revision
Loading items
Show changes
## Ricinus-communis-seeds
![fpls-14-1182105-g001.jpg](resources/fpls-14-1182105-g001.jpg)
**Figure 1**
Endosperm morphology in seeds and germinating seedling of R. communis. Photographs were taken from the whole plant (upper panel) and two cotyledons embedded in the endosperm (lower panel) from mature seeds (dry), 24h-imbibed seeds (imb) and from 1- to 7-day old dark-grown castor bean seedlings (1-7d). Scale bar = 1 cm.
\ No newline at end of file
File added
# 2.1 Plant growth conditions
Dry seeds of *Ricinus communis* var. *zanzibariensis* were surface- sterilized in a solution of 0.1% (w/v) 8-quinolinol in water for ten minutes and soaked in running tap water over night for seed imbibition. The imbibed seeds were placed on moist vermiculite and incubated at 30°C in the dark (Beevers and Breidenbach, 1974).
- Beevers, H., and Breidenbach, R. W. (1974). “Glyoxysomes,” in Methods in enzymology, Vol 16 Biomembranes, Part A. Eds. S. Fleciher and L. packer (New York London: Academic Press), 565–571.
\ No newline at end of file
studies/Ricinus-communis-seeds/resources/fpls-14-1182105-g001.jpg

222 KiB

---
title: "Untitled"
author: "Dominik"
date: "4/13/2022"
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(knitr)
library(tidyverse)
library(openxlsx)
```
```{r}
getwd()
dir("../assays/2022-04-13_proteome_discoverer/dataset/")
```
## Count peptides in fasta
```{bash}
grep ">" ../assays/2022-04-13_proteome_discoverer/dataset/AllProteins.fasta | wc -l
head ../assays/2022-04-13_proteome_discoverer/dataset/AllProteins.fasta
grep "OS=Ricinus communis" ../assays/2022-04-13_proteome_discoverer/dataset/AllProteins.fasta | wc -l
```
## skim excel files
### quantification.xlsx
```{r}
quantification <- readxl::read_xlsx("../assays/2022-04-13_proteome_discoverer/dataset/quantification.xlsx")
dim(quantification)
colnames(quantification)
length(unique(quantification$Accession))
```
### quantification_with_peptides.xlsx
```{r}
quantification_peptides <- readxl::read_xlsx("../assays/2022-04-13_proteome_discoverer/dataset/quantification_with_peptides.xlsx")
dim(quantification_peptides)
colnames(quantification_peptides)
length(unique(quantification_peptides$Accession))
## This is a whole new level of untidy. A table nested in a table. I hate it. Thank you, Thermo Fisher Scientific.
### 1. keep only high confidence rows
quant_proteins <- filter(quantification_peptides, `Protein FDR Confidence: Combined` == "High")
### 2. remove empty columns
quant_proteins <- quant_proteins[, colSums(is.na(quant_proteins)) != nrow(quant_proteins)]
### 3. dummy check, that content is the same...
dim(quant_proteins) == dim(quantification)
sum(as.numeric(unlist(quantification[,6])) == as.numeric(unlist(quant_proteins[,6])))
## pull out peptide data
peptides <- filter(quantification_peptides, is.na(`Protein FDR Confidence: Combined`))
nrow(quantification_peptides) - nrow(peptides)
### 2. remove empty columns
peptides <- peptides[, colSums(is.na(peptides)) != nrow(peptides)]
### 3. strip between-data headlines
colnames(peptides) = as.character(peptides[1, ])
peptides <- filter(peptides, Checked != "Checked")
peptides <- type_convert(peptides)
```
## create an isa.run.xlsx like data dictionary
```{r}
sum_tmp <- data.frame(summary(quantification)) %>%
pivot_wider(names_from = Var1, values_from = c(Var1, Freq),
values_fn = function(x){paste(na.exclude(x), collapse = "|")})
quantification_summary <- cbind.data.frame(sum_tmp[,c(1,3)],t(summarise_all(quantification, class)))
colnames(quantification_summary) <- c("Identifier", "ObjectSummary", "ObjectType")
quantification_summary$TargetFile <- "quantification.xlsx"
sum_tmp <- data.frame(summary(peptides)) %>%
pivot_wider(names_from = Var1, values_from = c(Var1, Freq),
values_fn = function(x){paste(na.exclude(x), collapse = "|")})
peptides_summary <- cbind.data.frame(sum_tmp[,c(1,3)],t(summarise_all(peptides, class)))
colnames(peptides_summary) <- c("Identifier", "ObjectSummary", "ObjectType")
peptides_summary$TargetFile <- "quantification_with_peptides.xlsx"
isa_run <- rbind.data.frame(quantification_summary, peptides_summary)[, c(4, 1, 3, 2)]
row.names(isa_run) <- NULL
isa_run$Comment <- ""
isa_run$Definition <- ""
write.xlsx(isa_run, file = "../assays/2022-04-13_proteome_discoverer/isa.run.xlsx", overwrite = T, asTable = T)
```
```{r}
# extract abundances per accession only
abundances_grouped <- quantification[, c("Accession", grep("Abundances (Grouped)", colnames(quantification), fixed = T, value = T))]
# pivot and split column
abundances_grouped2 <-
abundances_grouped %>%
pivot_longer(!Accession, names_to = c("organelle", "compartment"), values_to = "abundance", names_sep = ', ')
# remove
abundances_grouped2$organelle <- gsub("Abundances (Grouped): ", "", abundances_grouped2$organelle, fixed = T)
# transform columns to factor
abundances_grouped2$organelle <- as.factor(abundances_grouped2$organelle)
abundances_grouped2$compartment <- as.factor(abundances_grouped2$compartment)
# pick random accessions
selected_accs <- sample(unique(abundances_grouped2$Accession), 4)
# filter plot subset
plotsub <- filter(abundances_grouped2, Accession %in% selected_accs)
ggplot(plotsub, aes(x = organelle, y = abundance, fill = compartment)) +
geom_col(position = position_dodge(width = 0.7), width = 0.7) +
facet_wrap(~Accession, scales = 'free') +
scale_fill_brewer(palette = "Dark2")
```
### Calculate and draw a PCA to get an overview of the dataset
```{r, fig.width = 3, fig.height = 3, fig.align = "center", eval=T}
pca_data <- as.data.frame(pivot_wider(abundances_grouped2,
names_from = Accession,
values_from = abundance,
id_cols = c("organelle", "compartment")))
pca_data <- unite(pca_data, organelle, compartment, col = 'merger', sep = '_')
rownames(pca_data) <- pca_data$merger
pca_data <- pca_data[, -1]
####### double-check
pca_data[is.na(pca_data)] <- 0
####### double-check
pca_data <- pca_data[, apply(pca_data, 2, function(x) {sum(x) != 0})]
pca <- prcomp(pca_data, scale = T)
pcaPlotData <- as.data.frame(pca$x)
pcaPlotData$merger <- rownames(pcaPlotData)
pcaPlotData <- separate(data = pcaPlotData, col = merger, sep = '_',
into = c("organelle", "compartment"))
ggplot(pcaPlotData, aes_string(color = 'organelle', shape = 'compartment', x = 'PC1', y = 'PC2')) +
geom_point(size = 3, stroke = 1.5) +
coord_equal() +
# theme_dominik +
scale_color_brewer(palette = 'Dark2')
```
```{r}
# extract abundances per accession only
abundances <- quantification[, c("Accession", grep("Abundances (Normalized)", colnames(quantification), fixed = T, value = T))]
# pivot and split column
abundances2 <-
abundances %>%
pivot_longer(!Accession, names_to = c("sample", "organelle", "compartment"), values_to = "abundance", names_sep = ', ')
# remove
abundances2$sample <- gsub("Abundances (Normalized): ", "", abundances2$sample, fixed = T)
abundances2$sample <- gsub(": Sample", "", abundances2$sample, fixed = T)
# transform columns to factor
abundances2$sample <- as.factor(abundances2$sample)
abundances2$organelle <- as.factor(abundances2$organelle)
abundances2$compartment <- as.factor(abundances2$compartment)
# pick random accessions
selected_accs <- sample(unique(abundances2$Accession), 4)
# filter plot subset
plotsub <- filter(abundances2, Accession %in% selected_accs)
ggplot(plotsub, aes(x = organelle, y = abundance, fill = compartment)) +
geom_point(position = position_dodge(width = 0.7), width = 0.7) +
facet_wrap(~Accession, scales = 'free') +
scale_fill_brewer(palette = "Dark2")
```
### Calculate and draw a PCA to get an overview of the dataset
```{r, fig.width = 3, fig.height = 3, fig.align = "center", eval=T}
pca_data <- as.data.frame(pivot_wider(abundances2,
names_from = Accession,
values_from = abundance,
id_cols = c("sample", "organelle", "compartment")))
pca_data <- unite(pca_data, sample, organelle, compartment, col = 'merger', sep = '_')
rownames(pca_data) <- pca_data$merger
pca_data <- pca_data[, -1]
####### double-check
pca_data[is.na(pca_data)] <- 0
####### double-check
pca_data <- pca_data[, apply(pca_data, 2, function(x) {sum(x) != 0})]
pca <- prcomp(pca_data, scale = T)
pcaPlotData <- as.data.frame(pca$x)
pcaPlotData$merger <- rownames(pcaPlotData)
pcaPlotData <- separate(data = pcaPlotData, col = merger, sep = '_',
into = c("sample", "organelle", "compartment"))
pca_plot_individuals <- ggplot(pcaPlotData, aes_string(color = 'organelle', shape = 'compartment', x = 'PC1', y = 'PC2')) +
geom_point(size = 2, stroke = 1.5) +
coord_equal() +
# theme_dominik +
scale_color_brewer(palette = 'Dark2')
png(file = "pca_plot_individuals.png", res = 300, width = 2000, height = 2000)
pca_plot_individuals
dev.off()
png(file = "pca_plot_individuals_labelled.png", res = 300, width = 2000, height = 2000)
pca_plot_individuals + geom_text(aes(label = sample), nudge_x = 5)
dev.off()
```
```{r, out.width = "100%", eval= T, echo=F}
include_graphics('pca_plot_individuals.png')
include_graphics('pca_plot_individuals_labelled.png')
```
### exlude ER
```{r, fig.width = 3, fig.height = 3, fig.align = "center", eval=T}
pca_data <- as.data.frame(pivot_wider(filter(abundances2, organelle != "endoplasmatic reticulum"),
names_from = Accession,
values_from = abundance,
id_cols = c("sample", "organelle", "compartment")))
pca_data <- unite(pca_data, sample, organelle, compartment, col = 'merger', sep = '_')
rownames(pca_data) <- pca_data$merger
pca_data <- pca_data[, -1]
####### double-check
pca_data[is.na(pca_data)] <- 0
####### double-check
pca_data <- pca_data[, apply(pca_data, 2, function(x) {sum(x) != 0})]
pca <- prcomp(pca_data, scale = T)
pcaPlotData <- as.data.frame(pca$x)
pcaPlotData$merger <- rownames(pcaPlotData)
pcaPlotData <- separate(data = pcaPlotData, col = merger, sep = '_',
into = c("sample", "organelle", "compartment"))
pca_plot_excl_ER <- ggplot(pcaPlotData, aes_string(color = 'organelle', shape = 'compartment', x = 'PC1', y = 'PC2')) +
geom_point(size = 2, stroke = 1.5) +
coord_equal() +
# theme_dominik +
scale_color_brewer(palette = 'Dark2')
png(file = "pca_plot_excl_ER.png", res = 300, width = 2000, height = 2000)
pca_plot_excl_ER
dev.off()
```
```{r, out.width = "100%", eval= T, echo=F}
include_graphics('pca_plot_excl_ER.png')
```
File deleted
workflows/pca_plot_excl_ER.png

139 KiB

workflows/pca_plot_individuals.png

147 KiB

workflows/pca_plot_individuals_labelled.png

171 KiB