Skip to content
Snippets Groups Projects
Commit 3fd7a4aa authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

first glance at the data

parent 90f1f9d2
No related branches found
No related tags found
No related merge requests found
# questions to Anja
- what's the ricinus reference?
- analysis protocols?
- what tests?
- why membrane vs. lumen?
- can we also get ER vs. mito etc.?
-
\ No newline at end of file
No preview for this file type
---
title: "Untitled"
author: "Dominik"
date: "4/13/2022"
output: html_document
---
```{r setup, include=FALSE}
# Global chunk default: echo the source code of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
# Print the directory the notebook is evaluated from; the relative path below
# is resolved against it.
getwd()
# Show which files are present in the Proteome Discoverer dataset folder.
list.files("../assays/2022-04-13_proteome_discoverer/dataset/")
```
## Count sequence entries (header lines) in fasta
```{bash}
# Count the lines of the FASTA file that contain ">" (FASTA header lines start
# with ">"), i.e. the number of sequence entries in AllProteins.fasta.
grep ">" ../assays/2022-04-13_proteome_discoverer/dataset/AllProteins.fasta | wc -l
```
## skim excel files
```{r}
library(tidyverse)

# Quantification export without nested peptide rows.
quantification <- readxl::read_xlsx(
  "../assays/2022-04-13_proteome_discoverer/dataset/quantification.xlsx"
)
dim(quantification)
colnames(quantification)
length(unique(quantification$Accession))

# Same export, but with peptide tables interleaved between the protein rows.
quantification_peptides <- readxl::read_xlsx(
  "../assays/2022-04-13_proteome_discoverer/dataset/quantification_with_peptides.xlsx"
)
dim(quantification_peptides)
colnames(quantification_peptides)
length(unique(quantification_peptides$Accession))

## This is a whole new level of untidy. A table nested in a table. I hate it. Thank you, Thermo Fisher Scientific.

### 1. keep only high confidence rows
quant_proteins <- filter(
  quantification_peptides,
  `Protein FDR Confidence: Combined` == "High"
)

### 2. remove empty columns (columns that are NA in every remaining row)
quant_proteins <- quant_proteins[, colSums(is.na(quant_proteins)) != nrow(quant_proteins)]

### 3. dummy check, that content is the same...
dim(quant_proteins) == dim(quantification)
# Number of rows whose 6th column agrees between both tables. na.rm = TRUE keeps
# a single missing value from turning the whole count into NA.
sum(
  as.numeric(unlist(quantification[, 6])) ==
    as.numeric(unlist(quant_proteins[, 6])),
  na.rm = TRUE
)

## pull out peptide data
# Rows without a protein-level FDR confidence are the nested peptide rows
# (including their repeated header lines).
peptides <- filter(
  quantification_peptides,
  is.na(`Protein FDR Confidence: Combined`)
)
# Number of rows that were NOT classified as peptide rows
nrow(quantification_peptides) - nrow(peptides)

### 2. remove empty columns
peptides <- peptides[, colSums(is.na(peptides)) != nrow(peptides)]

### 3. strip between-data headlines
# The first peptide row holds the real peptide column names; promote it to the
# header, then drop every repeated header line (those have "Checked" in the
# Checked column).
colnames(peptides) <- as.character(peptides[1, ])
peptides <- filter(peptides, Checked != "Checked")
```
```{r}
# extract abundances per accession only
grouped_cols <- grep(
  "Abundances (Grouped)",
  colnames(quantification),
  fixed = TRUE,
  value = TRUE
)
abundances_grouped <- quantification[, c("Accession", grouped_cols)]

# pivot to long format and split each column name at ", " into its two parts
abundances_grouped2 <-
  abundances_grouped %>%
  pivot_longer(
    !Accession,
    names_to = c("organelle", "compartment"),
    values_to = "abundance",
    names_sep = ", "
  )

# remove the common "Abundances (Grouped): " prefix from the organelle part
abundances_grouped2$organelle <- gsub(
  "Abundances (Grouped): ",
  "",
  abundances_grouped2$organelle,
  fixed = TRUE
)

# transform columns to factor
abundances_grouped2$organelle <- as.factor(abundances_grouped2$organelle)
abundances_grouped2$compartment <- as.factor(abundances_grouped2$compartment)

# pick random accessions (at most 4; fewer if the dataset has fewer accessions,
# because sample() errors when asked for more items than are available)
all_accs <- unique(abundances_grouped2$Accession)
selected_accs <- sample(all_accs, min(4, length(all_accs)))

# filter plot subset
plotsub <- filter(abundances_grouped2, Accession %in% selected_accs)

# one facet per selected accession, bars dodged by compartment
ggplot(plotsub, aes(x = organelle, y = abundance, fill = compartment)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.7) +
  facet_wrap(~Accession, scales = "free") +
  scale_fill_brewer(palette = "Dark2")
```
### Calculate and draw a PCA to get an overview of the dataset
```{r, fig.width = 3, fig.height = 3, fig.align = "center", eval=T}
# Reshape to one row per organelle/compartment combination and one column per
# accession, so that each combination is one observation for prcomp().
pca_data <- as.data.frame(pivot_wider(abundances_grouped2,
                                      names_from = Accession,
                                      values_from = abundance,
                                      id_cols = c("organelle", "compartment")))

# Fuse both descriptors into a single label, use it as row names and drop the
# label column (unite() puts it first) so only numeric columns remain.
pca_data <- unite(pca_data, organelle, compartment, col = "merger", sep = "_")
rownames(pca_data) <- pca_data$merger
pca_data <- pca_data[, -1, drop = FALSE]

####### double-check
# Missing abundances are treated as 0.
pca_data[is.na(pca_data)] <- 0

####### double-check
# Drop accessions that are constant across all rows. This removes the all-zero
# columns and also any other constant column, which prcomp(scale. = TRUE)
# cannot rescale to unit variance.
is_variable <- vapply(pca_data, function(x) length(unique(x)) > 1, logical(1))
pca_data <- pca_data[, is_variable, drop = FALSE]

# `scale.` is spelled out in full instead of relying on partial matching.
pca <- prcomp(pca_data, scale. = TRUE)

# Scores per row, with the row label split back into its two descriptors
pcaPlotData <- as.data.frame(pca$x)
pcaPlotData$merger <- rownames(pcaPlotData)
pcaPlotData <- separate(data = pcaPlotData, col = merger, sep = "_",
                        into = c("organelle", "compartment"))

ggplot(pcaPlotData, aes(x = PC1, y = PC2, color = organelle, shape = compartment)) +
  geom_point(size = 3, stroke = 1.5) +
  coord_equal() +
  # theme_dominik +
  scale_color_brewer(palette = "Dark2")
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment