Skip to content
Snippets Groups Projects
Commit 6b8f4005 authored by Alisandra Denton's avatar Alisandra Denton
Browse files

Merge branch 'main' of git.nfdi4plants.org:brilator/rnaseq-workshop

parents ff05dcec fb2b9757
No related branches found
No related tags found
No related merge requests found
......@@ -385,11 +385,7 @@ Temporary Items
runs/isoseq/polished/
# Share after playing the game
guess_the_plot*
# latex
*.aux
RNAseqWorkshop.out
RNAseqWorkshop.toc
workflows/docker/docker_tests_dominik.md
No preview for this file type
This diff is collapsed.
......@@ -2,12 +2,17 @@ load(file = "runs/kallisto_combined/mothertableV3.Rdata")
# now we make a data.frame with the data required for Mapman loading
# if you have more fold-changes, you can load more than one
forMapman <- dfr[, c("locus", "log2FC")]
forMapman <- dfr[!duplicated(dfr$locus), c("locus", "log2FC")]
### TODO: the `duplicated` solution is a quick-and-dirty fix to avoid duplicated
### locus IDs coming from mapping on transcript level
### (plus Mapman accepts AT1G01040, not AT1G01040.1)
head(forMapman)
# now we export the data.frame in biologist and mapman readable format
dir.create(path = "runs/mapman", recursive = T, showWarnings = F)
write.table(forMapman,
file = "runs/mapman/forMapmanloading.txt",
file = "runs/_backup/mapman/forMapmanloading.txt",
quote = F, sep = "\t", row.names = F
)
remove(forMapman)
---
title: "Comparing data presentations"
author: "Dominik Brilhaus"
date: "2022-08-16"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Load libraries
```{r}
# Install each package only when it is missing, then attach it.
# tidyverse supplies ggplot2 and tidyr (pivot_longer) used in the chunks below;
# each guard must test for the package that the following library() call needs.
if (!"tidyverse" %in% row.names(installed.packages())) {
  install.packages("tidyverse")
}
library(tidyverse)
# scales supplies trans_breaks(), trans_format() and math_format() for the
# log10 axis labels.
if (!"scales" %in% row.names(installed.packages())) {
  install.packages("scales")
}
library(scales)
```
# Load data
```{r}
# Load the saved workspace; the chunks below expect it to provide the
# data.frame `dfr` (assumption based on its use further down -- verify).
load("../runs/kallisto_combined/mothertableV3.Rdata")
# View(dfr)

# Loci that are shown in every plot of this document.
gene_set <- c("AT1G29930", "AT4G23230", "AT1G01120", "AT1G06410", "AT2G25510")

# Output folder for the PDF copies of the plots. Parent folders are created as
# needed and no warning is raised if the folder already exists.
dir.create("../runs/guess_the_plots/", showWarnings = FALSE, recursive = TRUE)
```
## Dot plot of individual tpm
```{r}
# Keep the locus column plus every column whose name contains "_tpm" for the
# genes of interest. `drop` is left at its default (FALSE): with drop = TRUE a
# single matching row would be collapsed into a list and break pivot_longer().
plot_tpm <- subset(dfr, locus %in% gene_set,
  select = c("locus", grep("_tpm", colnames(dfr), value = TRUE))
)

# Long format: one row per locus and tpm column (columns `name` and `value`).
# Selecting by name avoids the 2:ncol() trap when no tpm column is present.
plot_tpm <- pivot_longer(plot_tpm, cols = -locus)

# Strip one arbitrary character followed by "_tpm" from the column name;
# presumably that character is the replicate number -- verify against colnames.
plot_tpm$condition <- gsub("._tpm", "", plot_tpm$name)

# One dot per value, dodged and coloured by condition.
p_tpm_point <- ggplot(plot_tpm, aes(x = locus, y = value)) +
  geom_point(aes(col = condition), position = position_dodge(width = 0.5)) +
  theme_classic() +
  scale_color_brewer(palette = "Dark2") +
  labs(y = "Transcript level [tpm]")

print(p_tpm_point)

# PDF page 1: full plot; page 2: same plot without y-axis title and legend.
pdf(file = "../runs/guess_the_plots/p_tpm_point.pdf", width = 6, height = 6)
print(p_tpm_point)
print(p_tpm_point + labs(y = "") + theme(legend.position = "none"))
dev.off()
```
## Bar plot of mean tpm
```{r}
# Keep the locus column plus every column whose name contains "mean_" for the
# genes of interest. `drop` is left at its default (FALSE): with drop = TRUE a
# single matching row would be collapsed into a list and break pivot_longer().
plot_tpm_mean <- subset(dfr, locus %in% gene_set,
  select = c("locus", grep("mean_", colnames(dfr), value = TRUE))
)

# Long format: one row per locus and mean column (columns `name` and `value`).
# Selecting by name avoids the 2:ncol() trap when no mean column is present.
plot_tpm_mean <- pivot_longer(plot_tpm_mean, cols = -locus)

# The condition label is the column name without its "mean_" part.
plot_tpm_mean$condition <- gsub("mean_", "", plot_tpm_mean$name)

# Dodged bars per locus, filled by condition; the y axis starts exactly at 0
# and gets 10 % head room at the top.
p_mean_bar <- ggplot(plot_tpm_mean, aes(x = locus, y = value)) +
  geom_col(aes(fill = condition), position = position_dodge()) +
  theme_classic() +
  scale_fill_brewer(palette = "Dark2") +
  labs(y = "Transcript level [tpm]") +
  scale_y_continuous(expand = expansion(mult = c(0, .1)))

print(p_mean_bar)

# PDF page 1: full plot; page 2: same plot without y-axis title and legend.
pdf(file = "../runs/guess_the_plots/p_mean_bar.pdf", width = 6, height = 6)
print(p_mean_bar)
print(p_mean_bar + labs(y = "") + theme(legend.position = "none"))
dev.off()
```
### ... log10 scaled
```{r}
# Same bar plot on a log10 y axis with 10^x style labels. Adding a second y
# scale replaces the continuous y scale already set on p_mean_bar (ggplot2
# reports this with a message).
p_mean_bar_log10 <- p_mean_bar +
  scale_y_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x)),
    expand = expansion(mult = c(0, .1))
  )

# The log ticks must be added to the plot *before* it is printed; writing
# print(p) + annotation_logticks() prints the plot without the ticks.
print(p_mean_bar_log10 + annotation_logticks(sides = "l"))

# PDF page 1: plot with log ticks; page 2: plot without y-axis title, ticks
# and tick labels.
pdf(file = "../runs/guess_the_plots/p_mean_bar_log10.pdf", width = 6, height = 6)
print(p_mean_bar_log10 + annotation_logticks(sides = "l"))
print(p_mean_bar_log10 + labs(y = "") + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank()))
dev.off()
```
### ... facetted by gene locus
```{r}
# One square panel per locus, each panel with its own axis ranges. The x axis
# text, ticks and title are hidden in every panel.
p_mean_bar_facet <- p_mean_bar +
  facet_wrap(~locus, scales = "free") +
  theme(
    aspect.ratio = 1,
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank()
  )

# Show the plot in the document ...
print(p_mean_bar_facet)

# ... and store a single-page PDF copy.
pdf(width = 6, height = 6, file = "../runs/guess_the_plots/p_mean_bar_facet.pdf")
print(p_mean_bar_facet)
dev.off()
```
## Heatmap of mean tpm
```{r}
# Heatmap built from filled squares (point shape 22): conditions on the x axis
# (drawn at the top), loci on the y axis, fill colour taken from the mean value.
# No fill scale is attached here so that later chunks can add their own.
p_mean_heat <- ggplot(plot_tpm_mean, aes(x = condition, y = locus, fill = value)) +
  geom_point(alpha = 1, size = 12, shape = 22) +
  scale_x_discrete(position = "top") +
  theme_classic() +
  theme(
    aspect.ratio = length(gene_set), axis.title = element_blank(),
    axis.text.x.top = element_text(angle = 45, hjust = 0),
    axis.line = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(fill = "Transcript level [tpm]")

# Linear white-to-red fill, defined once and reused for screen and PDF output.
p_mean_heat_linear <- p_mean_heat +
  scale_fill_gradient2(low = "white", high = "#C21F3A")

# Explicit print(), like the other chunks, so the plot is also drawn when the
# code is run non-interactively (e.g. via source()).
print(p_mean_heat_linear)

# PDF page 1: full plot; page 2: same plot without legend and condition labels.
pdf(file = "../runs/guess_the_plots/p_mean_heat.pdf", width = 6, height = 5)
print(p_mean_heat_linear)
print(p_mean_heat_linear +
  theme(legend.position = "none", axis.text.x.top = element_blank()))
dev.off()
```
### ... log10 scaled
```{r}
# Heatmap of the mean values with the fill colour mapped on a log10 scale;
# the legend title is adjusted accordingly.
p_mean_heat_log10 <- p_mean_heat +
  scale_fill_gradient2(
    low = "white",
    high = "#C21F3A",
    trans = "log10"
  ) +
  labs(fill = "log10(Transcript level [tpm])")

# Two-page PDF: first with legend, then without. This chunk only writes the
# PDF and does not draw the plot into the document.
pdf(width = 6, height = 5, file = "../runs/guess_the_plots/p_mean_heat_log10.pdf")
print(p_mean_heat_log10)
print(p_mean_heat_log10 + theme(legend.position = "none"))
dev.off()
```
## Bar plot of logFC
```{r}
# Locus and log2 fold-change for the genes of interest. `drop` is left at its
# default (FALSE): with drop = TRUE a single matching row would be collapsed
# into a list, which ggplot() cannot use as data.
plot_logfc <- subset(dfr, locus %in% gene_set,
  select = c("locus", "log2FC")
)

# One bar per locus with a reference line at zero; x labels are rotated by 45
# degrees and the panel height scales with the number of genes.
p_logfc_bar <- ggplot(plot_logfc, aes(x = locus, y = log2FC)) +
  geom_col(width = 0.5) +
  theme_classic() +
  theme(
    aspect.ratio = length(gene_set) * 0.7,
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  geom_hline(yintercept = 0) +
  labs(y = "log2-FC (treatment/mock)")

print(p_logfc_bar)

# PDF page 1: full plot; page 2: same plot without y-axis title.
pdf(file = "../runs/guess_the_plots/p_logfc_bar.pdf", width = 6, height = 6)
print(p_logfc_bar)
print(p_logfc_bar + labs(y = ""))
dev.off()
```
## Heatmap of logFC
```{r}
# Single-column heatmap of the log2 fold-changes: one filled square (point
# shape 22) per locus, coloured on a diverging blue-to-red scale centred at 0.
# The dummy x axis (x = 1) is hidden completely.
p_logfc_heat <- ggplot(plot_logfc, aes(x = 1, y = locus, fill = log2FC)) +
  geom_point(alpha = 1, size = 12, shape = 22) +
  scale_fill_gradient2(low = "#377D98", high = "#C21F3A", midpoint = 0) +
  labs(fill = "log2-FC (treatment/mock)") +
  theme_classic() +
  theme(
    aspect.ratio = length(gene_set),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Draw the plot in the document.
print(p_logfc_heat)

# Two-page PDF copy: first with legend, then without.
pdf(width = 6, height = 5, file = "../runs/guess_the_plots/p_logfc_heat.pdf")
print(p_logfc_heat)
print(p_logfc_heat + theme(legend.position = "none"))
dev.off()
```
This diff is collapsed.
......@@ -4,10 +4,6 @@ library(sleuth)
library(ggplot2)
# First we need to specify where the kallisto results are stored.
# If you didn't specify this in your kallisto script, move all kallisto results
# folders (one for each sample) by GUI or the command line into a new folder called
# "kallisto_results".
# Begin by storing the base directory of the kallisto results in a variable
base_dir <- "runs/kallisto_results/"
......@@ -77,8 +73,8 @@ table(treatment.vs.mock$qval <= 0.01)
head(treatment.vs.mock)
# <<< challenge exercises >>> #
# 1. compare the logFC edgeR calculated to that which we did
# 2. where does the difference come from? (it's in the edgeR manual)
# 1. compare the logFC sleuth calculated to that which we did
# 2. where does the difference come from?
# now we transfer the result to our compilation data.frame 'dfr'
# actually, all we really want is the 'false discovery rate' AKA 'q_value'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment