Merge branch 'main' of git.nfdi4plants.org:brilator/rnaseq-workshop

6b8f4005 · Alisandra Denton · ff05dcec · fb2b9757 · 6b8f4005 · 6b8f4005
Commit 6b8f4005 authored 2 years ago by Alisandra Denton
--- a/.gitignore
+++ b/.gitignore
@@ -385,11 +385,7 @@ Temporary Items

 runs/isoseq/polished/

-# Share after playing the game
-guess_the_plot*
-
 # latex
 *.aux
 RNAseqWorkshop.out
 RNAseqWorkshop.toc
-workflows/docker/docker_tests_dominik.md
--- a/_reader/RNAseqWorkshop.pdf
+++ b/_reader/RNAseqWorkshop.pdf
--- a/runs/mapman/forMapmanloading.txt
+++ b/runs/mapman/forMapmanloading.txt
--- a/workflows/functional_mapman.R
+++ b/workflows/functional_mapman.R
@@ -2,12 +2,17 @@ load(file = "runs/kallisto_combined/mothertableV3.Rdata")

 # now we make a data.frame with the data required for Mapman loading
 # if you have more fold-changes, you can load more than one
-forMapman <- dfr[, c("locus", "log2FC")]
+forMapman <- dfr[!duplicated(dfr$locus), c("locus", "log2FC")]
+
+### TODO: the `duplicated` solution is a quick-and-dirty fix to avoid duplicated 
+### locus IDs coming from mapping on transcript level 
+### (plus Mapman accepts AT1G01040, not AT1G01040.1)
+
 head(forMapman)
 # now we export the data.frame in biologist and mapman readable format
 # Create the folder that write.table() below actually writes into; the export
 # fails if the target directory is missing, because write.table() does not
 # create directories itself
 dir.create(path = "runs/_backup/mapman", recursive = TRUE, showWarnings = FALSE)
 write.table(forMapman,
-  file = "runs/mapman/forMapmanloading.txt",
+  file = "runs/_backup/mapman/forMapmanloading.txt",
   quote = FALSE, sep = "\t", row.names = FALSE
 )
 remove(forMapman)
--- a/workflows/guess_the_plot.Rmd
+++ b/workflows/guess_the_plot.Rmd
+---
+title: "Comparing data presentations"
+author: "Dominik Brilhaus"
+date: "2022-08-16"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+# Load libraries
+
+```{r}
+
+# Install tidyverse only when it is missing, then attach it
+# (the chunks below call ggplot(), pivot_longer(), ...)
+if (!requireNamespace("tidyverse", quietly = TRUE)) {
+  install.packages("tidyverse")
+}
+library(tidyverse)
+
+# Same guard for scales, which supplies trans_breaks(), trans_format() and
+# math_format() for the log10 axis further down
+if (!requireNamespace("scales", quietly = TRUE)) {
+  install.packages("scales")
+}
+library(scales)
+
+```
+
+# Load data
+
+```{r}
+
+# Restore the objects stored in the combined kallisto table; the chunks below
+# read the data.frame `dfr`, which is presumably provided by this file
+load(file = "../runs/kallisto_combined/mothertableV3.Rdata")
+
+# View(dfr)
+# Loci that are shown in every plot of this document
+gene_set <- c(
+  "AT1G29930", "AT4G23230", "AT1G01120",
+  "AT1G06410", "AT2G25510"
+)
+
+# Output folder for the PDF copies of the plots (silent if it already exists)
+dir.create(
+  path = "../runs/guess_the_plots/",
+  recursive = TRUE,
+  showWarnings = FALSE
+)
+
+```
+
+
+
+## Dot plot of individual tpm
+
+```{r}
+
+# Keep the loci of interest and every column whose name contains "_tpm"
+plot_tpm <- subset(
+  dfr,
+  subset = locus %in% gene_set,
+  select = c("locus", grep("_tpm", colnames(dfr), value = TRUE)),
+  drop = TRUE
+)
+
+# Long format: one row per locus/column pair, stored in `name` and `value`
+plot_tpm <- pivot_longer(data = plot_tpm, cols = 2:ncol(plot_tpm))
+
+# Condition label: the column name without "_tpm" and the single character
+# in front of it (presumably the replicate number -- confirm against dfr)
+plot_tpm$condition <- gsub(
+  pattern = "._tpm",
+  replacement = "",
+  x = plot_tpm$name
+)
+
+# One dot per sample, dodged by condition so that the groups do not overlap
+p_tpm_point <- ggplot(data = plot_tpm, mapping = aes(x = locus, y = value)) +
+  geom_point(
+    mapping = aes(col = condition),
+    position = position_dodge(width = 0.5)
+  ) +
+  theme_classic() +
+  scale_color_brewer(palette = "Dark2") +
+  labs(y = "Transcript level [tpm]")
+
+# Show the plot in the rendered document
+print(p_tpm_point)
+
+# Two-page PDF: the full plot, then a copy without y-axis title and legend
+pdf(file = "../runs/guess_the_plots/p_tpm_point.pdf", width = 6, height = 6)
+print(p_tpm_point)
+print(
+  p_tpm_point +
+    labs(y = "") +
+    theme(legend.position = "none")
+)
+dev.off()
+
+```
+
+## Bar plot of mean tpm
+
+```{r}
+# Keep the loci of interest and every column whose name contains "mean_"
+plot_tpm_mean <- subset(
+  dfr,
+  subset = locus %in% gene_set,
+  select = c("locus", grep("mean_", colnames(dfr), value = TRUE)),
+  drop = TRUE
+)
+
+# Long format: one row per locus/column pair, stored in `name` and `value`
+plot_tpm_mean <- pivot_longer(
+  data = plot_tpm_mean,
+  cols = 2:ncol(plot_tpm_mean)
+)
+
+# Condition label: the column name with "mean_" removed
+plot_tpm_mean$condition <- gsub(
+  pattern = "mean_",
+  replacement = "",
+  x = plot_tpm_mean$name
+)
+
+# Dodged bars, one per condition and locus
+p_mean_bar <- ggplot(
+  data = plot_tpm_mean,
+  mapping = aes(x = locus, y = value)
+) +
+  geom_col(mapping = aes(fill = condition), position = position_dodge()) +
+  theme_classic() +
+  scale_fill_brewer(palette = "Dark2") +
+  labs(y = "Transcript level [tpm]") +
+  # no padding below the bars, 10 % padding above them
+  scale_y_continuous(expand = expansion(mult = c(0, .1)))
+
+# Show the plot in the rendered document
+print(p_mean_bar)
+
+# Two-page PDF: the full plot, then a copy without y-axis title and legend
+pdf(file = "../runs/guess_the_plots/p_mean_bar.pdf", width = 6, height = 6)
+print(p_mean_bar)
+print(
+  p_mean_bar +
+    labs(y = "") +
+    theme(legend.position = "none")
+)
+dev.off()
+
+
+
+```
+
+
+### ... log10 scaled
+
+
+```{r}
+
+# Same bar plot on a log10 y axis, labelled with powers of ten
+p_mean_bar_log10 <- p_mean_bar +
+  scale_y_log10(
+    breaks = trans_breaks("log10", function(x) 10^x),
+    labels = trans_format("log10", math_format(10^.x)),
+    expand = expansion(mult = c(0, .1))
+  )
+
+# The log ticks must be added to the plot BEFORE it is printed; adding them to
+# the value returned by print() leaves the printed plot without ticks
+print(p_mean_bar_log10 + annotation_logticks(sides = "l"))
+
+# Two-page PDF: the plot with log ticks, then a copy without y-axis title,
+# ticks and tick labels
+pdf(file = "../runs/guess_the_plots/p_mean_bar_log10.pdf", width = 6, height = 6)
+print(p_mean_bar_log10 + annotation_logticks(sides = "l"))
+print(
+  p_mean_bar_log10 +
+    labs(y = "") +
+    theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
+)
+dev.off()
+
+
+```
+
+### ... faceted by gene locus
+
+```{r}
+
+# One panel per locus, each with its own axis ranges; panels are square and
+# the x-axis text, ticks and title are hidden
+p_mean_bar_facet <- p_mean_bar +
+  facet_wrap(~locus, scales = "free") +
+  theme(
+    aspect.ratio = 1,
+    axis.text.x = element_blank(),
+    axis.ticks.x = element_blank(),
+    axis.title.x = element_blank()
+  )
+
+# Show the plot in the rendered document
+print(p_mean_bar_facet)
+
+# Single-page PDF copy
+pdf(
+  file = "../runs/guess_the_plots/p_mean_bar_facet.pdf",
+  width = 6, height = 6
+)
+print(p_mean_bar_facet)
+dev.off()
+
+
+
+```
+
+
+
+## Heatmap of mean tpm
+
+```{r}
+# Heatmap built from filled squares (point shape 22): conditions on the x axis
+# (drawn at the top), loci on the y axis, mean tpm as fill colour
+p_mean_heat <- ggplot(
+  data = plot_tpm_mean,
+  mapping = aes(x = condition, y = locus, fill = value)
+) +
+  geom_point(alpha = 1, size = 12, shape = 22) +
+  scale_x_discrete(position = "top") +
+  theme_classic() +
+  theme(
+    aspect.ratio = length(gene_set),
+    axis.title = element_blank(),
+    axis.text.x.top = element_text(angle = 45, hjust = 0),
+    axis.line = element_blank(),
+    axis.ticks = element_blank()
+  ) +
+  labs(fill = "Transcript level [tpm]")
+
+# Show the plot with a white-to-red fill scale in the rendered document
+p_mean_heat + scale_fill_gradient2(low = "white", high = "#C21F3A")
+
+# Two-page PDF: the full plot, then a copy without legend and top axis labels
+pdf(file = "../runs/guess_the_plots/p_mean_heat.pdf", width = 6, height = 5)
+print(
+  p_mean_heat +
+    scale_fill_gradient2(low = "white", high = "#C21F3A")
+)
+print(
+  p_mean_heat +
+    scale_fill_gradient2(low = "white", high = "#C21F3A") +
+    theme(legend.position = "none", axis.text.x.top = element_blank())
+)
+dev.off()
+
+
+
+
+```
+
+
+### ...log10 scaled
+
+```{r}
+
+# Same heatmap with a log10-transformed fill scale and a matching legend title
+p_mean_heat_log10 <- p_mean_heat +
+  scale_fill_gradient2(
+    low = "white",
+    high = "#C21F3A",
+    trans = "log10"
+  ) +
+  labs(fill = "log10(Transcript level [tpm])")
+
+
+# Two-page PDF: the full plot, then a copy without legend
+pdf(
+  file = "../runs/guess_the_plots/p_mean_heat_log10.pdf",
+  width = 6, height = 5
+)
+print(p_mean_heat_log10)
+print(p_mean_heat_log10 + theme(legend.position = "none"))
+dev.off()
+
+
+
+
+```
+
+
+## Bar plot of logFC
+
+```{r}
+
+# Loci of interest with their log2 fold change
+plot_logfc <- subset(
+  dfr,
+  subset = locus %in% gene_set,
+  select = c("locus", "log2FC"),
+  drop = TRUE
+)
+
+# One bar per locus around a horizontal zero line
+p_logfc_bar <- ggplot(data = plot_logfc, mapping = aes(x = locus, y = log2FC)) +
+  geom_col(width = 0.5) +
+  theme_classic() +
+  theme(
+    aspect.ratio = length(gene_set) * 0.7,
+    axis.text.x = element_text(angle = 45, hjust = 1)
+  ) +
+  geom_hline(yintercept = 0) +
+  labs(y = "log2-FC (treatment/mock)")
+
+
+# Show the plot in the rendered document
+print(p_logfc_bar)
+
+# Two-page PDF: the full plot, then a copy without y-axis title
+pdf(file = "../runs/guess_the_plots/p_logfc_bar.pdf", width = 6, height = 6)
+print(p_logfc_bar)
+print(p_logfc_bar + labs(y = ""))
+dev.off()
+
+```
+
+## Heatmap of logFC
+
+```{r}
+
+# Single-column heatmap of the log2 fold change: filled squares (point shape
+# 22) on a diverging blue-to-red scale centred on 0
+p_logfc_heat <- ggplot(
+  data = plot_logfc,
+  mapping = aes(x = 1, y = locus, fill = log2FC)
+) +
+  scale_fill_gradient2(low = "#377D98", high = "#C21F3A", midpoint = 0) +
+  geom_point(alpha = 1, size = 12, shape = 22) +
+  theme_classic() +
+  theme(
+    aspect.ratio = length(gene_set),
+    axis.title = element_blank(),
+    axis.text.x = element_blank(),
+    axis.line.x = element_blank(),
+    axis.ticks.x = element_blank()
+  ) +
+  labs(fill = "log2-FC (treatment/mock)")
+
+
+# Show the plot in the rendered document
+print(p_logfc_heat)
+
+# Two-page PDF: the full plot, then a copy without legend
+pdf(file = "../runs/guess_the_plots/p_logfc_heat.pdf", width = 6, height = 5)
+print(p_logfc_heat)
+print(p_logfc_heat + theme(legend.position = "none"))
+dev.off()
+
+  
+
+```
+  
+
--- a/workflows/guess_the_plot.html
+++ b/workflows/guess_the_plot.html
--- a/workflows/sleuth_differential_expression.R
+++ b/workflows/sleuth_differential_expression.R
@@ -4,10 +4,6 @@ library(sleuth)
 library(ggplot2)

 # First we need to specify where the kallisto results are stored.
-# If you didn't specify this in your kallisto script, move all kallisto results
-# folders (one for each sample) by GUI or the command line into a new folder called
-# "kallisto_results".
-
 # Begin by storing the base directory of the kallisto results in a variable
 base_dir <- "runs/kallisto_results/"

@@ -77,8 +73,8 @@ table(treatment.vs.mock$qval <= 0.01)
 head(treatment.vs.mock)

 # <<< challenge exercises >>> #
-# 1. compare the logFC edgeR calculated to that which we did
-# 2. where does the difference comes from? (it's in the edgeR manual)
+# 1. compare the logFC sleuth calculated to that which we did
+# 2. where does the difference come from?

 # now we transfer the result to our compilation data.frame 'dfr'
 # actually, all we really want is the 'false discovery rate' AKA 'q_value'