Skip to content
Snippets Groups Projects
Commit 35149420 authored by Saskia Hiltemann's avatar Saskia Hiltemann
Browse files

add Fabian's R scripts

parent 8ebefab1
No related branches found
No related tags found
No related merge requests found
# 3d R plot
# saveat:    directory that receives every figure/movie written by this script.
# file.rpkm: tab-separated RPKM table; its first column is used as row names.
saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker"
file.rpkm <- '/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.rpkm'
## Windows ###
#-#saveat <- "Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/"
#-#file.rpkm <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs/Celery_merged_isoforms.p.sort.rpkm'
#-#file.count <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/Celery_merged_isoforms.p.sort.counts'
###
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
data.rpkm <- read.table(file.rpkm, header = TRUE, sep = "\t", row.names = 1)
# sort by colnames; drop = FALSE keeps a data frame even when the table has
# only one data column (otherwise the result collapses to a plain vector)
data.rpkm <- data.rpkm[, order(colnames(data.rpkm)), drop = FALSE]
# Lookup table keyed by condition name. Each entry is a character vector of
# length two: c(<condition label>, <plot colour>).
librariesName <- local({
  colour.by.condition <- c(
    cop_D = "red",
    cop_L = "blue",
    spa_D = "green",
    spa_L = "yellow",
    WT_D = "black",
    WT_L = "violet"
  )
  # Map() names the result after its first (character) argument, so every
  # entry ends up stored under its own condition label.
  Map(
    function(condition, colour) c(condition, colour),
    names(colour.by.condition),
    unname(colour.by.condition)
  )
})
#
# header.ori <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", 
"56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
# header.new <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
#
# header.new <- header.new[order(header.ori)]
# header.ori <- header.ori[order(header.ori)]
#
# col.header <- header.new
#
# colnames(data.rpkm) <- col.header
library("DESeq2")
library("ggplot2")
library("RColorBrewer")
library("pheatmap")
library("BiocGenerics")
library("rgl")
library("magick")
library("sjmisc")
################### running ######################
### PCA RPKM ###
set.seed(0)
# Three components are extracted below; fail early with a readable message
# instead of a "subscript out of bounds" error on scores[, 3].
if (ncol(data.rpkm) < 3) {
  stop(
    "Need at least 3 sample columns to extract three principal components, got ",
    ncol(data.rpkm),
    call. = FALSE
  )
}
# Transpose so that each row is one column (sample) of the RPKM table.
data.inv <- t(data.rpkm)
data.dist <- dist(data.inv, method = "euclidean") # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
data.dist.hc <- hclust(data.dist, method = "ward.D2")
# NOTE(review): princomp() is given the distance object rather than the
# expression values themselves -- confirm this is intended.
data.dist.pca <- princomp(data.dist, cor = TRUE)
pc1 <- data.dist.pca$scores[, 1]
pc2 <- data.dist.pca$scores[, 2]
pc3 <- data.dist.pca$scores[, 3]
# create data frame for pc1 pc2 pc3 (columns named explicitly, rows named
# after the entries of the princomp scale vector)
data.dist.pca.frame <- data.frame(pc1 = pc1, pc2 = pc2, pc3 = pc3)
rownames(data.dist.pca.frame) <- names(data.dist.pca$scale)
# Resolve, for every column of data.rpkm, the condition label and the plot
# colour stored in librariesName.
#
# The lookup key is the column name with a trailing replicate suffix removed:
#   * name ends in a digit and its third-from-last character is "."
#       -> drop the last three characters (e.g. "cop_D.10" -> "cop_D")
#   * name ends in a digit otherwise
#       -> drop the last two characters   (e.g. "cop_D.1"  -> "cop_D")
#   * any other name is used unchanged.
sample.names <- colnames(data.rpkm)
name.length <- nchar(sample.names)
ends.in.digit <- grepl("[0-9]$", sample.names)
long.suffix <- substr(sample.names, name.length - 2, name.length - 2) == "."
suffix.length <- ifelse(ends.in.digit, ifelse(long.suffix, 3, 2), 0)
condition.names <- substr(sample.names, 1, name.length - suffix.length)

# A key missing from librariesName used to contribute nothing to the result
# vectors, leaving them shorter than the number of samples and therefore
# misaligned with the PCA scores. Stop with an explicit message instead.
unknown.conditions <- setdiff(condition.names, names(librariesName))
if (length(unknown.conditions) > 0) {
  stop(
    "No entry in librariesName for condition(s): ",
    paste(unknown.conditions, collapse = ", "),
    call. = FALSE
  )
}

# One label / colour per sample, in column order (unnamed character vectors).
condition.values <- vapply(
  librariesName[condition.names],
  function(entry) entry[1],
  character(1),
  USE.NAMES = FALSE
)
condition.values.color <- vapply(
  librariesName[condition.names],
  function(entry) entry[2],
  character(1),
  USE.NAMES = FALSE
)
# Attach the per-sample annotation columns to the score table.
data.dist.pca.frame[["tissue"]] <- condition.values
data.dist.pca.frame[["color"]] <- condition.values.color
data.dist.pca.frame[["name"]] <- names(data.dist.pca$scale)
# Fraction of the summed squared standard deviations carried by each component.
attr(data.dist.pca.frame, "percentVar") <- prop.table(data.dist.pca$sdev^2) # cumsum()

# simple plots: each one is written to its own PNG below `saveat`
hc.png <- paste0(saveat, "/HC_RPKM_normalized.png")
png(filename = hc.png)
plot(data.dist.hc) # hc plot
dev.off()

variance.png <- paste0(saveat, "/PCA_variance_RPKM_normalized.png")
png(filename = variance.png)
plot(data.dist.pca) # variances; var(data.dist.pca$sdev[1:9])
dev.off()

# get the percent variation (rounded to whole percent) for the axis labels
percentVar <- round(attr(data.dist.pca.frame, "percentVar") * 100)
# 3d plot of the first three components, one sphere per sample, coloured by
# the per-sample colour vector. Axis labels carry the rounded percentage
# stored in percentVar.
# The argument list must not end in a comma: a trailing comma passes an empty
# argument into `...`.
plot3d(pc1, pc2, pc3,
  type = "s", # p, s, l, h, n
  #pch = c(1:3),
  col = condition.values.color,
  size = 1,
  xlab = paste0("PC1: ", percentVar[1], "% variance"),
  ylab = paste0("PC2: ", percentVar[2], "% variance"),
  zlab = paste0("PC3: ", percentVar[3], "% variance"),
  cex = 2,
  main = "" # -> princomp",
)
# shift <- matrix(4, 4, 4, byrow = TRUE)
# text3d(shift,texts=1:3)
grid3d(c("x", "y", "z"))
## add legend
# unique() keeps first-occurrence order in both vectors, so labels and colours
# stay paired as long as every condition has its own colour.
legend3d("right", unique(condition.values), pch = 19, col = unique(condition.values.color))
#### video #####
# Start from the current user matrix of the rgl scene and interpolate through
# two quarter-turn rotations of it (about the x axis, then about the y axis).
M <- par3d("userMatrix")
view.keyframes <- list(
  M,
  rotate3d(M, pi/2, 1, 0, 0),
  rotate3d(M, pi/2, 0, 1, 0)
)
play3d(par3dinterp(userMatrix = view.keyframes), duration = 2)
# Record a 5 second spin about axis (1, 2, 1); output goes to `saveat`.
movie3d(
  spin3d(axis = c(1, 2, 1)),
  duration = 5,
  dir = saveat
)
#### video end ####
# pc1, pc2
# Named palette (condition label -> colour) taken from librariesName, so the
# 2D plot uses the same colour per condition as the 3D plot regardless of how
# the tissue levels happen to be ordered (string sort order is locale
# dependent, which made the previous unnamed colour vector fragile).
tissue.palette <- setNames(
  vapply(librariesName, function(entry) entry[2], character(1)),
  vapply(librariesName, function(entry) entry[1], character(1))
)
pca.plot <- ggplot(
  data.dist.pca.frame,
  aes(
    pc1,
    pc2,
    color = tissue
  )
) +
  geom_point(size = 2.5) +
  xlab(
    paste0("PC1: ", percentVar[1], "% variance")
  ) +
  ylab(
    paste0("PC2: ", percentVar[2], "% variance")
  ) +
  #theme() + #, face="bold"
  scale_colour_manual(
    values = tissue.palette # dodgerblue3
  ) +
  ggtitle("PCA of all samples (RPKM normalized)") +
  theme(
    plot.title = element_text(lineheight = .8),
    panel.background = element_rect(fill = "gray95")
  )
png(filename = paste0(saveat, "/PCA_RPKM_normalized.png"))
# A ggplot object is only drawn when printed. Auto-printing does not happen
# when this script is run through source(), which would leave an empty PNG,
# so print explicitly.
print(pca.plot)
dev.off()
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment