Skip to content
Snippets Groups Projects
Commit 4fe37915 authored by Saskia Hiltemann's avatar Saskia Hiltemann
Browse files

add workflows and results for second experiment

parent f4e17e07
No related branches found
No related tags found
No related merge requests found
Pipeline #568 failed
Showing
with 5053 additions and 0 deletions
File added
# 3d R plot
#saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Schippers"
saveat <- "R:/haasf/madland_RNA-seq_Schippers"
#file.rpkm <- '/mnt/Teamdrive/projects/MAdLand/cooperations/Schippers_Lab_RNA-seq_analyses/Mp.p.sort.wotRNA.rpkm'
file.rpkm <- "S:/projects/MAdLand/cooperations/Schippers_Lab_RNA-seq_analyses/Mp.p.sort.wotRNA.rpkm"
# file.rpkm <- "S:/projects/MAdLand/cooperations/Schippers_Lab_RNA-seq_analyses/31315_all.p.sort.rpkm"
data.rpkm <- read.table(file.rpkm, header=T, sep="\t", row.names=1)
# sort by colnames
data.rpkm <- data.rpkm[,order(colnames(data.rpkm))]
librariesName <- list(
control_6h = c("control_6h", "red"),
control_24h = c("control_24h", "blue"),
LCS_6h = c("LCS_6h", "green"),
LCS_24h = c("LCS_24h", "yellow")
)
header.ori <- c("MF82", "MF81", "MF80", "MF76", "MF75", "MF74", "MF68", "MF67", "MF66", "MF62", "MF61", "MF60")
header.new <- c("LCS_24h.2", "LCS_24h.1", "LCS_24h", "control_24h.2", "control_24h.1", "control_24h", "control_6h.2", "LCS_6h.1", "LCS_6h", "LCS_6h.2", "control_6h.1", "control_6h")
# header.ori <- c("P97", "P96", "P95", "P91", "P90", "P89", "P111", "P110", "P109", "P105", "P104", "P103")
# header.new <- c("LCS_6h.2", "LCS_6h.1", "LCS_6h", "control_6h.2", "control_6h.1", "control_6h", "LCS_24h.2", "LCS_24h.1", "LCS_24h", "control_24h.2", "control_24h.1", "control_24h")
header.new <- header.new[order(header.ori)]
header.ori <- header.ori[order(header.ori)]
col.header <- header.new
colnames(data.rpkm) <- col.header
library("DESeq2")
library("ggplot2")
library("RColorBrewer")
library("pheatmap")
library("BiocGenerics")
library("rgl")
library("magick")
library("sjmisc")
################### running ######################
### PCA RPKM ###
set.seed(0)
data.inv <- t(data.rpkm)
data.dist <-dist(data.inv, method="euclidean") # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
data.dist.hc <- hclust(data.dist,method="ward.D2")
data.dist.pca <-princomp(data.dist,cor=T)
pc1 <- data.dist.pca$scores[,1]
pc2 <- data.dist.pca$scores[,2]
pc3 <- data.dist.pca$scores[,3]
# create data frame for pc1 pc2 pc3
data.dist.pca.frame = data.frame(pc1,pc2,pc3)
rownames(data.dist.pca.frame) <- names(data.dist.pca$scale)
colnames(data.dist.pca.frame) <- c("pc1","pc2","pc3")
condition.values <- c()
condition.values.color <- c()
for(a in colnames(data.rpkm)) {
v <- substr(a, nchar(a), nchar(a))
if(str_contains(v, c(1,2,3,4,5,6,7,8,9,0), logic = "or")) {
if (substr(substr(a, 1, nchar(a)-2), nchar(substr(a, 1, nchar(a)-2)), nchar(substr(a, 1, nchar(a)-2))) == ".") {
n <- substr(a, 1, nchar(a)-3)
} else {
n <- substr(a, 1, nchar(a)-2)
}
} else {
n <- a
}
condition.values <- c(condition.values, librariesName[n][[1]][1])
condition.values.color <- c(condition.values.color, librariesName[n][[1]][2])
}
data.dist.pca.frame["tissue"] <- condition.values
data.dist.pca.frame["color"] <- condition.values.color
data.dist.pca.frame["name"] <- names(data.dist.pca$scale)
attr(data.dist.pca.frame, "percentVar") <- (data.dist.pca$sdev)^2 / sum(data.dist.pca$sdev^2) # cumsum()
# simple plot
png(filename=paste0(saveat, "/HC_RPKM_normalized.png"))
plot(data.dist.hc) # hc plot
dev.off()
png(filename=paste0(saveat, "/PCA_variance_RPKM_normalized.png"))
plot(data.dist.pca) # variances; var(data.dist.pca$sdev[1:9])
dev.off()
# get the parcent variation
percentVar <- round(100 * attr(data.dist.pca.frame, "percentVar"))
# 3d plot
plot3d(pc1, pc2, pc3,
type = "s", # p, s, l, h, n
#pch = c(1:3),
col = condition.values.color,
size = 1,
xlab = paste0("PC1: ",percentVar[1],"% variance"),
ylab = paste0("PC2: ",percentVar[2],"% variance"),
zlab = paste0("PC3: ",percentVar[3],"% variance"),
cex = 2,
main = "", # -> princomp",
)
# shift <- matrix(4, 4, 4, byrow = TRUE)
# text3d(shift,texts=1:3)
grid3d(c("x", "y", "z"))
## add legend
legend3d("right", unique(condition.values), pch = 19, col = unique(condition.values.color))
#### video #####
M <- par3d("userMatrix")
play3d( par3dinterp( userMatrix=list(M,
rotate3d(M, pi/2, 1, 0, 0),
rotate3d(M, pi/2, 0, 1, 0) ) ),
duration=2 )
movie3d(spin3d(axis = c(1, 2, 1)), duration = 5,
dir = saveat)
#### video end ####
# pc1, pc2
png(filename=paste0(saveat, "/PCA_RPKM_normalized.png"))
ggplot(
data.dist.pca.frame,
aes(
pc1,
pc2,
color=tissue)
) +
geom_point(size=2.5) +
xlab(
paste0("PC1: ",percentVar[1],"% variance")
) +
ylab(
paste0("PC2: ",percentVar[2],"% variance")
) +
#theme() + #, face="bold"
scale_colour_manual(
values= c("blue", "red", "yellow", "green") # dodgerblue3
) +
ggtitle("PCA of all samples (RPKM normalized)") +
theme(
plot.title = element_text(lineheight=.8),
panel.background = element_rect(fill = "gray95")
)
dev.off()
######## Some experiments ######
# for(i in colnames(data)) {
# colnames(data)[colnames(data)==grep(i, names(data), value = TRUE)] <- librariesName[[i]]
# }
#
# for(i in colnames(data)) {
# colnames(data)[colnames(data)==grep(i, names(data), value = TRUE)] <- names(librariesName[librariesName==grep(i, librariesName, value = TRUE)])
# }
This diff is collapsed.
This diff is collapsed.
# 3d R plot
saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Schippers/second_experiment/At_all_DEGs_0.9"
# saveat <- "R:/haasf/madland_RNA-seq_Schippers"
file.rpkm <- '/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Schippers/second_experiment/At_all.p.sort.wo_ncRNA_miRNA.rpkm'
# file.rpkm <- "S:/projects/MAdLand/cooperations/Schippers_Lab_RNA-seq_analyses/Mp.p.sort.wotRNA.rpkm"
# file.rpkm <- "S:/projects/MAdLand/cooperations/Schippers_Lab_RNA-seq_analyses/31315_all.p.sort.rpkm"
data.rpkm <- read.table(file.rpkm, header=T, sep="\t", row.names=1)
# sort by colnames
data.rpkm <- data.rpkm[,order(colnames(data.rpkm))]
librariesName <- list(
control_6h = c("control_6h", "red"),
control_24h = c("control_24h", "blue"),
LCS_6h = c("LCS_6h", "green"),
LCS_24h = c("LCS_24h", "yellow")
)
# header.ori <- c("M54.fq.M54.fq_all.p.count", "M53.fq.M53.fq_all.p.count", "M52.fq.M52.fq_all.p.count", "M48.fq.M48.fq_all.p.count", "M47.fq.M47.fq_all.p.count", "M46.fq.M46.fq_all.p.count", "M40.fq.M40.fq_all.p.count", "M39.fq.M39.fq_all.p.count", "M38.fq.M38.fq_all.p.count", "M34.fq.M34.fq_all.p.count", "M33.fq.M33.fq_all.p.count", "M32.fq.M32.fq_all.p.count")
# header.new <- c("LCS_24h.2", "LCS_24h.1", "LCS_24h", "control_24h.2", "control_24h.1", "control_24h", "LCS_6h.2", "LCS_6h.1", "LCS_6h", "control_6h.2", "control_6h.1", "control_6h")
# header.ori <- c("MF82", "MF81", "MF80", "MF76", "MF75", "MF74", "MF68", "MF67", "MF66", "MF62", "MF61", "MF60")
# header.new <- c("LCS_24h.2", "LCS_24h.1", "LCS_24h", "control_24h.2", "control_24h.1", "control_24h", "control_6h.2", "LCS_6h.1", "LCS_6h", "LCS_6h.2", "control_6h.1", "control_6h")
header.ori <- c("A6.fq.A6.fq.single.p.count", "A5.fq.A5.fq.single.p.count", "A4.fq.A4.fq.single.p.count", "A26.fq.A26.fq.single.p.count", "A25.fq.A25.fq.single.p.count", "A24.fq.A24.fq.single.p.count", "A20.fq.A20.fq.single.p.count", "A19.fq.A19.fq.single.p.count", "A18.fq.A18.fq.single.p.count", "A12.fq.A12.fq.single.p.count", "A11.fq.A11.fq.single.p.count", "A10.fq.A10.fq.single.p.count")
header.new <- c("control_6h.2", "control_6h.1", "control_6h", "LCS_24h.2", "LCS_24h.1", "LCS_24h", "control_24h.2", "control_24h.1", "control_24h", "LCS_6h.2", "LCS_6h.1", "LCS_6h")
header.new <- header.new[order(header.ori)]
header.ori <- header.ori[order(header.ori)]
col.header <- header.new
colnames(data.rpkm) <- col.header
library("DESeq2")
library("ggplot2")
library("RColorBrewer")
library("pheatmap")
library("BiocGenerics")
library("rgl")
library("magick")
library("sjmisc")
################### running ######################
### PCA RPKM ###
set.seed(0)
data.inv <- t(data.rpkm)
data.dist <-dist(data.inv, method="euclidean") # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
data.dist.hc <- hclust(data.dist,method="ward.D2")
data.dist.pca <-princomp(data.dist,cor=T)
pc1 <- data.dist.pca$scores[,1]
pc2 <- data.dist.pca$scores[,2]
pc3 <- data.dist.pca$scores[,3]
# create data frame for pc1 pc2 pc3
data.dist.pca.frame = data.frame(pc1,pc2,pc3)
rownames(data.dist.pca.frame) <- names(data.dist.pca$scale)
colnames(data.dist.pca.frame) <- c("pc1","pc2","pc3")
condition.values <- c()
condition.values.color <- c()
for(a in colnames(data.rpkm)) {
v <- substr(a, nchar(a), nchar(a))
if(str_contains(v, c(1,2,3,4,5,6,7,8,9,0), logic = "or")) {
if (substr(substr(a, 1, nchar(a)-2), nchar(substr(a, 1, nchar(a)-2)), nchar(substr(a, 1, nchar(a)-2))) == ".") {
n <- substr(a, 1, nchar(a)-3)
} else {
n <- substr(a, 1, nchar(a)-2)
}
} else {
n <- a
}
condition.values <- c(condition.values, librariesName[n][[1]][1])
condition.values.color <- c(condition.values.color, librariesName[n][[1]][2])
}
data.dist.pca.frame["tissue"] <- condition.values
data.dist.pca.frame["color"] <- condition.values.color
data.dist.pca.frame["name"] <- names(data.dist.pca$scale)
attr(data.dist.pca.frame, "percentVar") <- (data.dist.pca$sdev)^2 / sum(data.dist.pca$sdev^2) # cumsum()
# simple plot
png(filename=paste0(saveat, "/HC_RPKM_normalized.png"))
plot(data.dist.hc) # hc plot
dev.off()
png(filename=paste0(saveat, "/PCA_variance_RPKM_normalized.png"))
plot(data.dist.pca) # variances; var(data.dist.pca$sdev[1:9])
dev.off()
# get the parcent variation
percentVar <- round(100 * attr(data.dist.pca.frame, "percentVar"))
# 3d plot
plot3d(pc1, pc2, pc3,
type = "s", # p, s, l, h, n
#pch = c(1:3),
col = condition.values.color,
size = 1,
xlab = paste0("PC1: ",percentVar[1],"% variance"),
ylab = paste0("PC2: ",percentVar[2],"% variance"),
zlab = paste0("PC3: ",percentVar[3],"% variance"),
cex = 2,
main = "", # -> princomp",
)
# shift <- matrix(4, 4, 4, byrow = TRUE)
# text3d(shift,texts=1:3)
grid3d(c("x", "y", "z"))
## add legend
legend3d("right", unique(condition.values), pch = 19, col = unique(condition.values.color))
#### video #####
M <- par3d("userMatrix")
play3d( par3dinterp( userMatrix=list(M,
rotate3d(M, pi/2, 1, 0, 0),
rotate3d(M, pi/2, 0, 1, 0) ) ),
duration=2 )
movie3d(spin3d(axis = c(1, 2, 1)), duration = 5,
dir = saveat)
#### video end ####
# pc1, pc2
png(filename=paste0(saveat, "/PCA_RPKM_normalized.png"))
ggplot(
data.dist.pca.frame,
aes(
pc1,
pc2,
color=tissue)
) +
geom_point(size=2.5) +
xlab(
paste0("PC1: ",percentVar[1],"% variance")
) +
ylab(
paste0("PC2: ",percentVar[2],"% variance")
) +
#theme() + #, face="bold"
scale_colour_manual(
values= c("blue", "red", "yellow", "green") # dodgerblue3
) +
ggtitle("PCA of all samples (RPKM normalized)") +
theme(
plot.title = element_text(lineheight=.8),
panel.background = element_rect(fill = "gray95")
)
dev.off()
######## Some experiments ######
# for(i in colnames(data)) {
# colnames(data)[colnames(data)==grep(i, names(data), value = TRUE)] <- librariesName[[i]]
# }
#
# for(i in colnames(data)) {
# colnames(data)[colnames(data)==grep(i, names(data), value = TRUE)] <- names(librariesName[librariesName==grep(i, librariesName, value = TRUE)])
# }
#!/bin/sh
prefix=$1
job_mem=$2
out_path=$3
nthreads=$4
species=$5
inpath=$6
qsub -N "run_"$prefix -l h_vmem=$job_mem -pe multislot $nthreads -S /bin/bash -e $out_path -o $out_path /vol/agrensing/fhaas/RNA-seq_Schippers/second_samples/no_backup/SNP_pipe_At.sh $nthreads $out_path $prefix $species $inpath
This diff is collapsed.
#!/bin/sh
prefix=$1
job_mem=$2
out_path=$3
nthreads=$4
species=$5
inpath=$6
qsub -N "run_"$prefix -l h_vmem=$job_mem -pe multislot $nthreads -S /bin/bash -e $out_path -o $out_path /vol/agrensing/fhaas/RNA-seq_Schippers/second_samples/no_backup/SNP_pipe_Mp.sh $nthreads $out_path $prefix $species $inpath
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment