Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Physcomitrium patens light signaling 2022
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MAdLand
Physcomitrium patens light signaling 2022
Commits
35149420
Commit
35149420
authored
1 year ago
by
Saskia Hiltemann
Browse files
Options
Downloads
Patches
Plain Diff
add Fabian's R scripts
parent
8ebefab1
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
workflows/3D_PCA_plot.R
+167
-0
167 additions, 0 deletions
workflows/3D_PCA_plot.R
workflows/DEG_pipeline.R
+923
-0
923 additions, 0 deletions
workflows/DEG_pipeline.R
with
1090 additions
and
0 deletions
workflows/3D_PCA_plot.R
0 → 100644
+
167
−
0
View file @
35149420
# 3d R plot
saveat
<-
"/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker"
file.rpkm
<-
'/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.rpkm'
## Windows ###
#-#saveat <- "Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/"
#-#file.rpkm <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs/Celery_merged_isoforms.p.sort.rpkm'
#-#file.count <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/Celery_merged_isoforms.p.sort.counts'
###
data.rpkm
<-
read.table
(
file.rpkm
,
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
# sort by colnames
data.rpkm
<-
data.rpkm
[,
order
(
colnames
(
data.rpkm
))]
librariesName
<-
list
(
cop_D
=
c
(
"cop_D"
,
"red"
),
cop_L
=
c
(
"cop_L"
,
"blue"
),
spa_D
=
c
(
"spa_D"
,
"green"
),
spa_L
=
c
(
"spa_L"
,
"yellow"
),
WT_D
=
c
(
"WT_D"
,
"black"
),
WT_L
=
c
(
"WT_L"
,
"violet"
)
)
#
# header.ori <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
# header.new <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
#
# header.new <- header.new[order(header.ori)]
# header.ori <- header.ori[order(header.ori)]
#
# col.header <- header.new
#
# colnames(data.rpkm) <- col.header
library
(
"DESeq2"
)
library
(
"ggplot2"
)
library
(
"RColorBrewer"
)
library
(
"pheatmap"
)
library
(
"BiocGenerics"
)
library
(
"rgl"
)
library
(
"magick"
)
library
(
"sjmisc"
)
################### running ######################
### PCA RPKM ###
set.seed
(
0
)
data.inv
<-
t
(
data.rpkm
)
data.dist
<-
dist
(
data.inv
,
method
=
"euclidean"
)
# "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
data.dist.hc
<-
hclust
(
data.dist
,
method
=
"ward.D2"
)
data.dist.pca
<-
princomp
(
data.dist
,
cor
=
T
)
pc1
<-
data.dist.pca
$
scores
[,
1
]
pc2
<-
data.dist.pca
$
scores
[,
2
]
pc3
<-
data.dist.pca
$
scores
[,
3
]
# create data frame for pc1 pc2 pc3
data.dist.pca.frame
=
data.frame
(
pc1
,
pc2
,
pc3
)
rownames
(
data.dist.pca.frame
)
<-
names
(
data.dist.pca
$
scale
)
colnames
(
data.dist.pca.frame
)
<-
c
(
"pc1"
,
"pc2"
,
"pc3"
)
condition.values
<-
c
()
condition.values.color
<-
c
()
for
(
a
in
colnames
(
data.rpkm
))
{
v
<-
substr
(
a
,
nchar
(
a
),
nchar
(
a
))
if
(
str_contains
(
v
,
c
(
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
0
),
logic
=
"or"
))
{
if
(
substr
(
substr
(
a
,
1
,
nchar
(
a
)
-2
),
nchar
(
substr
(
a
,
1
,
nchar
(
a
)
-2
)),
nchar
(
substr
(
a
,
1
,
nchar
(
a
)
-2
)))
==
"."
)
{
n
<-
substr
(
a
,
1
,
nchar
(
a
)
-3
)
}
else
{
n
<-
substr
(
a
,
1
,
nchar
(
a
)
-2
)
}
}
else
{
n
<-
a
}
condition.values
<-
c
(
condition.values
,
librariesName
[
n
][[
1
]][
1
])
condition.values.color
<-
c
(
condition.values.color
,
librariesName
[
n
][[
1
]][
2
])
}
data.dist.pca.frame
[
"tissue"
]
<-
condition.values
data.dist.pca.frame
[
"color"
]
<-
condition.values.color
data.dist.pca.frame
[
"name"
]
<-
names
(
data.dist.pca
$
scale
)
attr
(
data.dist.pca.frame
,
"percentVar"
)
<-
(
data.dist.pca
$
sdev
)
^
2
/
sum
(
data.dist.pca
$
sdev
^
2
)
# cumsum()
# simple plot
png
(
filename
=
paste0
(
saveat
,
"/HC_RPKM_normalized.png"
))
plot
(
data.dist.hc
)
# hc plot
dev.off
()
png
(
filename
=
paste0
(
saveat
,
"/PCA_variance_RPKM_normalized.png"
))
plot
(
data.dist.pca
)
# variances; var(data.dist.pca$sdev[1:9])
dev.off
()
# get the parcent variation
percentVar
<-
round
(
100
*
attr
(
data.dist.pca.frame
,
"percentVar"
))
# 3d plot
plot3d
(
pc1
,
pc2
,
pc3
,
type
=
"s"
,
# p, s, l, h, n
#pch = c(1:3),
col
=
condition.values.color
,
size
=
1
,
xlab
=
paste0
(
"PC1: "
,
percentVar
[
1
],
"% variance"
),
ylab
=
paste0
(
"PC2: "
,
percentVar
[
2
],
"% variance"
),
zlab
=
paste0
(
"PC3: "
,
percentVar
[
3
],
"% variance"
),
cex
=
2
,
main
=
""
,
# -> princomp",
)
# shift <- matrix(4, 4, 4, byrow = TRUE)
# text3d(shift,texts=1:3)
grid3d
(
c
(
"x"
,
"y"
,
"z"
))
## add legend
legend3d
(
"right"
,
unique
(
condition.values
),
pch
=
19
,
col
=
unique
(
condition.values.color
))
#### video #####
M
<-
par3d
(
"userMatrix"
)
play3d
(
par3dinterp
(
userMatrix
=
list
(
M
,
rotate3d
(
M
,
pi
/
2
,
1
,
0
,
0
),
rotate3d
(
M
,
pi
/
2
,
0
,
1
,
0
)
)
),
duration
=
2
)
movie3d
(
spin3d
(
axis
=
c
(
1
,
2
,
1
)),
duration
=
5
,
dir
=
saveat
)
#### video end ####
# pc1, pc2
png
(
filename
=
paste0
(
saveat
,
"/PCA_RPKM_normalized.png"
))
ggplot
(
data.dist.pca.frame
,
aes
(
pc1
,
pc2
,
color
=
tissue
)
)
+
geom_point
(
size
=
2.5
)
+
xlab
(
paste0
(
"PC1: "
,
percentVar
[
1
],
"% variance"
)
)
+
ylab
(
paste0
(
"PC2: "
,
percentVar
[
2
],
"% variance"
)
)
+
#theme() + #, face="bold"
scale_colour_manual
(
values
=
c
(
"red"
,
"blue"
,
"green"
,
"yellow"
,
"black"
,
"violet"
)
# dodgerblue3
)
+
ggtitle
(
"PCA of all samples (RPKM normalized)"
)
+
theme
(
plot.title
=
element_text
(
lineheight
=
.8
),
panel.background
=
element_rect
(
fill
=
"gray95"
)
)
dev.off
()
This diff is collapsed.
Click to expand it.
workflows/DEG_pipeline.R
0 → 100644
+
923
−
0
View file @
35149420
###############################################################
## DEG pipeline with edgeR, DESeq2 and NOISeq #
## Modified from the template of Dr. Kristian Ullrich 2014 #
## @fabianhaas, AG Rensing, Marburg, Jan 2016 #
###############################################################
# The pipeline expects sample triplicates.
# If you don't have three samples of one condition, use face files with zero counts.
# RPKM calculation is optional.
# Additional program to sum up the tsv files: /mnt/nas_temp/home/haasf/Program/sumup_htseq_counts.pl folder file_prefix
###########################################
## please define your variables #
###########################################
saveat
<-
"/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/DEGs_0.9"
file
<-
'/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.counts'
# Table with the length and the GC content
LenGCTable
<-
"/mnt/NAS_coruscant_datashare/haasf/GeneAtlas_RNASeq/DEG_2nd_round/cosmoss_V3.3.release_2016-07-22.IsoformV31.fasta.length_GC_Contig_Daten_for_34840_1nd.csv"
# Column position of the gene name and the width/gc content (first column is gene name!)
myLenPos
<-
1
myGCPos
<-
2
#
# hoo <- c("","")
# sampleLists <- c()
# for(a in hoo) {
# for(b in hoo) {
# if(a!=b) {
# tmp <- c(a,b)
# sampleLists <- c(sampleLists, list(tmp))
# }
# }
# }
sampleLists
<-
list
(
c
(
"cop_D"
,
"cop_L"
),
c
(
"cop_D"
,
"spa_D"
),
c
(
"cop_D"
,
"spa_L"
),
c
(
"cop_D"
,
"WT_D"
),
c
(
"cop_D"
,
"WT_L"
),
c
(
"cop_L"
,
"spa_D"
),
c
(
"cop_L"
,
"spa_L"
),
c
(
"cop_L"
,
"WT_D"
),
c
(
"cop_L"
,
"WT_L"
),
c
(
"spa_D"
,
"spa_L"
),
c
(
"spa_D"
,
"WT_D"
),
c
(
"spa_D"
,
"WT_L"
),
c
(
"spa_L"
,
"WT_D"
),
c
(
"spa_L"
,
"WT_L"
),
c
(
"WT_D"
,
"WT_L"
)
)
# Do you want a RPKM value table (1/0)
# Be warned, it takes a while to do this. approx. 10 minutes for 35000 genes and 6 samples.
rpkm
<-
0
# Are the data original HTSeq files? yes <- 1 ; no <- 0
htseq
<-
0
# threshold settings for the R packages
#edgeR
pvtedgeR
<-
0.001
#DESeq2
pvtDESeq2
<-
0.001
#NOISeq
pvtNOISeq
<-
0.9
# Output folder names
edgeRDir
<-
"edgeR"
DESeq2Dir
<-
"DESeq2"
NOISeqDir
<-
"NOISeq"
#########################
## stop editing here!! ##
#########################
###########################################
## executiv function #
###########################################
workflow
<-
function
(
file
,
saveat
,
SampleNames
,
LenGCTable
,
myLenPos
,
myGCPos
,
rpkm
,
pvtedgeR
,
pvtDESeq2
,
pvtNOISeq
,
edgeRDir
,
DESeq2Dir
,
NOISeqDir
,
htseq
)
{
# setwd(workingFolder) # set workspace
sampleNameI
<-
SampleNames
[
1
]
sampleNameII
<-
SampleNames
[
2
]
# Comparison name
cName
<-
paste
(
sampleNameI
,
sampleNameII
,
sep
=
" vs. "
)
filename
<-
strsplit
(
file
,
"/"
)
filename
<-
filename
[[
1
]][
length
(
filename
[[
1
]])]
# long but it's working
sampleName
<-
gsub
(
", "
,
"_"
,
toString
(
SampleNames
))
samples
<-
SampleNames
# create output dir
dir.create
(
saveat
,
showWarnings
=
FALSE
)
outdir
<-
paste0
(
saveat
,
"/"
,
sampleName
)
dir.create
(
outdir
)
# Start the log file
sink
(
paste0
(
outdir
,
"/DEGpipe_"
,
cName
,
".log"
))
# print parameter into the log file
print
(
"set parameters:"
)
print
(
paste0
(
"Counts file:"
,
file
))
print
(
paste0
(
"Results stored at:"
,
saveat
))
print
(
paste0
(
"SampleNames: "
,
SampleNames
))
print
(
paste0
(
"LenGCTable: "
,
LenGCTable
))
print
(
paste0
(
"myLenPos: "
,
myLenPos
))
print
(
paste0
(
"myGCPos: "
,
myGCPos
))
print
(
paste0
(
"rpkm: "
,
rpkm
))
print
(
paste0
(
"htseq: "
,
htseq
))
print
(
paste0
(
"pvtedgeR: "
,
pvtedgeR
))
print
(
paste0
(
"pvtDESeq2: "
,
pvtDESeq2
))
print
(
paste0
(
"pvtNOISeq: "
,
pvtNOISeq
))
print
(
paste0
(
"edgeRDir: "
,
edgeRDir
))
print
(
paste0
(
"DESeq2Dir: "
,
DESeq2Dir
))
print
(
paste0
(
"NOISeqDir: "
,
NOISeqDir
))
# session and package infos for the log file
print
(
sessionInfo
())
print
(
Sys.info
())
# load spike-in count tables
data0
<-
read.table
(
file
,
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
# just if the gene ids not ending with .1 (it is nessessery for NOISeqbio iteration step)
#row.names(data0) <- paste0(row.names(data0), ".1")
# remove HTSeq rows like "__no_feature"
data0
<-
data0
[
with
(
data0
,
!
grepl
(
'__'
,
rownames
(
data0
)))
,
]
data0
<-
data0
[
with
(
data0
,
!
grepl
(
'ERCC'
,
rownames
(
data0
)))
,
]
#data0 <- data0[ , with(data0, !grepl('X.1', colnames(data0)))]
#rename col names (library name to roman number)
# librariesName <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
# #librariesName <- c("80484_Celery_3_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80483_Celery_3_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80482_Celery_3_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80481_Celery_1_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80480_Celery_1_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80479_Celery_1_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80478_Celery_5_day_imbibed_whole_fruit_29C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80477_Celery_5_day_imbibed_whole_fruit_29C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80460_Celery_3_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80459_Celery_3_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80458_Celery_1_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80457_Celery_1_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80456_Celery_1_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80455_Celery_Dry_whole_fruit_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80454_Celery_Dry_whole_fruit_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80453_Celery_Dry_whole_fruit_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80452_Celery_5_day_imbibed_whole_fruit_29C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80451_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80450_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80449_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80448_Celery_5_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80447_Celery_5_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80446_Celery_5_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80445_Celery_3_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam.featureCounts.p.txt")
# romanNumber <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
# #romanNumber <- c("emb_3d_20.2", "emb_3d_20.1", "emb_3d_20", "emb_1d_20.2", "emb_1d_20.1", "emb_1d_20", "wf_5d_29.2", "wf_5d_29.1", "wf_3d_20.1", "wf_3d_20", "wf_1d_20.2", "wf_1d_20.1", "wf_1d_20", "wf_dry.2", "wf_dry.1", "wf_dry", "wf_5d_29", "wf_5d_ABA_20.2", "wf_5d_ABA_20.1", "wf_5d_ABA_20", "wf_5d_20.2", "wf_5d_20.1", "wf_5d_20", "wf_3d_20.2")
#
#
# for(i in 1:length(librariesName)) {
# colnames(data0)[colnames(data0)==grep(librariesName[i], names(data0), value = TRUE)] <- romanNumber[i]
# }
# creats a list of the column names/samples you would like to compare
SampleNames
<-
makeSampleList
(
sampleNameI
,
length
(
grep
(
paste0
(
"^"
,
sampleNameI
,
"[^A-Z]"
),
colnames
(
data0
)))
+
1
,
sampleNameII
,
length
(
grep
(
paste0
(
"^"
,
sampleNameII
,
"[^A-Z]"
),
colnames
(
data0
)))
+
1
)
print
(
SampleNames
)
## mal anschauen als ersatz für das darüber ##
## newenw <- table.new[, grepl(paste(LIST,collapse="|"), colnames(table.new))]
# exctract the samples from the data0 if needed
if
(
length
(
SampleNames
)
>
0
)
{
RTableInFiles
<-
data0
[,
SampleNames
]
data
<-
data0
[,
SampleNames
]
}
print
(
head
(
RTableInFiles
))
# sort by column name
# data <- data[ , order(names(data))] # dangeros, the order of query and referenz might be not the same
# see how many rows and columns are in the tables
print
(
paste
(
"Raw data row number: "
,
nrow
(
data
)))
print
(
paste
(
"Raw data column number: "
,
ncol
(
data
)))
# 2) A typical differential expression analysis workflow
# 2.1) Filtering and exploratory data analysis
# filter: One single gene/spike-in must have at least 5 counts in minimum 2 libraries
filter
<-
apply
(
data
,
1
,
function
(
x
)
length
(
x
[
x
>
5
])
>=
2
)
dataFiltered
<-
data
[
filter
,]
# see how many rows and columns are left
print
(
paste
(
"Filtered data row number: "
,
nrow
(
dataFiltered
)))
print
(
paste
(
"Filtered data row number: "
,
ncol
(
dataFiltered
)))
# filter gene names
genes
<-
rownames
(
dataFiltered
)[
grep
(
"^Pp3"
,
rownames
(
dataFiltered
))]
# filter spike-in names
spikes
<-
rownames
(
dataFiltered
)[
grep
(
"^ERCC"
,
rownames
(
dataFiltered
))]
# get the column name list as.factors
xtmp
<-
colnames
(
dataFiltered
)
x
<-
split2list
(
xtmp
)
sampleNameList
<-
gsub
(
".[1234567890]+$"
,
""
,
SampleNames
)
columnPos
<-
makeIndexList
(
SampleNames
)
# run ...
### edgeR ###
print
(
paste0
(
"edgeR is running with "
,
cName
))
packageDescription
(
"edgeR"
)
edgeRFunction
(
outdir
,
edgeRDir
,
cName
,
RTableInFiles
,
pvtedgeR
,
sampleNameI
,
sampleNameII
,
sampleNameList
)
#### DESeq2 ####
print
(
paste0
(
"DESeq2 is running with "
,
cName
))
packageDescription
(
"DESeq2"
)
DESeq2Function
(
outdir
,
DESeq2Dir
,
cName
,
RTableInFiles
,
pvtDESeq2
,
sampleNameI
,
sampleNameII
,
sampleNameList
)
### NOISeq ###
print
(
paste0
(
"NOISeq is running with "
,
cName
))
packageDescription
(
"NOISeq"
)
NOISeqFunction
(
outdir
,
NOISeqDir
,
cName
,
RTableInFiles
,
pvtNOISeq
,
sampleNameI
,
sampleNameII
,
myLenPos
,
myGCPos
,
LenGCTable
,
columnPos
,
sampleNameList
,
SampleNames
)
### Overlap of all three methods ###
# intersection with DEG direction check => 1, without => 0
intersection_4.file
<-
intersection
(
outdir
,
1
,
4
)
write.table
(
intersection_4.file
,
file
=
paste0
(
outdir
,
"/"
,
cName
,
"_intersection_four_methods_"
,
nrow
(
intersection_4.file
),
".tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
# edgeR, DESeq2, NOISeq
intersection_3.file
<-
intersection
(
outdir
,
1
,
3
)
write.table
(
intersection_3.file
,
file
=
paste0
(
outdir
,
"/"
,
cName
,
"_intersection_three_methods_"
,
nrow
(
intersection_3.file
),
".tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
sink
()
# End of log file
}
####################
## main functions #
####################
plotGraphics
<-
function
(
data
,
librariesName
,
cName
,
outdir
,
romanNumber
)
{
library
(
"DESeq2"
)
library
(
"ggplot2"
)
library
(
"RColorBrewer"
)
library
(
"pheatmap"
)
library
(
"genefilter"
)
# create sample annotation lists
# sample.name.list <- gsub(".[12]", "", librariesName[colnames(data)])
sample.name.list
<-
gsub
(
".[12]"
,
""
,
romanNumber
)
sample.name.uniq
<-
unique
(
unlist
(
sample.name.list
,
use.names
=
FALSE
))
col.data.condition
=
data.frame
(
row.names
=
librariesName
,
condition
=
sample.name.list
)
# run DESeq2 functions
dds.matrix
<-
DESeqDataSetFromMatrix
(
countData
=
data
,
colData
=
col.data.condition
,
design
=
~
condition
)
# data must be integer! (Ganze Zahlen)
colData
(
dds.matrix
)
$
condition
<-
factor
(
colData
(
dds.matrix
)
$
condition
,
levels
=
sample.name.uniq
)
dds.matrix.deseq
<-
DESeq
(
dds.matrix
)
dds.matrix.res
<-
results
(
dds.matrix.deseq
)
dds.matrix.res
<-
dds.matrix.res
[
order
(
dds.matrix.res
$
padj
),
]
dds.matrix.res.fdr
<-
dds.matrix.res
[
!
is.na
(
dds.matrix.res
$
padj
),
]
dds.matrix.res.fdr
<-
dds.matrix.res.fdr
[
dds.matrix.res.fdr
$
padj
<
0.0001
,
]
dds.matrix.rld
<-
rlogTransformation
(
dds.matrix
,
blind
=
TRUE
)
# data for PCA plot
pcaData
<-
plotPCA
(
dds.matrix.rld
,
intgroup
=
"condition"
,
returnData
=
TRUE
)
percentVar
<-
round
(
100
*
attr
(
pcaData
,
"percentVar"
))
# color for samples
sample.colors
<-
c
(
"gray60"
,
"#D73027"
,
"#4575B4"
,
"purple"
,
"orange"
,
"#113FF7"
,
"green"
,
"black"
)
# plot the PCA
# https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
png
(
filename
=
paste0
(
outdir
,
"PCA_all_samples_DEseq2_normalized.png"
))
ggplot
(
pcaData
,
aes
(
PC1
,
PC2
,
color
=
condition
,
shape
=
condition
)
)
+
scale_shape_manual
(
values
=
c
(
4
,
1
,
2
,
3
,
0
,
5
,
6
,
7
))
+
geom_point
(
size
=
2.5
)
+
xlab
(
paste0
(
"PC1: "
,
percentVar
[
1
],
"% variance"
)
)
+
ylab
(
paste0
(
"PC2: "
,
percentVar
[
2
],
"% variance"
)
)
+
theme
()
+
#, face="bold"
scale_colour_manual
(
values
=
sample.colors
# dodgerblue3
)
+
ggtitle
(
"PCA of all samples (DESeq2 normalized)"
)
+
theme
(
plot.title
=
element_text
(
lineheight
=
.8
),
panel.background
=
element_rect
(
fill
=
"gray95"
)
)
dev.off
()
# how many genes will be involved in the heatmap calculation
numbGenes
<-
100
# pheatmap data
topVarGenes
<-
head
(
order
(
rowVars
(
assay
(
dds.matrix.rld
)),
decreasing
=
TRUE
),
numbGenes
)
mat
<-
assay
(
dds.matrix.rld
)[
topVarGenes
,
]
mat
<-
mat
-
rowMeans
(
mat
)
# pheatmap data annotation
anno
<-
as.data.frame
(
colData
(
dds.matrix.rld
)[,
c
(
"condition"
)])
rownames
(
anno
)
<-
colnames
(
data
)
# check out the row names of annotation
colnames
(
anno
)
<-
"condition"
# pheatmap data annotation color
condition
<-
sample.colors
names
(
condition
)
<-
sample.name.uniq
anno_colors
<-
list
(
condition
=
condition
)
#heatmap(mat)
#pheatmap(mat)
# ftp://cran.r-project.org/pub/R/web/packages/pheatmap/pheatmap.pdf
png
(
filename
=
paste0
(
outdir
,
"HC_heatmap_all_samples_DESeq2_normalized.png"
))
pheatmap
(
mat
,
annotation
=
anno
,
annotation_colors
=
anno_colors
,
fontsize
=
10
,
fontsize_row
=
6
,
fontsize_col
=
10
,
main
=
paste0
(
"Heatmap of all samples (DESeq2 normalized) "
,
numbGenes
,
" genes\n"
),
annotation_names_col
=
F
,
show_rownames
=
F
,
cluster_rows
=
F
,
border_color
=
F
,
treeheight_col
=
20
)
dev.off
()
}
edgeRFunction
<-
function
(
odir
,
edgeRDir
,
cName
,
RTableInFiles
,
pvtedgeR
,
sampleNameI
,
sampleNameII
,
sampleNameList
)
{
library
(
edgeR
)
outdir
<-
paste0
(
odir
,
"/"
,
edgeRDir
,
"/"
)
# set outdir
dir.create
(
outdir
,
showWarnings
=
FALSE
)
# create output folder
dir.create
(
paste0
(
outdir
,
"plots"
),
showWarnings
=
FALSE
)
# create subfolder "plots"
comparison
<-
cName
counts
<-
RTableInFiles
pvt
<-
pvtedgeR
group
<-
factor
(
sampleNameList
)
# edgeR 'normal'
y.normal
<-
DGEList
(
counts
=
counts
,
group
=
group
)
y.normal
[,
1
]
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" edgeR - plotMDS"
)))
plotMDS
(
y.normal
,
method
=
"bcv"
,
main
=
paste0
(
cName
,
"\nedgeR - plotMDS"
))
dev.off
()
y.normal
<-
calcNormFactors
(
y.normal
)
y.normal
<-
estimateCommonDisp
(
y.normal
)
y.normal
<-
estimateTagwiseDisp
(
y.normal
)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" edgeR - plotBCV"
)))
plotBCV
(
y.normal
,
main
=
paste0
(
cName
,
"\nedgeR - plotBCV"
))
dev.off
()
et.normal
<-
exactTest
(
y.normal
,
pair
=
c
(
sampleNameI
,
sampleNameII
))
topTags
(
et.normal
)
# adjust.method ==> character string specifying p-value adjustment method. Possible values are "none", "BH", "fdr" (equivalent to "BH"), "BY" and "holm". See p.adjust for details.
summary
(
de.normal
<-
decideTestsDGE
(
et.normal
,
p
=
pvt
,
adjust
=
"BH"
))
et.normal.fdr
<-
cbind
(
et.normal
$
table
,
p.adjust
(
et.normal
$
table
[,
3
],
method
=
"BH"
))
et.normal.fdr
<-
et.normal.fdr
[
et.normal.fdr
[,
4
]
<
pvt
,
]
write.table
(
et.normal.fdr
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
et.normal.fdr
),
"_edgeR_fdr.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
detags.normal
<-
rownames
(
de.normal
)[
as.logical
(
de.normal
)]
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" edgeR - plotSmear"
)))
plotSmear
(
et.normal
,
de.normal
,
tags
=
detags.normal
,
main
=
paste0
(
cName
,
"\nedgeR - plotSmear"
))
abline
(
h
=
c
(
-2
,
2
),
col
=
"blue"
)
dev.off
()
# edgeR 'GLM'
y.glm
<-
DGEList
(
counts
=
counts
,
group
=
group
)
design
<-
model.matrix
(
~
0
+
group
,
data
=
y.glm
$
samples
)
design
y.glm
<-
estimateGLMCommonDisp
(
y.glm
,
design
)
y.glm
<-
estimateGLMTrendedDisp
(
y.glm
,
design
)
## Loading required package: splines
y.glm
<-
estimateGLMTagwiseDisp
(
y.glm
,
design
)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" edgeR - plotBCV - GLM"
)))
plotBCV
(
y.glm
,
main
=
paste0
(
cName
,
"\nedgeR - plotBCV - GLM"
))
dev.off
()
y.fit
<-
glmFit
(
y.glm
,
design
)
y.lrt
<-
glmLRT
(
y.fit
)
FDR
<-
p.adjust
(
y.lrt
$
table
$
PValue
,
method
=
"BH"
)
sum
(
FDR
<
pvt
)
topTags
(
y.lrt
)
summary
(
de.glm
<-
decideTestsDGE
(
y.lrt
,
p
=
pvt
,
adjust
=
"BH"
))
}
DESeq2Function
<-
function
(
odir
,
DESeq2Dir
,
cName
,
RTableInFiles
,
pvtDESeq2
,
sampleNameI
,
sampleNameII
,
sampleNameList
)
{
library
(
DESeq2
)
library
(
RColorBrewer
)
library
(
gplots
)
library
(
vsn
)
outdir
<-
paste0
(
odir
,
"/"
,
DESeq2Dir
,
"/"
)
# set outdir
dir.create
(
outdir
,
showWarnings
=
FALSE
)
dir.create
(
paste0
(
outdir
,
"plots"
),
showWarnings
=
FALSE
)
# create subfolder "plots"
pvt
<-
pvtDESeq2
# DESeqDataSet(se, design, ignoreRank = FALSE)
# DESeqDataSetFromMatrix(countData, colData, design, tidy = FALSE, ignoreRank = FALSE, ...)
# DESeqDataSetFromHTSeqCount(sampleTable, directory = ".", design, ignoreRank = FALSE, ...)
# DESeqDataSetFromTximport(txi, colData, design, ...)
coldata
=
data.frame
(
row.names
=
colnames
(
RTableInFiles
),
condition
=
sampleNameList
)
ddsHTSeq
<-
DESeqDataSetFromMatrix
(
countData
=
RTableInFiles
,
colData
=
coldata
,
design
=
~
condition
)
colData
(
ddsHTSeq
)
$
condition
<-
factor
(
colData
(
ddsHTSeq
)
$
condition
,
levels
=
c
(
sampleNameI
,
sampleNameII
))
ddsHTSeq
<-
DESeq
(
ddsHTSeq
)
ddsHTSeq.res
<-
results
(
ddsHTSeq
)
ddsHTSeq.res
<-
ddsHTSeq.res
[
order
(
ddsHTSeq.res
$
padj
),
]
head
(
ddsHTSeq.res
)
# png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - plotMA")))
# plotMA(ddsHTSeq, main = paste0(cName, "\nDESeq2 - plotMA"), ylim = c(-2, 2))
# dev.off()
ddsHTSeq.res.fdr
<-
ddsHTSeq.res
[
!
is.na
(
ddsHTSeq.res
$
padj
),
]
ddsHTSeq.res.fdr
<-
ddsHTSeq.res.fdr
[
ddsHTSeq.res.fdr
$
padj
<
pvt
,
]
write.table
(
as.matrix
(
ddsHTSeq.res.fdr
),
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
as.matrix
(
ddsHTSeq.res.fdr
)),
"_DESeq2_fdr.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
ddsHTSeq.rld
<-
rlogTransformation
(
ddsHTSeq
,
blind
=
TRUE
)
ddsHTSeq.vsd
<-
varianceStabilizingTransformation
(
ddsHTSeq
,
blind
=
TRUE
)
par
(
mfrow
=
c
(
1
,
3
))
notAllZero
<-
(
rowSums
(
counts
(
ddsHTSeq
))
>
0
)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" DESeq2 - SdPlot.1"
)))
meanSdPlot
(
log2
(
counts
(
ddsHTSeq
,
normalized
=
TRUE
)[
notAllZero
,
]
+
1
))
# , ylim = c(0, 2.5)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" DESeq2 - SdPlot.2"
)))
# , ylim = c(0, 2.5)
meanSdPlot
(
assay
(
ddsHTSeq.rld
[
notAllZero
,
]))
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" DESeq2 - SdPlot.3"
)))
# , ylim = c(0, 2.5)
meanSdPlot
(
assay
(
ddsHTSeq.vsd
[
notAllZero
,
]))
dev.off
()
hmcol
<-
colorRampPalette
(
brewer.pal
(
9
,
"GnBu"
))(
100
)
distRL
<-
dist
(
t
(
assay
(
ddsHTSeq.rld
)))
mat
<-
as.matrix
(
distRL
)
rownames
(
mat
)
<-
colnames
(
mat
)
<-
with
(
colData
(
ddsHTSeq
),
paste0
(
condition
))
# png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - heatmap")))
# heatmap(mat, trace = "none", col = rev(hmcol), margin = c(13, 13))
# dev.off()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" DESeq2 - PCA"
)))
print
(
plotPCA
(
ddsHTSeq.rld
,
intgroup
=
"condition"
))
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" DESeq2 - DispEsts"
)))
plotDispEsts
(
ddsHTSeq
,
main
=
paste0
(
cName
,
"\nDESeq2 - plotDispEsts"
))
dev.off
()
}
NOISeqFunction
<-
function
(
odir
,
NOISeqDir
,
cName
,
RTableInFiles
,
pvtNOISeq
,
sampleNameI
,
sampleNameII
,
myLenPos
,
myGCPos
,
LenGCTable
,
columnPos
,
sampleNameList
,
SampleNames
)
{
library
(
NOISeq
)
outdir
<-
paste0
(
odir
,
"/"
,
NOISeqDir
,
"/"
)
# set outdir
dir.create
(
outdir
,
showWarnings
=
FALSE
)
dir.create
(
paste0
(
outdir
,
"plots"
),
showWarnings
=
FALSE
)
# create subfolder "plots"
pvt
<-
pvtNOISeq
counts
<-
RTableInFiles
[,
columnPos
]
head
(
counts
)
# must be changed if there are no triplicates
# !! Please change this setting if needed !!
myfactors
<-
data.frame
(
Treatment
=
sampleNameList
,
TreatmentRun
=
SampleNames
)
# sort counts by row names
mycounts
<-
counts
[
order
(
rownames
(
counts
)),
]
# filter out low counts
mycounts.filter
<-
filtered.data
(
mycounts
,
factor
=
myfactors
$
Treatment
,
norm
=
FALSE
,
depth
=
NULL
,
method
=
1
,
cv.cutoff
=
100
,
cpm
=
1
)
mycounts.rowname
<-
rownames
(
mycounts.filter
)
# Extra file with length and GC content
myLengthTable
<-
read.table
(
LenGCTable
,
sep
=
"\t"
,
header
=
TRUE
,
stringsAsFactor
=
FALSE
,
row.names
=
1
)
myLengthTable
<-
myLengthTable
[
order
(
rownames
(
myLengthTable
)),]
# sort by rownames
myLengthTable.filter
<-
myLengthTable
[
mycounts.rowname
,]
mylength
<-
myLengthTable.filter
[,
myLenPos
]
names
(
mylength
)
<-
rownames
(
myLengthTable.filter
)
mygc
<-
myLengthTable.filter
[,
myGCPos
]
names
(
mygc
)
<-
rownames
(
myLengthTable.filter
)
mydata
<-
readData
(
data
=
mycounts.filter
,
length
=
mylength
,
gc
=
mygc
,
factors
=
myfactors
)
mydata
mydata.GCbias
<-
dat
(
mydata
,
type
=
"GCbias"
)
#-# png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.1")))
#-# explo.plot(mydata.GCbias)
#-# dev.off()
mydata.GCbias.group
<-
dat
(
mydata
,
factor
=
"Treatment"
,
type
=
"GCbias"
)
#-# png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.2")))
#-# explo.plot(mydata.GCbias.group)
#-# dev.off()
show
(
mydata.GCbias.group
)
mydata.lengthbias
<-
dat
(
mydata
,
type
=
"lengthbias"
)
#-# png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.1")))
#-# explo.plot(mydata.lengthbias)
#-# dev.off()
mydata.lengthbias.group
<-
dat
(
mydata
,
factor
=
"Treatment"
,
type
=
"lengthbias"
)
#-# png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.2")))
#-# explo.plot(mydata.lengthbias.group)
#-# dev.off()
show
(
mydata.lengthbias.group
)
# looks different
mydata.cd
<-
dat
(
mydata
,
type
=
"cd"
)
#-# png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - cd")))
#-# explo.plot(mydata.cd)
#-# dev.off()
mylength
myRPKM
<-
NOISeq
::
rpkm
(
assayData
(
mydata
)
$
exprs
)
myUQUA
<-
NOISeq
::
uqua
(
assayData
(
mydata
)
$
exprs
,
long
=
mylength
,
lc
=
0.5
,
k
=
0
)
myTMM
<-
NOISeq
::
tmm
(
assayData
(
mydata
)
$
exprs
,
long
=
1000
,
lc
=
0
)
mynoiseq.rpkm
<-
noiseq
(
mydata
,
k
=
0.5
,
norm
=
"rpkm"
,
factor
=
"Treatment"
,
pnr
=
0.2
,
nss
=
5
,
v
=
0.02
,
lc
=
1
,
replicates
=
"biological"
)
mynoiseq.nnorm
<-
noiseq
(
mydata
,
k
=
0.5
,
norm
=
"n"
,
factor
=
"Treatment"
,
pnr
=
0.2
,
nss
=
5
,
v
=
0.02
,
lc
=
1
,
replicates
=
"biological"
)
mynoiseq.tmm
<-
noiseq
(
mydata
,
k
=
0.5
,
norm
=
"tmm"
,
factor
=
"Treatment"
,
pnr
=
0.2
,
nss
=
5
,
v
=
0.02
,
lc
=
1
,
replicates
=
"biological"
)
# RPKM
mynoiseq.rpkm.prob
<-
mynoiseq.rpkm
@
results
[[
1
]][
mynoiseq.rpkm
@
results
[[
1
]]
$
prob
>
pvt
,
]
mynoiseq.rpkm.prob
<-
mynoiseq.rpkm.prob
[
!
is.na
(
mynoiseq.rpkm.prob
$
prob
),
]
write.table
(
mynoiseq.rpkm.prob
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
mynoiseq.rpkm.prob
),
"_NOISeq_rpkm_prob.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
# none
mynoiseq.nnorm.prob
<-
mynoiseq.nnorm
@
results
[[
1
]][
mynoiseq.nnorm
@
results
[[
1
]]
$
prob
>
pvt
,
]
mynoiseq.nnorm.prob
<-
mynoiseq.nnorm.prob
[
!
is.na
(
mynoiseq.nnorm.prob
$
prob
),
]
write.table
(
mynoiseq.nnorm.prob
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
mynoiseq.nnorm.prob
),
"_NOISeq_nnorm_prob.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
# TMM
mynoiseq.tmm.prob
<-
mynoiseq.tmm
@
results
[[
1
]][
mynoiseq.tmm
@
results
[[
1
]]
$
prob
>
pvt
,
]
mynoiseq.tmm.prob
<-
mynoiseq.tmm.prob
[
!
is.na
(
mynoiseq.tmm.prob
$
prob
),
]
write.table
(
mynoiseq.tmm.prob
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
mynoiseq.tmm.prob
),
"_NOISeq_tmm_prob.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
############ NOISeq bio ################
pvtnsb
<-
0.995
a0per_value
<-
0.99
iterations
<-
5
x5
<-
c
(
sample
(
10000
:
99999
,
iterations
,
replace
=
F
),
12345
)
x5
collector.rpkm
<-
NULL
collector.tmm
<-
NULL
# for (iter in x5) {
# # RPKM
# mynoiseqbio.rpkm <- noiseqbio(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
# mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm@results[[1]][mynoiseqbio.rpkm@results[[1]]$prob > pvtnsb, ]
# mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm.prob[!is.na(mynoiseqbio.rpkm.prob$prob), ]
# collector.rpkm <- rbind(collector.rpkm, mynoiseqbio.rpkm.prob)
# #collector <- rbind(collector, mynoiseqbio.rpkm.prob[rownames(mynoiseqbio.rpkm.prob) %in% rownames(collector),])
# # TMM
# mynoiseqbio.tmm <- noiseqbio(mydata, k = 0.5, norm = "tmm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
# mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
# mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
# collector.tmm <- rbind(collector.tmm, mynoiseqbio.tmm.prob)
# }
#
# # RPKM
# print(sort(row.names(collector.rpkm)))
# library(stringr)
# mynoiseqbio.rpkm.prob.overlap <- collector.rpkm[str_split_fixed(row.names(collector.rpkm), "\\.",2)[,2]>=iterations+10,]
# row.names(mynoiseqbio.rpkm.prob.overlap) <- substr(row.names(mynoiseqbio.rpkm.prob.overlap),1,str_length(row.names(mynoiseqbio.rpkm.prob.overlap))-1)
#
# write.table(mynoiseqbio.rpkm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.rpkm.prob.overlap),"_NOISeqbio_rpkm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
#
# #TMM
# print(sort(row.names(collector.tmm)))
# library(stringr)
# mynoiseqbio.tmm.prob.overlap <- collector.tmm[str_split_fixed(row.names(collector.tmm), "\\.",2)[,2]>=iterations+10,]
# row.names(mynoiseqbio.tmm.prob.overlap) <- substr(row.names(mynoiseqbio.tmm.prob.overlap),1,str_length(row.names(mynoiseqbio.tmm.prob.overlap))-1)
#
# write.table(mynoiseqbio.tmm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.tmm.prob.overlap),"_NOISeqbio_tmm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
# # none
# mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm@results[[1]][mynoiseqbio.nnorm@results[[1]]$prob > pvtnsb, ]
# mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm.prob[!is.na(mynoiseqbio.nnorm.prob$prob), ]
# write.table(mynoiseqbio.nnorm.prob, file = paste0(outdir, cName, "_NOISeqbio_nnorm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
# # TMM
# mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
# mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
# write.table(mynoiseqbio.tmm.prob, file = paste0(outdir, cName, "_NOISeqbio_tmm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - noiseqbiorpkm"
)))
mynoiseqbio.rpkm
<-
noiseqbio
(
mydata
,
k
=
0.5
,
norm
=
"rpkm"
,
factor
=
"Treatment"
,
lc
=
1
,
r
=
20
,
adj
=
1.5
,
a0per
=
a0per_value
,
random.seed
=
12345
,
filter
=
1
,
plot
=
TRUE
)
dev.off
()
mynoiseqbio.rpkm.prob
<-
mynoiseqbio.rpkm
@
results
[[
1
]][
mynoiseqbio.rpkm
@
results
[[
1
]]
$
prob
>
pvtnsb
,
]
mynoiseqbio.rpkm.prob
<-
mynoiseqbio.rpkm.prob
[
!
is.na
(
mynoiseqbio.rpkm.prob
$
prob
),
]
write.table
(
mynoiseqbio.rpkm.prob
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
mynoiseqbio.rpkm.prob
),
"_NOISeqbio_rpkm_prob_seed_12345.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - noiseqbiontmm"
)))
mynoiseqbio.tmm
<-
noiseqbio
(
mydata
,
k
=
0.5
,
norm
=
"tmm"
,
factor
=
"Treatment"
,
lc
=
1
,
r
=
20
,
adj
=
1.5
,
a0per
=
a0per_value
,
random.seed
=
12345
,
filter
=
1
,
plot
=
TRUE
)
dev.off
()
mynoiseqbio.tmm.prob
<-
mynoiseqbio.tmm
@
results
[[
1
]][
mynoiseqbio.tmm
@
results
[[
1
]]
$
prob
>
pvtnsb
,
]
mynoiseqbio.tmm.prob
<-
mynoiseqbio.tmm.prob
[
!
is.na
(
mynoiseqbio.tmm.prob
$
prob
),
]
write.table
(
mynoiseqbio.tmm.prob
,
file
=
paste0
(
outdir
,
cName
,
"_"
,
nrow
(
mynoiseqbio.tmm.prob
),
"_NOISeqbio_tmm_prob_seed_12345.tsv"
),
sep
=
"\t"
,
col.names
=
TRUE
,
row.names
=
TRUE
,
quote
=
FALSE
)
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - noiseqbionnorm"
)))
mynoiseqbio.nnorm
<-
noiseqbio
(
mydata
,
k
=
0.5
,
norm
=
"n"
,
factor
=
"Treatment"
,
lc
=
1
,
r
=
20
,
adj
=
1.5
,
a0per
=
a0per_value
,
random.seed
=
12345
,
filter
=
1
,
plot
=
TRUE
)
dev.off
()
head
(
mynoiseqbio.rpkm
@
results
[[
1
]])
head
(
mynoiseqbio.nnorm
@
results
[[
1
]])
mynoiseqbio.rpkm.deg
<-
degenes
(
mynoiseqbio.rpkm
,
q
=
pvtnsb
,
M
=
NULL
)
mynoiseqbio.rpkm.deg.up
<-
degenes
(
mynoiseqbio.rpkm
,
q
=
pvtnsb
,
M
=
"up"
)
mynoiseqbio.rpkm.deg.down
<-
degenes
(
mynoiseqbio.rpkm
,
q
=
pvtnsb
,
M
=
"down"
)
mynoiseqbio.nnorm.deg
<-
degenes
(
mynoiseqbio.nnorm
,
q
=
pvtnsb
,
M
=
NULL
)
mynoiseqbio.nnorm.deg.up
<-
degenes
(
mynoiseqbio.nnorm
,
q
=
pvtnsb
,
M
=
"up"
)
mynoiseqbio.nnorm.deg.down
<-
degenes
(
mynoiseqbio.nnorm
,
q
=
pvtnsb
,
M
=
"down"
)
par
(
mfrow
=
c
(
3
,
2
))
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.rpkm_0.9"
)))
DE.plot
(
mynoiseq.rpkm
,
q
=
0.9
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.rpkm_0.8"
)))
DE.plot
(
mynoiseq.rpkm
,
q
=
0.8
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.nnorn_0.9"
)))
DE.plot
(
mynoiseq.nnorm
,
q
=
0.9
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.nnorn_0.8"
)))
DE.plot
(
mynoiseq.nnorm
,
q
=
0.8
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.tmm_0.9"
)))
DE.plot
(
mynoiseq.tmm
,
q
=
0.9
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.tmm_0.8"
)))
DE.plot
(
mynoiseq.tmm
,
q
=
0.8
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
par
(
mfrow
=
c
(
2
,
2
))
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeqbio - DE.rpkm_"
,
pvt
)))
DE.plot
(
mynoiseqbio.rpkm
,
q
=
pvt
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeqbio - DE.rpkm_"
,
pvtnsb
)))
DE.plot
(
mynoiseqbio.rpkm
,
q
=
pvtnsb
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeqbio - DE.nnorm_"
,
pvt
)))
DE.plot
(
mynoiseqbio.nnorm
,
q
=
pvt
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeqbio - DE.nnorm_"
,
pvtnsb
)))
DE.plot
(
mynoiseqbio.nnorm
,
q
=
pvtnsb
,
graphic
=
"expr"
,
log.scale
=
TRUE
)
dev.off
()
par
(
mfrow
=
c
(
1
,
2
))
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.rpkm_0.9_MD"
)))
DE.plot
(
mynoiseq.rpkm
,
q
=
0.9
,
graphic
=
"MD"
,
log.scale
=
TRUE
)
dev.off
()
png
(
filename
=
paste0
(
outdir
,
"plots/"
,
paste0
(
cName
,
" NOISeq - DE.nnorm_0.9_MD"
)))
DE.plot
(
mynoiseq.nnorm
,
q
=
0.9
,
graphic
=
"MD"
,
log.scale
=
TRUE
)
dev.off
()
}
###########################################
## additional function for more usability #
###########################################
calcRPKM
<-
function
(
c
,
n
,
l
)
{
RPKM
<-
(
10
^
9
*
as.numeric
(
c
))
/
(
as.numeric
(
n
)
*
as.numeric
(
l
))
return
(
RPKM
)
}
RPKMMatrix
<-
function
(
InTable
,
directory
,
LenGCTable
,
myLenPos
)
{
myLengthTable
<-
read.table
(
paste0
(
directory
,
LenGCTable
),
sep
=
"\t"
,
header
=
TRUE
,
stringsAsFactor
=
FALSE
,
row.names
=
1
)
newtable
<-
matrix
(
data
=
c
(
1
),
nrow
=
nrow
(
InTable
),
ncol
(
InTable
),
byrow
=
TRUE
)
colnames
(
newtable
)
<-
colnames
(
InTable
)
rownames
(
newtable
)
<-
rownames
(
InTable
)
sumCol
<-
apply
(
InTable
,
2
,
sum
)
# list of the all col sums
for
(
s
in
colnames
(
InTable
))
{
for
(
r
in
rownames
(
InTable
))
{
w
<-
InTable
[
r
,
s
]
len
<-
myLengthTable
[
r
,
myLenPos
]
sum
<-
as.numeric
(
sumCol
[
s
])
newtable
[
r
,
s
]
<-
calcRPKM
(
w
,
sum
,
len
)
}
}
return
(
newtable
)
}
# rmHTSeqLines removes the last five rows
rmHTSeqLines
<-
function
(
counts
)
{
counts
<-
head
(
counts
,
-5
)
return
(
counts
)
}
###############
## functions ##
###############
# split list to a factor list
split2list
<-
function
(
tmp
)
{
xx
<-
c
()
for
(
i
in
1
:
length
(
tmp
))
{
if
(
grepl
(
"\\."
,
tmp
[
i
]))
{
foo
<-
strsplit
(
tmp
[
i
],
'\\.'
)[[
1
]]
fooo
<-
strsplit
(
foo
,
" "
)[[
1
]]
xx
<-
c
(
xx
,
fooo
)
}
else
{
xx
<-
c
(
xx
,
tmp
[
i
])
}
}
return
(
as.factor
(
xx
))
}
# makeGroups
# https://github.com/drisso/RUVSeq/blob/master/R/helper.R
makeGroups
<-
function
(
xs
)
{
xs
<-
factor
(
xs
)
groups
<-
matrix
(
-1
,
nrow
=
length
(
levels
(
xs
)),
ncol
=
max
(
table
(
xs
)))
for
(
i
in
1
:
length
(
levels
(
xs
)))
{
idxs
<-
which
(
xs
==
levels
(
xs
)[
i
])
groups
[
i
,
1
:
length
(
idxs
)]
<-
idxs
}
return
(
groups
)
}
# The function makeSampleList creates a list of the columns(samples) you would like to compare
makeSampleList
<-
function
(
A
,
Al
,
B
,
Bl
)
{
l
<-
c
()
if
(
exists
(
"Al"
,
mode
=
"numeric"
))
{
l
<-
makeSampleList.helper
(
A
,
Al
,
l
)
l
<-
makeSampleList.helper
(
B
,
Bl
,
l
)
}
return
(
l
)
}
makeSampleList.helper
<-
function
(
v
,
k
,
l
)
{
for
(
h
in
1
:
k
)
{
if
(
h
==
1
)
{
l
<-
c
(
l
,
v
)
}
else
{
l
<-
c
(
l
,
paste0
(
v
,
"."
,
h
-1
))
}
}
return
(
l
)
}
# function creates a list of numbers out of another list. The List is the index of the given list.
makeIndexList
<-
function
(
l
)
{
n
<-
1
jj
<-
c
()
for
(
v
in
l
)
{
jj
<-
c
(
jj
,
n
)
n
<-
n
+
1
}
return
(
jj
)
}
intersection
<-
function
(
l
,
check
,
t
)
{
l.list
<-
list.files
(
path
=
l
,
pattern
=
"*(rpkm|edgeR|DESeq2).*tsv"
,
recursive
=
TRUE
)
if
(
grepl
(
"bio"
,
l.list
[
4
],
fixed
=
TRUE
))
{
l.list.noiseq
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
3
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
l.list.noiseqbio
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
4
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
}
else
{
l.list.noiseq
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
4
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
l.list.noiseqbio
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
3
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
}
l.list.edgeR
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
2
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
l.list.deseq2
<-
read.table
(
paste0
(
l
,
"/"
,
l.list
[
1
]),
header
=
T
,
sep
=
"\t"
,
row.names
=
1
)
print
(
"DEGs edgeR"
)
print
(
nrow
(
l.list.edgeR
))
print
(
"DEGs deseq2"
)
print
(
nrow
(
l.list.deseq2
))
print
(
"DEGs noiseq"
)
print
(
nrow
(
l.list.noiseq
))
print
(
"DEGs noiseqbio"
)
print
(
nrow
(
l.list.noiseqbio
))
interect.list
<-
""
interect.list.direct
<-
""
if
(
t
==
4
)
{
# List of overlaps without direction check
interect.list
<-
intersect
(
rownames
(
l.list.edgeR
),
intersect
(
rownames
(
l.list.noiseq
),
intersect
(
rownames
(
l.list.noiseqbio
),
rownames
(
l.list.deseq2
))))
print
(
"DEGs intersection without direction check (four methods)"
)
print
(
length
(
interect.list
))
print
(
interect.list
)
print
(
"DEGs intersection with direction check"
)
# List of overlaps with direction check
if
(
length
(
interect.list
)
!=
0
)
{
interect.list.direct
<-
interDirect
(
l.list.edgeR
,
l.list.deseq2
,
l.list.noiseq
,
l.list.noiseqbio
,
interect.list
)
print
(
length
(
interect.list.direct
))
print
(
interect.list.direct
)
}
else
{
print
(
"0"
)
}
}
if
(
t
==
3
)
{
# List of overlaps without direction check
interect.list
<-
intersect
(
rownames
(
l.list.edgeR
),
intersect
(
rownames
(
l.list.noiseq
),
rownames
(
l.list.deseq2
)))
print
(
"DEGs intersection without direction check (three methods)"
)
print
(
length
(
interect.list
))
print
(
interect.list
)
print
(
"DEGs intersection with direction check"
)
# List of overlaps with direction check
if
(
length
(
interect.list
)
!=
0
)
{
# fake data frame with 0 rows => nrow == 0
df_fake
<-
data.frame
(
Date
=
as.Date
(
character
()),
File
=
character
(),
User
=
character
(),
stringsAsFactors
=
FALSE
)
interect.list.direct
<-
interDirect
(
l.list.edgeR
,
l.list.deseq2
,
l.list.noiseq
,
df_fake
,
interect.list
)
print
(
length
(
interect.list.direct
))
print
(
interect.list.direct
)
}
else
{
print
(
"0"
)
}
}
if
(
check
==
1
){
return
(
l.list.noiseq
[
interect.list.direct
,])
}
else
{
return
(
l.list.noiseq
[
interect.list
,])
}
}
interDirect
<-
function
(
e
,
d
,
n
,
nb
,
i
)
{
if
(
nrow
(
nb
)
==
0
)
{
nbx
<-
apply
(
n
[
i
,],
2
,
function
(
x
)
{
ifelse
(
x
>
0
,
"-"
,
"+"
)})
# use noiseq twice to simulate noiseqbio
nby
<-
paste0
(
nbx
[,
"M"
],
rownames
(
nbx
))
}
else
{
nbx
<-
apply
(
nb
[
i
,],
2
,
function
(
x
)
{
ifelse
(
x
>
0
,
"-"
,
"+"
)})
nby
<-
paste0
(
nbx
[,
"log2FC"
],
rownames
(
nbx
))
}
ex
<-
apply
(
e
[
i
,],
2
,
function
(
x
)
{
ifelse
(
x
<
0
,
"-"
,
"+"
)})
dx
<-
apply
(
d
[
i
,],
2
,
function
(
x
)
{
ifelse
(
x
<
0
,
"-"
,
"+"
)})
nx
<-
apply
(
n
[
i
,],
2
,
function
(
x
)
{
ifelse
(
x
>
0
,
"-"
,
"+"
)})
# twisted, because edgeR and DESeq2 are calculating vice versa
ey
<-
paste0
(
ex
[,
"logFC"
],
rownames
(
ex
))
dy
<-
paste0
(
dx
[,
"log2FoldChange"
],
rownames
(
dx
))
ny
<-
paste0
(
nx
[,
"M"
],
rownames
(
nx
))
interect.list.direct
<-
intersect
(
ey
,
intersect
(
dy
,
intersect
(
ny
,
nby
)))
return
(
substr
(
interect.list.direct
,
2
,
100
))
}
###########################################
## executiv part of the script #
###########################################
for
(
SampleNames
in
sampleLists
)
{
workflow
(
file
,
saveat
,
SampleNames
,
LenGCTable
,
myLenPos
,
myGCPos
,
rpkm
,
pvtedgeR
,
pvtDESeq2
,
pvtNOISeq
,
edgeRDir
,
DESeq2Dir
,
NOISeqDir
,
htseq
)
}
print
(
"Thank you for using the DEG pipeline :) \n"
)
# fin
### compare DEGs analysis ###
# used venny and Excel
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment