add Fabian's R scripts

35149420 · Saskia Hiltemann · 8ebefab1 · 35149420 · 35149420
Commit 35149420 authored 1 year ago by Saskia Hiltemann
--- a/workflows/3D_PCA_plot.R
+++ b/workflows/3D_PCA_plot.R
+    # 3d R plot
+    saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker"
+    file.rpkm <- '/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.rpkm'
+    ## Windows ###
+    #-#saveat <- "Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/"
+    #-#file.rpkm <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs/Celery_merged_isoforms.p.sort.rpkm'
+    #-#file.count <- 'Q:/haasf/leubner_RNA-seq/Celery/DEGs_0.9/Celery_merged_isoforms.p.sort.counts'
+    ###
+    data.rpkm <- read.table(file.rpkm, header=T, sep="\t", row.names=1)
+    # sort by colnames
+    data.rpkm <- data.rpkm[,order(colnames(data.rpkm))]
+    librariesName <- list(
+      cop_D = c("cop_D", "red"),
+      cop_L = c("cop_L", "blue"),
+      spa_D = c("spa_D", "green"),
+      spa_L = c("spa_L", "yellow"),
+      WT_D = c("WT_D", "black"),
+      WT_L = c("WT_L", "violet")
+    )
+    #
+    # header.ori <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
+    # header.new <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
+    #
+    # header.new <- header.new[order(header.ori)]
+    # header.ori <- header.ori[order(header.ori)]
+    #
+    # col.header <- header.new
+    #
+    # colnames(data.rpkm) <- col.header
+    library("DESeq2")
+    library("ggplot2")
+    library("RColorBrewer")
+    library("pheatmap")
+    library("BiocGenerics")
+    library("rgl")
+    library("magick")
+    library("sjmisc")
+    ################### running ######################
+    ### PCA RPKM ###
+    set.seed(0)
+    data.inv <- t(data.rpkm)
+    data.dist <-dist(data.inv, method="euclidean") # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski"
+    data.dist.hc <- hclust(data.dist,method="ward.D2")
+    data.dist.pca <-princomp(data.dist,cor=T)
+    pc1 <- data.dist.pca$scores[,1]
+    pc2 <- data.dist.pca$scores[,2]
+    pc3 <- data.dist.pca$scores[,3]
+    # create data frame for pc1 pc2 pc3
+    data.dist.pca.frame = data.frame(pc1,pc2,pc3)
+    rownames(data.dist.pca.frame) <- names(data.dist.pca$scale)
+    colnames(data.dist.pca.frame) <- c("pc1","pc2","pc3")
+    condition.values <- c()
+    condition.values.color <- c()
+    for(a in colnames(data.rpkm)) {
+      v <- substr(a, nchar(a), nchar(a))
+      if(str_contains(v, c(1,2,3,4,5,6,7,8,9,0), logic = "or")) {
+        if (substr(substr(a, 1, nchar(a)-2), nchar(substr(a, 1, nchar(a)-2)), nchar(substr(a, 1, nchar(a)-2))) == ".") {
+          n <- substr(a, 1, nchar(a)-3)
+        } else {
+          n <- substr(a, 1, nchar(a)-2)
+        }
+      } else {
+        n <- a
+      }
+      condition.values <- c(condition.values, librariesName[n][[1]][1])
+      condition.values.color <- c(condition.values.color, librariesName[n][[1]][2])
+    }
+    data.dist.pca.frame["tissue"] <- condition.values
+    data.dist.pca.frame["color"] <- condition.values.color
+    data.dist.pca.frame["name"] <- names(data.dist.pca$scale)
+    attr(data.dist.pca.frame, "percentVar") <- (data.dist.pca$sdev)^2 / sum(data.dist.pca$sdev^2) # cumsum()
+    # simple plot
+    png(filename=paste0(saveat, "/HC_RPKM_normalized.png"))
+    plot(data.dist.hc) # hc plot
+    dev.off()
+    png(filename=paste0(saveat, "/PCA_variance_RPKM_normalized.png"))
+    plot(data.dist.pca) # variances; var(data.dist.pca$sdev[1:9])
+    dev.off()
+    # get the parcent variation
+    percentVar <- round(100 * attr(data.dist.pca.frame, "percentVar"))
+    # 3d plot
+    plot3d(pc1, pc2, pc3,
+           type = "s", # p, s, l, h, n
+           #pch = c(1:3),
+           col = condition.values.color,
+           size = 1,
+           xlab = paste0("PC1: ",percentVar[1],"% variance"),
+           ylab = paste0("PC2: ",percentVar[2],"% variance"),
+           zlab = paste0("PC3: ",percentVar[3],"% variance"),
+           cex = 2,
+           main = "", # -> princomp",
+    )
+    # shift <- matrix(4, 4, 4, byrow = TRUE)
+    # text3d(shift,texts=1:3)
+    grid3d(c("x", "y", "z"))
+    ## add legend
+    legend3d("right", unique(condition.values), pch = 19, col = unique(condition.values.color))
+    #### video #####
+    M <- par3d("userMatrix")
+    play3d( par3dinterp( userMatrix=list(M,
+                                         rotate3d(M, pi/2, 1, 0, 0),
+                                         rotate3d(M, pi/2, 0, 1, 0) ) ),
+            duration=2 )
+    movie3d(spin3d(axis = c(1, 2, 1)), duration = 5,
+            dir = saveat)
+    #### video end ####
+    # pc1, pc2
+    png(filename=paste0(saveat, "/PCA_RPKM_normalized.png"))
+    ggplot(
+      data.dist.pca.frame,
+      aes(
+        pc1,
+        pc2,
+        color=tissue)
+    ) +
+      geom_point(size=2.5) +
+      xlab(
+        paste0("PC1: ",percentVar[1],"% variance")
+      ) +
+      ylab(
+        paste0("PC2: ",percentVar[2],"% variance")
+      ) +
+      #theme() + #, face="bold"
+      scale_colour_manual(
+        values= c("red", "blue", "green", "yellow", "black", "violet") # dodgerblue3
+      ) +
+      ggtitle("PCA of all samples (RPKM normalized)") +
+      theme(
+        plot.title = element_text(lineheight=.8),
+        panel.background = element_rect(fill = "gray95")
+      )
+    dev.off()
--- a/workflows/DEG_pipeline.R
+++ b/workflows/DEG_pipeline.R
+    ###############################################################
+    ## DEG pipeline with edgeR, DESeq2 and NOISeq                 #
+    ## Modified from the template of Dr. Kristian Ullrich 2014    #
+    ## @fabianhaas, AG Rensing, Marburg, Jan 2016                 #
+    ###############################################################
+    # The pipeline expects sample triplicates.
+    #   If you don't have three samples of one condition, use face files with zero counts.
+    # RPKM calculation is optional.
+    # Additional program to sum up the tsv files: /mnt/nas_temp/home/haasf/Program/sumup_htseq_counts.pl folder file_prefix
+    ###########################################
+    ## please define your variables           #
+    ###########################################
+    saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/DEGs_0.9"
+    file <- '/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.counts'
+    # Table with the length and the GC content
+    LenGCTable <- "/mnt/NAS_coruscant_datashare/haasf/GeneAtlas_RNASeq/DEG_2nd_round/cosmoss_V3.3.release_2016-07-22.IsoformV31.fasta.length_GC_Contig_Daten_for_34840_1nd.csv"
+    # Column position of the gene name and the width/gc content (first column is gene name!)
+    myLenPos <- 1
+    myGCPos <- 2
+    #
+    # hoo <- c("","")
+    # sampleLists <- c()
+    # for(a in hoo) {
+    #   for(b in hoo) {
+    #     if(a!=b) {
+    #       tmp <- c(a,b)
+    #       sampleLists <- c(sampleLists, list(tmp))
+    #     }
+    #   }
+    # }
+    sampleLists <- list(c("cop_D", "cop_L"),
+                        c("cop_D", "spa_D"),
+                        c("cop_D", "spa_L"),
+                        c("cop_D", "WT_D"),
+                        c("cop_D", "WT_L"),
+                        c("cop_L", "spa_D"),
+                        c("cop_L", "spa_L"),
+                        c("cop_L", "WT_D"),
+                        c("cop_L", "WT_L"),
+                        c("spa_D", "spa_L"),
+                        c("spa_D", "WT_D"),
+                        c("spa_D", "WT_L"),
+                        c("spa_L", "WT_D"),
+                        c("spa_L", "WT_L"),
+                        c("WT_D", "WT_L")
+                      )
+    # Do you want a RPKM value table (1/0)
+    # Be warned, it takes a while to do this. approx. 10 minutes for 35000 genes and 6 samples.
+    rpkm <- 0
+    # Are the data original HTSeq files? yes <- 1 ; no <- 0
+    htseq <- 0
+    # threshold settings for the R packages
+    #edgeR
+    pvtedgeR <- 0.001
+    #DESeq2
+    pvtDESeq2 <- 0.001
+    #NOISeq
+    pvtNOISeq <- 0.9
+    # Output folder names
+    edgeRDir <- "edgeR"
+    DESeq2Dir <- "DESeq2"
+    NOISeqDir <- "NOISeq"
+    #########################
+    ## stop editing here!! ##
+    #########################
+    ###########################################
+    ## executiv function                      #
+    ###########################################
+    workflow <- function(file, saveat, SampleNames, LenGCTable, myLenPos, myGCPos, rpkm, pvtedgeR, pvtDESeq2, pvtNOISeq, edgeRDir, DESeq2Dir, NOISeqDir, htseq)
+    {
+        # setwd(workingFolder) # set workspace
+                sampleNameI <- SampleNames[1]
+                sampleNameII <- SampleNames[2]
+                # Comparison name
+                cName <- paste(sampleNameI, sampleNameII, sep = " vs. ")
+                filename <- strsplit(file, "/")
+                filename <- filename[[1]][length(filename[[1]])] # long but it's working
+                sampleName <- gsub(", ","_",toString(SampleNames))
+                samples <- SampleNames
+                # create output dir
+                dir.create(saveat, showWarnings = FALSE)
+                outdir <- paste0(saveat,"/",sampleName)
+                dir.create(outdir)
+                # Start the log file
+                sink(paste0(outdir, "/DEGpipe_",cName, ".log"))
+                # print parameter into the log file
+                print("set parameters:")
+                print(paste0("Counts file:", file))
+                print(paste0("Results stored at:", saveat))
+                print(paste0("SampleNames: ",SampleNames))
+                print(paste0("LenGCTable: ",LenGCTable))
+                print(paste0("myLenPos: ",myLenPos))
+                print(paste0("myGCPos: ",myGCPos))
+                print(paste0("rpkm: ",rpkm))
+                print(paste0("htseq: ",htseq))
+                print(paste0("pvtedgeR: ",pvtedgeR))
+                print(paste0("pvtDESeq2: ",pvtDESeq2))
+                print(paste0("pvtNOISeq: ",pvtNOISeq))
+                print(paste0("edgeRDir: ",edgeRDir))
+                print(paste0("DESeq2Dir: ",DESeq2Dir))
+                print(paste0("NOISeqDir: ",NOISeqDir))
+                # session and package infos for the log file
+                print(sessionInfo())
+                print(Sys.info())
+                # load spike-in count tables
+                data0 <- read.table(file, header=T, sep="\t", row.names=1)
+                # just if the gene ids not ending with .1 (it is nessessery for NOISeqbio iteration step)
+                #row.names(data0) <- paste0(row.names(data0), ".1")
+                # remove HTSeq rows like "__no_feature"
+                data0 <- data0[ with(data0,  !grepl('__', rownames(data0))) , ]
+                data0 <- data0[ with(data0,  !grepl('ERCC', rownames(data0))) , ]
+                #data0 <- data0[ , with(data0,  !grepl('X.1', colnames(data0)))]
+                #rename col names (library name to roman number)
+              # librariesName <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
+              # #librariesName <- c("80484_Celery_3_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80483_Celery_3_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80482_Celery_3_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80481_Celery_1_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80480_Celery_1_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80479_Celery_1_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80478_Celery_5_day_imbibed_whole_fruit_29C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80477_Celery_5_day_imbibed_whole_fruit_29C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80460_Celery_3_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80459_Celery_3_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80458_Celery_1_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80457_Celery_1_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80456_Celery_1_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80455_Celery_Dry_whole_fruit_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80454_Celery_Dry_whole_fruit_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80453_Celery_Dry_whole_fruit_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80452_Celery_5_day_imbibed_whole_fruit_29C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80451_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80450_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80449_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80448_Celery_5_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80447_Celery_5_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80446_Celery_5_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80445_Celery_3_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam.featureCounts.p.txt")
+              # romanNumber <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
+              # #romanNumber <- c("emb_3d_20.2", "emb_3d_20.1", "emb_3d_20", "emb_1d_20.2", "emb_1d_20.1", "emb_1d_20", "wf_5d_29.2", "wf_5d_29.1", "wf_3d_20.1", "wf_3d_20", "wf_1d_20.2", "wf_1d_20.1", "wf_1d_20", "wf_dry.2", "wf_dry.1", "wf_dry", "wf_5d_29", "wf_5d_ABA_20.2", "wf_5d_ABA_20.1", "wf_5d_ABA_20", "wf_5d_20.2", "wf_5d_20.1", "wf_5d_20", "wf_3d_20.2")
+              #
+              #
+              # for(i in 1:length(librariesName)) {
+              #     colnames(data0)[colnames(data0)==grep(librariesName[i], names(data0), value = TRUE)] <- romanNumber[i]
+              # }
+                # creats a list of the column names/samples you would like to compare
+                SampleNames <- makeSampleList(sampleNameI, length(grep(paste0("^",sampleNameI,"[^A-Z]"), colnames(data0))) + 1,
+                                              sampleNameII, length(grep(paste0("^",sampleNameII,"[^A-Z]"), colnames(data0))) + 1)
+                print(SampleNames)
+                ## mal anschauen als ersatz für das darüber ##
+                ## newenw <- table.new[, grepl(paste(LIST,collapse="|"), colnames(table.new))]
+                # exctract the samples from the data0 if needed
+                if(length(SampleNames) > 0) {
+                    RTableInFiles <- data0[,SampleNames]
+                    data <- data0[,SampleNames]
+                }
+                print(head(RTableInFiles))
+                # sort by column name
+                # data <- data[ , order(names(data))] # dangeros, the order of query and referenz might be not the same
+                # see how many rows and columns are in the tables
+                print(paste("Raw data row number: ",nrow(data)))
+                print(paste("Raw data column number: ",ncol(data)))
+                # 2) A typical differential expression analysis workflow
+                # 2.1) Filtering and exploratory data analysis
+                # filter: One single gene/spike-in must have at least 5 counts in minimum 2 libraries
+                filter <- apply(data, 1, function(x) length(x[x>5])>=2)
+                dataFiltered <- data[filter,]
+                # see how many rows and columns are left
+                print(paste("Filtered data row number: ",nrow(dataFiltered)))
+                print(paste("Filtered data row number: ",ncol(dataFiltered)))
+                # filter gene names
+                genes <- rownames(dataFiltered)[grep("^Pp3", rownames(dataFiltered))]
+                # filter spike-in names
+                spikes <- rownames(dataFiltered)[grep("^ERCC", rownames(dataFiltered))]
+                # get the column name list as.factors
+                xtmp <- colnames(dataFiltered)
+                x <- split2list(xtmp)
+                sampleNameList <- gsub(".[1234567890]+$", "", SampleNames)
+                columnPos <- makeIndexList(SampleNames)
+                # run ...
+                ### edgeR ###
+                print(paste0("edgeR is running with ", cName))
+                packageDescription("edgeR")
+                edgeRFunction(outdir, edgeRDir, cName, RTableInFiles, pvtedgeR, sampleNameI, sampleNameII, sampleNameList)
+                #### DESeq2 ####
+                print(paste0("DESeq2 is running with ", cName))
+                packageDescription("DESeq2")
+                DESeq2Function(outdir, DESeq2Dir, cName, RTableInFiles, pvtDESeq2, sampleNameI, sampleNameII, sampleNameList)
+                ### NOISeq ###
+                print(paste0("NOISeq is running with ", cName))
+                packageDescription("NOISeq")
+               NOISeqFunction(outdir, NOISeqDir, cName, RTableInFiles, pvtNOISeq, sampleNameI, sampleNameII, myLenPos, myGCPos, LenGCTable, columnPos, sampleNameList, SampleNames)
+                ### Overlap of all three methods ###
+                # intersection with DEG direction check => 1, without => 0
+                intersection_4.file <- intersection(outdir,1,4)
+                write.table(intersection_4.file, file = paste0(outdir, "/", cName,"_intersection_four_methods_",nrow(intersection_4.file),".tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+                # edgeR, DESeq2, NOISeq
+                intersection_3.file <- intersection(outdir,1,3)
+                write.table(intersection_3.file, file = paste0(outdir, "/", cName,"_intersection_three_methods_",nrow(intersection_3.file),".tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+                sink() # End of log file
+    }
+    ####################
+    ## main functions  #
+    ####################
+    plotGraphics <- function(data, librariesName, cName, outdir, romanNumber)
+    {
+        library("DESeq2")
+        library("ggplot2")
+        library("RColorBrewer")
+        library("pheatmap")
+        library("genefilter")
+        # create sample annotation lists
+        # sample.name.list <- gsub(".[12]", "", librariesName[colnames(data)])
+        sample.name.list <- gsub(".[12]", "", romanNumber)
+        sample.name.uniq <- unique(unlist(sample.name.list, use.names = FALSE))
+        col.data.condition = data.frame(row.names = librariesName, condition = sample.name.list)
+        # run DESeq2 functions
+        dds.matrix <- DESeqDataSetFromMatrix(countData = data, colData = col.data.condition, design = ~condition) # data must be integer! (Ganze Zahlen)
+        colData(dds.matrix)$condition <- factor(colData(dds.matrix)$condition, levels = sample.name.uniq)
+        dds.matrix.deseq <- DESeq(dds.matrix)
+        dds.matrix.res <- results(dds.matrix.deseq)
+        dds.matrix.res <- dds.matrix.res[order(dds.matrix.res$padj), ]
+        dds.matrix.res.fdr <- dds.matrix.res[!is.na(dds.matrix.res$padj), ]
+        dds.matrix.res.fdr <- dds.matrix.res.fdr[dds.matrix.res.fdr$padj < 0.0001, ]
+        dds.matrix.rld <- rlogTransformation(dds.matrix, blind = TRUE)
+        # data for PCA plot
+        pcaData <- plotPCA(dds.matrix.rld, intgroup = "condition", returnData=TRUE)
+        percentVar <- round(100 * attr(pcaData, "percentVar"))
+        # color for samples
+        sample.colors <- c("gray60","#D73027","#4575B4", "purple", "orange","#113FF7","green", "black")
+        # plot the PCA
+        # https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
+        png(filename=paste0(outdir,"PCA_all_samples_DEseq2_normalized.png"))
+          ggplot(
+              pcaData,
+              aes(
+                  PC1,
+                  PC2,
+                  color=condition,
+                  shape=condition)
+              ) +
+              scale_shape_manual(values = c(4, 1, 2, 3, 0, 5, 6, 7)) +
+              geom_point(size=2.5) +
+              xlab(
+                  paste0("PC1: ",percentVar[1],"% variance")
+                  ) +
+              ylab(
+                  paste0("PC2: ",percentVar[2],"% variance")
+                  ) +
+              theme() + #, face="bold"
+              scale_colour_manual(
+                  values=sample.colors  # dodgerblue3
+                  ) +
+              ggtitle("PCA of all samples (DESeq2 normalized)") +
+              theme(
+                  plot.title = element_text(lineheight=.8),
+                  panel.background = element_rect(fill = "gray95")
+              )
+        dev.off()
+        # how many genes will be involved in the heatmap calculation
+        numbGenes <- 100
+        # pheatmap data
+        topVarGenes <- head(order(rowVars(assay(dds.matrix.rld)),decreasing=TRUE),numbGenes)
+        mat <- assay(dds.matrix.rld)[ topVarGenes, ]
+        mat <- mat - rowMeans(mat)
+        # pheatmap data annotation
+        anno <-as.data.frame(colData(dds.matrix.rld)[,c("condition")])
+        rownames(anno) <- colnames(data) # check out the row names of annotation
+        colnames(anno) <- "condition"
+        # pheatmap data annotation color
+        condition <- sample.colors
+        names(condition) <- sample.name.uniq
+        anno_colors <- list(condition = condition)
+        #heatmap(mat)
+        #pheatmap(mat)
+        # ftp://cran.r-project.org/pub/R/web/packages/pheatmap/pheatmap.pdf
+        png(filename=paste0(outdir,"HC_heatmap_all_samples_DESeq2_normalized.png"))
+          pheatmap(mat ,
+                   annotation = anno,
+                   annotation_colors = anno_colors,
+                   fontsize = 10,
+                   fontsize_row = 6,
+                   fontsize_col = 10,
+                   main = paste0("Heatmap of all samples (DESeq2 normalized) ", numbGenes," genes\n"),
+                   annotation_names_col = F,
+                   show_rownames = F,
+                   cluster_rows = F,
+                   border_color = F,
+                   treeheight_col = 20
+          )
+        dev.off()
+    }
+    edgeRFunction <- function(odir, edgeRDir, cName, RTableInFiles, pvtedgeR, sampleNameI, sampleNameII, sampleNameList)
+    {
+        library(edgeR)
+        outdir <- paste0(odir, "/",edgeRDir,"/") # set outdir
+        dir.create(outdir, showWarnings = FALSE) # create output folder
+        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"
+        comparison <- cName
+        counts <- RTableInFiles
+        pvt <- pvtedgeR
+        group <- factor(sampleNameList)
+        # edgeR 'normal'
+        y.normal <- DGEList(counts = counts, group = group)
+        y.normal[,1]
+        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotMDS")))
+            plotMDS(y.normal, method = "bcv", main = paste0(cName, "\nedgeR - plotMDS"))
+        dev.off()
+        y.normal <- calcNormFactors(y.normal)
+        y.normal <- estimateCommonDisp(y.normal)
+        y.normal <- estimateTagwiseDisp(y.normal)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotBCV")))
+            plotBCV(y.normal, main = paste0(cName, "\nedgeR - plotBCV"))
+        dev.off()
+        et.normal <- exactTest(y.normal, pair = c(sampleNameI, sampleNameII))
+        topTags(et.normal)
+        # adjust.method   ==>  character string specifying p-value adjustment method. Possible values are "none", "BH", "fdr" (equivalent to "BH"), "BY" and "holm". See p.adjust for details.
+        summary(de.normal <- decideTestsDGE(et.normal, p = pvt, adjust = "BH"))
+        et.normal.fdr <- cbind(et.normal$table, p.adjust(et.normal$table[, 3], method = "BH"))
+        et.normal.fdr <- et.normal.fdr[et.normal.fdr[, 4] < pvt, ]
+        write.table(et.normal.fdr, file = paste0(outdir, cName, "_", nrow(et.normal.fdr), "_edgeR_fdr.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        detags.normal <- rownames(de.normal)[as.logical(de.normal)]
+        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotSmear")))
+        plotSmear(et.normal, de.normal, tags = detags.normal, main = paste0(cName, "\nedgeR - plotSmear"))
+        abline(h = c(-2, 2), col = "blue")
+        dev.off()
+        # edgeR 'GLM'
+        y.glm <- DGEList(counts = counts, group = group)
+        design <- model.matrix(~0 + group, data = y.glm$samples)
+        design
+        y.glm <- estimateGLMCommonDisp(y.glm, design)
+        y.glm <- estimateGLMTrendedDisp(y.glm, design)
+        ## Loading required package: splines
+        y.glm <- estimateGLMTagwiseDisp(y.glm, design)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotBCV - GLM")))
+        plotBCV(y.glm, main = paste0(cName, "\nedgeR - plotBCV - GLM"))
+        dev.off()
+        y.fit <- glmFit(y.glm, design)
+        y.lrt <- glmLRT(y.fit)
+        FDR <- p.adjust(y.lrt$table$PValue, method = "BH")
+        sum(FDR < pvt)
+        topTags(y.lrt)
+        summary(de.glm <- decideTestsDGE(y.lrt, p = pvt, adjust = "BH"))
+    }
+    DESeq2Function <- function(odir, DESeq2Dir, cName, RTableInFiles, pvtDESeq2, sampleNameI, sampleNameII, sampleNameList)
+    {
+        library(DESeq2)
+        library(RColorBrewer)
+        library(gplots)
+        library(vsn)
+        outdir <- paste0(odir, "/",DESeq2Dir,"/") # set outdir
+        dir.create(outdir, showWarnings = FALSE)
+        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"
+        pvt <- pvtDESeq2
+        #   DESeqDataSet(se, design, ignoreRank = FALSE)
+        #   DESeqDataSetFromMatrix(countData, colData, design, tidy = FALSE, ignoreRank = FALSE, ...)
+        #   DESeqDataSetFromHTSeqCount(sampleTable, directory = ".", design, ignoreRank = FALSE, ...)
+        #   DESeqDataSetFromTximport(txi, colData, design, ...)
+        coldata = data.frame(row.names = colnames(RTableInFiles), condition = sampleNameList)
+        ddsHTSeq <- DESeqDataSetFromMatrix(countData = RTableInFiles, colData = coldata, design = ~ condition)
+        colData(ddsHTSeq)$condition <- factor(colData(ddsHTSeq)$condition, levels = c(sampleNameI, sampleNameII))
+        ddsHTSeq <- DESeq(ddsHTSeq)
+        ddsHTSeq.res <- results(ddsHTSeq)
+        ddsHTSeq.res <- ddsHTSeq.res[order(ddsHTSeq.res$padj), ]
+        head(ddsHTSeq.res)
+        # png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - plotMA")))
+        #     plotMA(ddsHTSeq, main = paste0(cName, "\nDESeq2 - plotMA"), ylim = c(-2, 2))
+        # dev.off()
+        ddsHTSeq.res.fdr <- ddsHTSeq.res[!is.na(ddsHTSeq.res$padj), ]
+        ddsHTSeq.res.fdr <- ddsHTSeq.res.fdr[ddsHTSeq.res.fdr$padj < pvt, ]
+        write.table(as.matrix(ddsHTSeq.res.fdr), file = paste0(outdir, cName, "_", nrow(as.matrix(ddsHTSeq.res.fdr)), "_DESeq2_fdr.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        ddsHTSeq.rld <- rlogTransformation(ddsHTSeq, blind = TRUE)
+        ddsHTSeq.vsd <- varianceStabilizingTransformation(ddsHTSeq, blind = TRUE)
+        par(mfrow = c(1, 3))
+        notAllZero <- (rowSums(counts(ddsHTSeq)) > 0)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.1")))
+            meanSdPlot(log2(counts(ddsHTSeq, normalized = TRUE)[notAllZero, ] + 1)) # , ylim = c(0, 2.5)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.2"))) # , ylim = c(0, 2.5)
+            meanSdPlot(assay(ddsHTSeq.rld[notAllZero, ]))
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.3"))) # , ylim = c(0, 2.5)
+            meanSdPlot(assay(ddsHTSeq.vsd[notAllZero, ]))
+        dev.off()
+        hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
+        distRL <- dist(t(assay(ddsHTSeq.rld)))
+        mat <- as.matrix(distRL)
+        rownames(mat) <- colnames(mat) <- with(colData(ddsHTSeq), paste0(condition))
+        # png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - heatmap")))
+        #     heatmap(mat, trace = "none", col = rev(hmcol), margin = c(13, 13))
+        # dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - PCA")))
+            print(plotPCA(ddsHTSeq.rld, intgroup = "condition"))
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - DispEsts")))
+            plotDispEsts(ddsHTSeq, main = paste0(cName, "\nDESeq2 - plotDispEsts"))
+        dev.off()
+    }
+    NOISeqFunction <- function(odir, NOISeqDir, cName, RTableInFiles, pvtNOISeq, sampleNameI, sampleNameII, myLenPos, myGCPos, LenGCTable, columnPos, sampleNameList, SampleNames)
+    {
+        library(NOISeq)
+        outdir <- paste0(odir, "/",NOISeqDir,"/") # set outdir
+        dir.create(outdir, showWarnings = FALSE)
+        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"
+        pvt <- pvtNOISeq
+        counts <- RTableInFiles[, columnPos]
+        head(counts)
+        # must be changed if there are no triplicates
+        # !! Please change this setting if needed !!
+        myfactors <- data.frame(Treatment = sampleNameList, TreatmentRun = SampleNames)
+        # sort counts by row names
+        mycounts <- counts[order(rownames(counts)), ]
+        # filter out low counts
+        mycounts.filter <- filtered.data(mycounts, factor = myfactors$Treatment, norm = FALSE, depth = NULL, method = 1, cv.cutoff = 100, cpm = 1)
+        mycounts.rowname <- rownames(mycounts.filter)
+        # Extra file with length and GC content
+        myLengthTable <- read.table(LenGCTable, sep = "\t", header = TRUE, stringsAsFactor = FALSE, row.names=1)
+        myLengthTable <- myLengthTable[order(rownames(myLengthTable)),] # sort by rownames
+        myLengthTable.filter <- myLengthTable[mycounts.rowname,]
+        mylength <- myLengthTable.filter[,myLenPos]
+        names(mylength) <- rownames(myLengthTable.filter)
+        mygc <- myLengthTable.filter[,myGCPos]
+        names(mygc) <- rownames(myLengthTable.filter)
+        mydata <- readData(data = mycounts.filter, length = mylength, gc = mygc, factors = myfactors)
+        mydata
+        mydata.GCbias <- dat(mydata, type = "GCbias")
+    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.1")))
+    #-#        explo.plot(mydata.GCbias)
+    #-#    dev.off()
+        mydata.GCbias.group <- dat(mydata, factor = "Treatment", type = "GCbias")
+    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.2")))
+    #-#      explo.plot(mydata.GCbias.group)
+    #-#    dev.off()
+        show(mydata.GCbias.group)
+        mydata.lengthbias <- dat(mydata, type = "lengthbias")
+    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.1")))
+    #-#        explo.plot(mydata.lengthbias)
+    #-#    dev.off()
+        mydata.lengthbias.group <- dat(mydata, factor = "Treatment", type = "lengthbias")
+    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.2")))
+    #-#        explo.plot(mydata.lengthbias.group)
+    #-#    dev.off()
+        show(mydata.lengthbias.group)
+        # looks different
+        mydata.cd <- dat(mydata, type = "cd")
+    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - cd")))
+    #-#        explo.plot(mydata.cd)
+    #-#    dev.off()
+        mylength
+        myRPKM <- NOISeq::rpkm(assayData(mydata)$exprs)
+        myUQUA <- NOISeq::uqua(assayData(mydata)$exprs, long = mylength, lc = 0.5, k = 0)
+        myTMM <- NOISeq::tmm(assayData(mydata)$exprs, long = 1000, lc = 0)
+        mynoiseq.rpkm <- noiseq(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")
+        mynoiseq.nnorm <- noiseq(mydata, k = 0.5, norm = "n", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")
+        mynoiseq.tmm <- noiseq(mydata, k = 0.5, norm = "tmm", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")
+        # RPKM
+        mynoiseq.rpkm.prob <- mynoiseq.rpkm@results[[1]][mynoiseq.rpkm@results[[1]]$prob > pvt, ]
+        mynoiseq.rpkm.prob <- mynoiseq.rpkm.prob[!is.na(mynoiseq.rpkm.prob$prob), ]
+        write.table(mynoiseq.rpkm.prob, file = paste0(outdir, cName, "_", nrow(mynoiseq.rpkm.prob), "_NOISeq_rpkm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        # none
+        mynoiseq.nnorm.prob <- mynoiseq.nnorm@results[[1]][mynoiseq.nnorm@results[[1]]$prob > pvt, ]
+        mynoiseq.nnorm.prob <- mynoiseq.nnorm.prob[!is.na(mynoiseq.nnorm.prob$prob), ]
+        write.table(mynoiseq.nnorm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseq.nnorm.prob), "_NOISeq_nnorm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        # TMM
+        mynoiseq.tmm.prob <- mynoiseq.tmm@results[[1]][mynoiseq.tmm@results[[1]]$prob > pvt, ]
+        mynoiseq.tmm.prob <- mynoiseq.tmm.prob[!is.na(mynoiseq.tmm.prob$prob), ]
+        write.table(mynoiseq.tmm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseq.tmm.prob), "_NOISeq_tmm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        ############ NOISeq bio ################
+        pvtnsb <- 0.995
+        a0per_value <- 0.99
+        iterations <- 5
+        x5 <- c(sample(10000:99999, iterations, replace=F), 12345)
+        x5
+        collector.rpkm <- NULL
+        collector.tmm <- NULL
+        # for (iter in x5) {
+        #   # RPKM
+        #   mynoiseqbio.rpkm <- noiseqbio(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
+        #   mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm@results[[1]][mynoiseqbio.rpkm@results[[1]]$prob > pvtnsb, ]
+        #   mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm.prob[!is.na(mynoiseqbio.rpkm.prob$prob), ]
+        #   collector.rpkm <- rbind(collector.rpkm, mynoiseqbio.rpkm.prob)
+        #   #collector <- rbind(collector, mynoiseqbio.rpkm.prob[rownames(mynoiseqbio.rpkm.prob) %in% rownames(collector),])
+        #   # TMM
+        #   mynoiseqbio.tmm <- noiseqbio(mydata, k = 0.5, norm = "tmm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
+        #   mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
+        #   mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
+        #   collector.tmm <- rbind(collector.tmm, mynoiseqbio.tmm.prob)
+        # }
+        #
+        # # RPKM
+        # print(sort(row.names(collector.rpkm)))
+        # library(stringr)
+        # mynoiseqbio.rpkm.prob.overlap <- collector.rpkm[str_split_fixed(row.names(collector.rpkm), "\\.",2)[,2]>=iterations+10,]
+        # row.names(mynoiseqbio.rpkm.prob.overlap) <- substr(row.names(mynoiseqbio.rpkm.prob.overlap),1,str_length(row.names(mynoiseqbio.rpkm.prob.overlap))-1)
+        #
+        # write.table(mynoiseqbio.rpkm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.rpkm.prob.overlap),"_NOISeqbio_rpkm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        #
+        # #TMM
+        # print(sort(row.names(collector.tmm)))
+        # library(stringr)
+        # mynoiseqbio.tmm.prob.overlap <- collector.tmm[str_split_fixed(row.names(collector.tmm), "\\.",2)[,2]>=iterations+10,]
+        # row.names(mynoiseqbio.tmm.prob.overlap) <- substr(row.names(mynoiseqbio.tmm.prob.overlap),1,str_length(row.names(mynoiseqbio.tmm.prob.overlap))-1)
+        #
+        # write.table(mynoiseqbio.tmm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.tmm.prob.overlap),"_NOISeqbio_tmm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        # # none
+        # mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm@results[[1]][mynoiseqbio.nnorm@results[[1]]$prob > pvtnsb, ]
+        # mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm.prob[!is.na(mynoiseqbio.nnorm.prob$prob), ]
+        # write.table(mynoiseqbio.nnorm.prob, file = paste0(outdir, cName, "_NOISeqbio_nnorm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        # # TMM
+        # mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
+        # mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
+        # write.table(mynoiseqbio.tmm.prob, file = paste0(outdir, cName, "_NOISeqbio_tmm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbiorpkm")))
+            mynoiseqbio.rpkm <- noiseqbio(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
+        dev.off()
+        mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm@results[[1]][mynoiseqbio.rpkm@results[[1]]$prob > pvtnsb, ]
+        mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm.prob[!is.na(mynoiseqbio.rpkm.prob$prob), ]
+        write.table(mynoiseqbio.rpkm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.rpkm.prob),"_NOISeqbio_rpkm_prob_seed_12345.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbiontmm")))
+            mynoiseqbio.tmm <- noiseqbio(mydata, k = 0.5, norm = "tmm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
+        dev.off()
+        mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
+        mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
+        write.table(mynoiseqbio.tmm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.tmm.prob),"_NOISeqbio_tmm_prob_seed_12345.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbionnorm")))
+            mynoiseqbio.nnorm <- noiseqbio(mydata, k = 0.5, norm = "n", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
+        dev.off()
+        head(mynoiseqbio.rpkm@results[[1]])
+        head(mynoiseqbio.nnorm@results[[1]])
+        mynoiseqbio.rpkm.deg <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = NULL)
+        mynoiseqbio.rpkm.deg.up <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = "up")
+        mynoiseqbio.rpkm.deg.down <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = "down")
+        mynoiseqbio.nnorm.deg <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = NULL)
+        mynoiseqbio.nnorm.deg.up <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = "up")
+        mynoiseqbio.nnorm.deg.down <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = "down")
+        par(mfrow = c(3, 2))
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.9")))
+            DE.plot(mynoiseq.rpkm, q = 0.9, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.8")))
+            DE.plot(mynoiseq.rpkm, q = 0.8, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorn_0.9")))
+            DE.plot(mynoiseq.nnorm, q = 0.9, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorn_0.8")))
+            DE.plot(mynoiseq.nnorm, q = 0.8, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.tmm_0.9")))
+            DE.plot(mynoiseq.tmm, q = 0.9, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.tmm_0.8")))
+            DE.plot(mynoiseq.tmm, q = 0.8, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        par(mfrow = c(2, 2))
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.rpkm_",pvt)))
+            DE.plot(mynoiseqbio.rpkm, q = pvt, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.rpkm_",pvtnsb)))
+            DE.plot(mynoiseqbio.rpkm, q = pvtnsb, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.nnorm_",pvt)))
+            DE.plot(mynoiseqbio.nnorm, q = pvt, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.nnorm_",pvtnsb)))
+            DE.plot(mynoiseqbio.nnorm, q = pvtnsb, graphic = "expr", log.scale = TRUE)
+        dev.off()
+        par(mfrow = c(1, 2))
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.9_MD")))
+            DE.plot(mynoiseq.rpkm, q = 0.9, graphic = "MD", log.scale = TRUE)
+        dev.off()
+        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorm_0.9_MD")))
+            DE.plot(mynoiseq.nnorm, q = 0.9, graphic = "MD", log.scale = TRUE)
+        dev.off()
+    }
+    ###########################################
+    ## additional function for more usability #
+    ###########################################
+    calcRPKM <- function(c,n,l)
+    {
+        RPKM <- (10^9* as.numeric(c)) / (as.numeric(n) * as.numeric(l))
+        return(RPKM)
+    }
+    RPKMMatrix <- function(InTable,directory,LenGCTable,myLenPos)
+    {
+        myLengthTable <- read.table(paste0(directory,LenGCTable), sep = "\t", header = TRUE, stringsAsFactor = FALSE, row.names = 1)
+        newtable <- matrix(data = c(1), nrow = nrow(InTable), ncol(InTable), byrow= TRUE)
+        colnames(newtable) <- colnames(InTable)
+        rownames(newtable) <- rownames(InTable)
+        sumCol <- apply(InTable,2,sum) # list of the all col sums
+        for(s in colnames(InTable)) {
+            for(r in rownames(InTable)) {
+                w <- InTable[r, s]
+                len <- myLengthTable[r, myLenPos]
+                sum <- as.numeric(sumCol[s])
+                newtable[r, s] <- calcRPKM(w, sum, len)
+            }
+        }
+        return(newtable)
+    }
+    # rmHTSeqLines removes the last five rows
+    rmHTSeqLines <- function(counts)
+    {
+        counts <- head(counts, -5)
+        return(counts)
+    }
+    ###############
+    ## functions ##
+    ###############
+    # split list to a factor list
+    split2list <- function(tmp)
+    {
+        xx <- c()
+        for(i in 1:length(tmp)) {
+            if(grepl("\\.", tmp[i])) {
+                foo <- strsplit(tmp[i], '\\.')[[1]]
+                fooo <- strsplit(foo, " ")[[1]]
+                xx <- c(xx, fooo)
+            } else {
+                xx <- c(xx, tmp[i])
+            }
+        }
+        return(as.factor(xx))
+    }
+    # makeGroups
+    # https://github.com/drisso/RUVSeq/blob/master/R/helper.R
+    makeGroups <- function(xs)
+    {
+        xs <- factor(xs)
+        groups <- matrix(-1, nrow = length(levels(xs)), ncol = max(table(xs)))
+        for (i in 1:length(levels(xs))) {
+            idxs <- which(xs == levels(xs)[i])
+            groups[i,1:length(idxs)] <- idxs
+        }
+        return(groups)
+    }
+    # The function makeSampleList creates a list of the columns(samples) you would like to compare
+    makeSampleList <- function(A, Al, B, Bl)
+    {
+        l <- c()
+        if(exists("Al", mode = "numeric")) {
+            l <- makeSampleList.helper(A, Al, l)
+            l <- makeSampleList.helper(B, Bl, l)
+        }
+        return(l)
+    }
+    makeSampleList.helper <- function(v,k,l)
+    {
+        for(h in 1:k) {
+            if(h == 1) {
+                l <- c(l, v)
+            } else {
+                l <- c(l, paste0(v,".",h-1))
+            }
+        }
+        return(l)
+    }
+    # function creates a list of numbers out of another list. The List is the index of the given list.
+    makeIndexList <- function(l)
+    {
+        n <- 1
+        jj <- c()
+        for(v in l) {
+            jj <- c(jj, n)
+            n <- n + 1
+        }
+        return(jj)
+    }
+    intersection <- function(l,check,t)
+    {
+      l.list <- list.files(path = l, pattern = "*(rpkm|edgeR|DESeq2).*tsv", recursive = TRUE)
+      if(grepl("bio", l.list[4],fixed = TRUE)) {
+        l.list.noiseq <- read.table(paste0(l,"/",l.list[3]), header=T, sep="\t", row.names=1)
+        l.list.noiseqbio <- read.table(paste0(l,"/",l.list[4]), header=T, sep="\t", row.names=1)
+      } else {
+        l.list.noiseq <- read.table(paste0(l,"/",l.list[4]), header=T, sep="\t", row.names=1)
+        l.list.noiseqbio <- read.table(paste0(l,"/",l.list[3]), header=T, sep="\t", row.names=1)
+      }
+      l.list.edgeR <- read.table(paste0(l,"/",l.list[2]), header=T, sep="\t", row.names=1)
+      l.list.deseq2 <- read.table(paste0(l,"/",l.list[1]), header=T, sep="\t", row.names=1)
+      print("DEGs edgeR")
+      print(nrow(l.list.edgeR))
+      print("DEGs deseq2")
+      print(nrow(l.list.deseq2))
+      print("DEGs noiseq")
+      print(nrow(l.list.noiseq))
+      print("DEGs noiseqbio")
+      print(nrow(l.list.noiseqbio))
+      interect.list <- ""
+      interect.list.direct <- ""
+      if(t==4) {
+        # List of overlaps without direction check
+        interect.list <- intersect(rownames(l.list.edgeR), intersect(rownames(l.list.noiseq), intersect(rownames(l.list.noiseqbio), rownames(l.list.deseq2))))
+        print("DEGs intersection without direction check (four methods)")
+        print(length(interect.list))
+        print(interect.list)
+        print("DEGs intersection with direction check")
+        # List of overlaps with direction check
+        if(length(interect.list)!=0) {
+          interect.list.direct <- interDirect(l.list.edgeR, l.list.deseq2, l.list.noiseq, l.list.noiseqbio, interect.list)
+          print(length(interect.list.direct))
+          print(interect.list.direct)
+        } else {
+          print("0")
+        }
+      }
+      if(t==3) {
+        # List of overlaps without direction check
+        interect.list <- intersect(rownames(l.list.edgeR), intersect(rownames(l.list.noiseq), rownames(l.list.deseq2)))
+        print("DEGs intersection without direction check  (three methods)")
+        print(length(interect.list))
+        print(interect.list)
+        print("DEGs intersection with direction check")
+        # List of overlaps with direction check
+        if(length(interect.list)!=0) {
+          # fake data frame with 0 rows => nrow == 0
+          df_fake <- data.frame(Date=as.Date(character()),
+                           File=character(),
+                           User=character(),
+                           stringsAsFactors=FALSE)
+          interect.list.direct <- interDirect(l.list.edgeR, l.list.deseq2, l.list.noiseq, df_fake, interect.list)
+          print(length(interect.list.direct))
+          print(interect.list.direct)
+        } else {
+          print("0")
+        }
+      }
+      if(check == 1){
+        return(l.list.noiseq[interect.list.direct,])
+      } else {
+        return(l.list.noiseq[interect.list,])
+      }
+    }
+    interDirect <- function(e,d,n,nb,i)
+    {
+      if(nrow(nb) == 0) {
+        nbx <- apply(n[i,], 2, function(x) {ifelse(x > 0, "-", "+")}) # use noiseq twice to simulate noiseqbio
+        nby <- paste0(nbx[,"M"],rownames(nbx))
+      } else {
+        nbx <- apply(nb[i,], 2, function(x) {ifelse(x > 0, "-", "+")})
+        nby <- paste0(nbx[,"log2FC"],rownames(nbx))
+      }
+      ex <- apply(e[i,], 2, function(x) {ifelse(x < 0, "-", "+")})
+      dx <- apply(d[i,], 2, function(x) {ifelse(x < 0, "-", "+")})
+      nx <- apply(n[i,], 2, function(x) {ifelse(x > 0, "-", "+")}) # twisted, because edgeR and DESeq2 are calculating vice versa
+      ey <- paste0(ex[,"logFC"],rownames(ex))
+      dy <- paste0(dx[,"log2FoldChange"],rownames(dx))
+      ny <- paste0(nx[,"M"],rownames(nx))
+      interect.list.direct <- intersect(ey, intersect(dy, intersect(ny, nby)))
+      return(substr(interect.list.direct,2,100))
+    }
+    ###########################################
+    ## executiv part of the script            #
+    ###########################################
+    for(SampleNames in sampleLists) {
+       workflow(file, saveat, SampleNames, LenGCTable, myLenPos, myGCPos, rpkm, pvtedgeR, pvtDESeq2, pvtNOISeq, edgeRDir, DESeq2Dir, NOISeqDir, htseq)
+    }
+    print("Thank you for using the DEG pipeline :) \n")
+    # fin
+    ### compare DEGs analysis ###
+    # used venny and Excel