###############################################################
    ## DEG pipeline with edgeR, DESeq2 and NOISeq                 #
    ## Modified from the template of Dr. Kristian Ullrich 2014    #
    ## @fabianhaas, AG Rensing, Marburg, Jan 2016                 #
    ###############################################################
    # The pipeline expects sample triplicates.
    #   If you don't have three samples of one condition, use face files with zero counts.
    # RPKM calculation is optional.
    # Additional program to sum up the tsv files: /mnt/nas_temp/home/haasf/Program/sumup_htseq_counts.pl folder file_prefix

    ###########################################
    ## please define your variables           #
    ###########################################

    saveat <- "/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/DEGs_0.9"

    file <- '/mnt/NAS_coruscant_datashare/haasf/madland_RNA-seq_Hoecker/31315.p.sort.counts'

    # Table with the length and the GC content
    LenGCTable <- "/mnt/NAS_coruscant_datashare/haasf/GeneAtlas_RNASeq/DEG_2nd_round/cosmoss_V3.3.release_2016-07-22.IsoformV31.fasta.length_GC_Contig_Daten_for_34840_1nd.csv"

    # Column position of the gene name and the width/gc content (first column is gene name!)
    myLenPos <- 1
    myGCPos <- 2

    #
    # hoo <- c("","")
    # sampleLists <- c()
    # for(a in hoo) {
    #   for(b in hoo) {
    #     if(a!=b) {
    #       tmp <- c(a,b)
    #       sampleLists <- c(sampleLists, list(tmp))
    #     }
    #   }
    # }


    sampleLists <- list(c("cop_D", "cop_L"),
                        c("cop_D", "spa_D"),
                        c("cop_D", "spa_L"),
                        c("cop_D", "WT_D"),
                        c("cop_D", "WT_L"),
                        c("cop_L", "spa_D"),
                        c("cop_L", "spa_L"),
                        c("cop_L", "WT_D"),
                        c("cop_L", "WT_L"),
                        c("spa_D", "spa_L"),
                        c("spa_D", "WT_D"),
                        c("spa_D", "WT_L"),
                        c("spa_L", "WT_D"),
                        c("spa_L", "WT_L"),
                        c("WT_D", "WT_L")
                      )


    # Do you want a RPKM value table (1/0)
    # Be warned, it takes a while to do this. approx. 10 minutes for 35000 genes and 6 samples.
    rpkm <- 0
    # Are the data original HTSeq files? yes <- 1 ; no <- 0
    htseq <- 0

    # threshold settings for the R packages
    #edgeR
    pvtedgeR <- 0.001
    #DESeq2
    pvtDESeq2 <- 0.001
    #NOISeq
    pvtNOISeq <- 0.9

    # Output folder names
    edgeRDir <- "edgeR"
    DESeq2Dir <- "DESeq2"
    NOISeqDir <- "NOISeq"

    #########################
    ## stop editing here!! ##
    #########################


    ###########################################
    ## executiv function                      #
    ###########################################

    workflow <- function(file, saveat, SampleNames, LenGCTable, myLenPos, myGCPos, rpkm, pvtedgeR, pvtDESeq2, pvtNOISeq, edgeRDir, DESeq2Dir, NOISeqDir, htseq)
    {
        # setwd(workingFolder) # set workspace

                sampleNameI <- SampleNames[1]
                sampleNameII <- SampleNames[2]

                # Comparison name
                cName <- paste(sampleNameI, sampleNameII, sep = " vs. ")

                filename <- strsplit(file, "/")
                filename <- filename[[1]][length(filename[[1]])] # long but it's working
                sampleName <- gsub(", ","_",toString(SampleNames))
                samples <- SampleNames
                # create output dir
                dir.create(saveat, showWarnings = FALSE)
                outdir <- paste0(saveat,"/",sampleName)
                dir.create(outdir)

                # Start the log file
                sink(paste0(outdir, "/DEGpipe_",cName, ".log"))

                # print parameter into the log file
                print("set parameters:")
                print(paste0("Counts file:", file))
                print(paste0("Results stored at:", saveat))
                print(paste0("SampleNames: ",SampleNames))
                print(paste0("LenGCTable: ",LenGCTable))
                print(paste0("myLenPos: ",myLenPos))
                print(paste0("myGCPos: ",myGCPos))
                print(paste0("rpkm: ",rpkm))
                print(paste0("htseq: ",htseq))
                print(paste0("pvtedgeR: ",pvtedgeR))
                print(paste0("pvtDESeq2: ",pvtDESeq2))
                print(paste0("pvtNOISeq: ",pvtNOISeq))
                print(paste0("edgeRDir: ",edgeRDir))
                print(paste0("DESeq2Dir: ",DESeq2Dir))
                print(paste0("NOISeqDir: ",NOISeqDir))
                # session and package infos for the log file
                print(sessionInfo())
                print(Sys.info())

                # load spike-in count tables
                data0 <- read.table(file, header=T, sep="\t", row.names=1)
                # just if the gene ids not ending with .1 (it is nessessery for NOISeqbio iteration step)
                #row.names(data0) <- paste0(row.names(data0), ".1")

                # remove HTSeq rows like "__no_feature"
                data0 <- data0[ with(data0,  !grepl('__', rownames(data0))) , ]
                data0 <- data0[ with(data0,  !grepl('ERCC', rownames(data0))) , ]
                #data0 <- data0[ , with(data0,  !grepl('X.1', colnames(data0)))]
                #rename col names (library name to roman number)

              # librariesName <- c("56754_WT_Naturally_3.bam.sort.fastq.unmapped.sam.sort.bam", "56753_WT_Naturally_2.bam.sort.fastq.unmapped.sam.sort.bam", "56752_WT_Naturally_1.bam.sort.fastq.unmapped.sam.sort.bam", "56751_tt_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56750_tt_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56749_tt_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56748_tt_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56747_tt_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56746_tt_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56745_WT_10_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56744_WT_10_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56743_WT_10_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56742_WT_6_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56741_WT_6_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56740_WT_6_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56739_WT_0_days_3.bam.sort.fastq.unmapped.sam.sort.bam", "56738_WT_0_days_2.bam.sort.fastq.unmapped.sam.sort.bam", "56737_WT_0_days_1.bam.sort.fastq.unmapped.sam.sort.bam", "56736_tt_T1_tt_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56735_tt_T1_tt_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56734_tt_T1_tt_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56733_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56732_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56731_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56730_tt_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56729_tt_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56728_tt_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56727_WT_T1_NA_3.bam.sort.fastq.unmapped.sam.sort.bam", "56726_WT_T1_NA_2.bam.sort.fastq.unmapped.sam.sort.bam", "56725_WT_T1_NA_1.bam.sort.fastq.unmapped.sam.sort.bam", "56724_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56723_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56722_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56721_WT_T1_WT_10d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56720_WT_T1_WT_10d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56719_WT_T1_WT_10d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56718_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56717_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56716_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56715_WT_T1_WT_6d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56714_WT_T1_WT_6d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56713_WT_T1_WT_6d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56712_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56711_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56710_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56709_WT_T1_WT_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56708_WT_T1_WT_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56707_WT_T1_WT_0d_1.bam.sort.fastq.unmapped.sam.sort.bam", "56706_WT_T1_tt_0d_3.bam.sort.fastq.unmapped.sam.sort.bam", "56705_WT_T1_tt_0d_2.bam.sort.fastq.unmapped.sam.sort.bam", "56704_WT_T1_tt_0d_1.bam.sort.fastq.unmapped.sam.sort.bam")
              # #librariesName <- c("80484_Celery_3_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80483_Celery_3_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80482_Celery_3_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80481_Celery_1_day_imbibed_embryo_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80480_Celery_1_day_imbibed_embryo_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80479_Celery_1_day_imbibed_embryo_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80478_Celery_5_day_imbibed_whole_fruit_29C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80477_Celery_5_day_imbibed_whole_fruit_29C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80460_Celery_3_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80459_Celery_3_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80458_Celery_1_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80457_Celery_1_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80456_Celery_1_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80455_Celery_Dry_whole_fruit_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80454_Celery_Dry_whole_fruit_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80453_Celery_Dry_whole_fruit_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80452_Celery_5_day_imbibed_whole_fruit_29C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80451_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80450_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80449_Celery_5_day_imbibed_whole_fruit_100uM_ABA_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80448_Celery_5_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80447_Celery_5_day_imbibed_whole_fruit_20C_rep_2_2.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80446_Celery_5_day_imbibed_whole_fruit_20C_rep_1_1.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam", "80445_Celery_3_day_imbibed_whole_fruit_20C_rep_3_3.bam.sort.fastq.unmapped.concordant_uniqsort.bam_reads.Celery_genome_GG_trinity.500bp_new.sam.sort.bam.featureCounts.p.txt")
              # romanNumber <- c("T17_Dry_Naturally_Freezedryseed", "T17_Dry_Naturally_Freezedryseed.1", "T17_Dry_Naturally_Freezedryseed.2", "T16_Dry_6days_Freezeafter6dageing", "T16_Dry_6days_Freezeafter6dageing.1", "T16_Dry_6days_Freezeafter6dageing.2", "T15_Dry_0days_Freezedryseed", "T15_Dry_0days_Freezedryseed.1", "T15_Dry_0days_Freezedryseed.2", "T14_Dry_10days_Freezeafter10dageing", "T14_Dry_10days_Freezeafter10dageing.1", "T14_Dry_10days_Freezeafter10dageing.2", "T13_Dry_6days_Freezeafter6dageing", "T13_Dry_6days_Freezeafter6dageing.1", "T13_Dry_6days_Freezeafter6dageing.2", "T12_Dry_0days_Freezedryseed", "T12_Dry_0days_Freezedryseed.1", "T12_Dry_0days_Freezedryseed.2", "T11_Imbibe_6days_12h", "T11_Imbibe_6days_12h.1", "T11_Imbibe_6days_12h.2", "T10_Imbibe_6days_5h", "T10_Imbibe_6days_5h.1", "T10_Imbibe_6days_5h.2", "T9_Imbibe_0dyas_5h", "T9_Imbibe_0dyas_5h.1", "T9_Imbibe_0dyas_5h.2", "T8_Imbibe_Naturally_47h", "T8_Imbibe_Naturally_47h.1", "T8_Imbibe_Naturally_47h.2", "T7_Imbibe_Naturally_24h", "T7_Imbibe_Naturally_24h.1", "T7_Imbibe_Naturally_24h.2", "T6_Imbibe_10days_72h", "T6_Imbibe_10days_72h.1", "T6_Imbibe_10days_72h.2", "T5_Imbibe_10days_24h", "T5_Imbibe_10days_24h.1", "T5_Imbibe_10days_24h.2", "T4_Imbibe_6days_47h", "T4_Imbibe_6days_47h.1", "T4_Imbibe_6days_47h.2", "T3_Imbibe_6days_24h", "T3_Imbibe_6days_24h.1", "T3_Imbibe_6days_24h.2", "T2_Imbibe_0days_24h", "T2_Imbibe_0days_24h.1", "T2_Imbibe_0days_24h.2", "T1_Imbibe_0days_5h", "T1_Imbibe_0days_5h.1", "T1_Imbibe_0days_5h.2")
              # #romanNumber <- c("emb_3d_20.2", "emb_3d_20.1", "emb_3d_20", "emb_1d_20.2", "emb_1d_20.1", "emb_1d_20", "wf_5d_29.2", "wf_5d_29.1", "wf_3d_20.1", "wf_3d_20", "wf_1d_20.2", "wf_1d_20.1", "wf_1d_20", "wf_dry.2", "wf_dry.1", "wf_dry", "wf_5d_29", "wf_5d_ABA_20.2", "wf_5d_ABA_20.1", "wf_5d_ABA_20", "wf_5d_20.2", "wf_5d_20.1", "wf_5d_20", "wf_3d_20.2")
              #
              #
              # for(i in 1:length(librariesName)) {
              #     colnames(data0)[colnames(data0)==grep(librariesName[i], names(data0), value = TRUE)] <- romanNumber[i]
              # }

                # creats a list of the column names/samples you would like to compare
                SampleNames <- makeSampleList(sampleNameI, length(grep(paste0("^",sampleNameI,"[^A-Z]"), colnames(data0))) + 1,
                                              sampleNameII, length(grep(paste0("^",sampleNameII,"[^A-Z]"), colnames(data0))) + 1)
                print(SampleNames)

                ## mal anschauen als ersatz für das darüber ##
                ## newenw <- table.new[, grepl(paste(LIST,collapse="|"), colnames(table.new))]


                # exctract the samples from the data0 if needed
                if(length(SampleNames) > 0) {
                    RTableInFiles <- data0[,SampleNames]
                    data <- data0[,SampleNames]
                }
                print(head(RTableInFiles))

                # sort by column name
                # data <- data[ , order(names(data))] # dangeros, the order of query and referenz might be not the same
                # see how many rows and columns are in the tables
                print(paste("Raw data row number: ",nrow(data)))
                print(paste("Raw data column number: ",ncol(data)))

                # 2) A typical differential expression analysis workflow
                # 2.1) Filtering and exploratory data analysis
                # filter: One single gene/spike-in must have at least 5 counts in minimum 2 libraries
                filter <- apply(data, 1, function(x) length(x[x>5])>=2)
                dataFiltered <- data[filter,]
                # see how many rows and columns are left
                print(paste("Filtered data row number: ",nrow(dataFiltered)))
                print(paste("Filtered data row number: ",ncol(dataFiltered)))
                # filter gene names
                genes <- rownames(dataFiltered)[grep("^Pp3", rownames(dataFiltered))]
                # filter spike-in names
                spikes <- rownames(dataFiltered)[grep("^ERCC", rownames(dataFiltered))]
                # get the column name list as.factors
                xtmp <- colnames(dataFiltered)
                x <- split2list(xtmp)

                sampleNameList <- gsub(".[1234567890]+$", "", SampleNames)
                columnPos <- makeIndexList(SampleNames)

                # run ...
                ### edgeR ###
                print(paste0("edgeR is running with ", cName))
                packageDescription("edgeR")
                edgeRFunction(outdir, edgeRDir, cName, RTableInFiles, pvtedgeR, sampleNameI, sampleNameII, sampleNameList)

                #### DESeq2 ####
                print(paste0("DESeq2 is running with ", cName))
                packageDescription("DESeq2")
                DESeq2Function(outdir, DESeq2Dir, cName, RTableInFiles, pvtDESeq2, sampleNameI, sampleNameII, sampleNameList)

                ### NOISeq ###
                print(paste0("NOISeq is running with ", cName))
                packageDescription("NOISeq")
               NOISeqFunction(outdir, NOISeqDir, cName, RTableInFiles, pvtNOISeq, sampleNameI, sampleNameII, myLenPos, myGCPos, LenGCTable, columnPos, sampleNameList, SampleNames)

                ### Overlap of all three methods ###
                # intersection with DEG direction check => 1, without => 0
                intersection_4.file <- intersection(outdir,1,4)
                write.table(intersection_4.file, file = paste0(outdir, "/", cName,"_intersection_four_methods_",nrow(intersection_4.file),".tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
                # edgeR, DESeq2, NOISeq
                intersection_3.file <- intersection(outdir,1,3)
                write.table(intersection_3.file, file = paste0(outdir, "/", cName,"_intersection_three_methods_",nrow(intersection_3.file),".tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

                sink() # End of log file

    }





    ####################
    ## main functions  #
    ####################

    plotGraphics <- function(data, librariesName, cName, outdir, romanNumber)
    {
        library("DESeq2")
        library("ggplot2")
        library("RColorBrewer")
        library("pheatmap")
        library("genefilter")

        # create sample annotation lists
        # sample.name.list <- gsub(".[12]", "", librariesName[colnames(data)])
        sample.name.list <- gsub(".[12]", "", romanNumber)
        sample.name.uniq <- unique(unlist(sample.name.list, use.names = FALSE))

        col.data.condition = data.frame(row.names = librariesName, condition = sample.name.list)

        # run DESeq2 functions
        dds.matrix <- DESeqDataSetFromMatrix(countData = data, colData = col.data.condition, design = ~condition) # data must be integer! (Ganze Zahlen)
        colData(dds.matrix)$condition <- factor(colData(dds.matrix)$condition, levels = sample.name.uniq)
        dds.matrix.deseq <- DESeq(dds.matrix)
        dds.matrix.res <- results(dds.matrix.deseq)
        dds.matrix.res <- dds.matrix.res[order(dds.matrix.res$padj), ]
        dds.matrix.res.fdr <- dds.matrix.res[!is.na(dds.matrix.res$padj), ]
        dds.matrix.res.fdr <- dds.matrix.res.fdr[dds.matrix.res.fdr$padj < 0.0001, ]
        dds.matrix.rld <- rlogTransformation(dds.matrix, blind = TRUE)

        # data for PCA plot
        pcaData <- plotPCA(dds.matrix.rld, intgroup = "condition", returnData=TRUE)
        percentVar <- round(100 * attr(pcaData, "percentVar"))
        # color for samples
        sample.colors <- c("gray60","#D73027","#4575B4", "purple", "orange","#113FF7","green", "black")

        # plot the PCA
        # https://cran.r-project.org/web/packages/ggfortify/vignettes/plot_pca.html
        png(filename=paste0(outdir,"PCA_all_samples_DEseq2_normalized.png"))
          ggplot(
              pcaData,
              aes(
                  PC1,
                  PC2,
                  color=condition,
                  shape=condition)
              ) +
              scale_shape_manual(values = c(4, 1, 2, 3, 0, 5, 6, 7)) +
              geom_point(size=2.5) +
              xlab(
                  paste0("PC1: ",percentVar[1],"% variance")
                  ) +
              ylab(
                  paste0("PC2: ",percentVar[2],"% variance")
                  ) +
              theme() + #, face="bold"
              scale_colour_manual(
                  values=sample.colors  # dodgerblue3
                  ) +
              ggtitle("PCA of all samples (DESeq2 normalized)") +
              theme(
                  plot.title = element_text(lineheight=.8),
                  panel.background = element_rect(fill = "gray95")
              )
        dev.off()

        # how many genes will be involved in the heatmap calculation
        numbGenes <- 100
        # pheatmap data
        topVarGenes <- head(order(rowVars(assay(dds.matrix.rld)),decreasing=TRUE),numbGenes)
        mat <- assay(dds.matrix.rld)[ topVarGenes, ]
        mat <- mat - rowMeans(mat)
        # pheatmap data annotation
        anno <-as.data.frame(colData(dds.matrix.rld)[,c("condition")])
        rownames(anno) <- colnames(data) # check out the row names of annotation
        colnames(anno) <- "condition"
        # pheatmap data annotation color
        condition <- sample.colors
        names(condition) <- sample.name.uniq
        anno_colors <- list(condition = condition)

        #heatmap(mat)
        #pheatmap(mat)
        # ftp://cran.r-project.org/pub/R/web/packages/pheatmap/pheatmap.pdf
        png(filename=paste0(outdir,"HC_heatmap_all_samples_DESeq2_normalized.png"))
          pheatmap(mat ,
                   annotation = anno,
                   annotation_colors = anno_colors,
                   fontsize = 10,
                   fontsize_row = 6,
                   fontsize_col = 10,
                   main = paste0("Heatmap of all samples (DESeq2 normalized) ", numbGenes," genes\n"),
                   annotation_names_col = F,
                   show_rownames = F,
                   cluster_rows = F,
                   border_color = F,
                   treeheight_col = 20
          )
        dev.off()
    }


    edgeRFunction <- function(odir, edgeRDir, cName, RTableInFiles, pvtedgeR, sampleNameI, sampleNameII, sampleNameList)
    {
        library(edgeR)

        outdir <- paste0(odir, "/",edgeRDir,"/") # set outdir

        dir.create(outdir, showWarnings = FALSE) # create output folder
        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"

        comparison <- cName
        counts <- RTableInFiles

        pvt <- pvtedgeR

        group <- factor(sampleNameList)

        # edgeR 'normal'

        y.normal <- DGEList(counts = counts, group = group)

        y.normal[,1]

        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotMDS")))
            plotMDS(y.normal, method = "bcv", main = paste0(cName, "\nedgeR - plotMDS"))
        dev.off()

        y.normal <- calcNormFactors(y.normal)
        y.normal <- estimateCommonDisp(y.normal)
        y.normal <- estimateTagwiseDisp(y.normal)


        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotBCV")))
            plotBCV(y.normal, main = paste0(cName, "\nedgeR - plotBCV"))
        dev.off()

        et.normal <- exactTest(y.normal, pair = c(sampleNameI, sampleNameII))

        topTags(et.normal)
        # adjust.method   ==>  character string specifying p-value adjustment method. Possible values are "none", "BH", "fdr" (equivalent to "BH"), "BY" and "holm". See p.adjust for details.
        summary(de.normal <- decideTestsDGE(et.normal, p = pvt, adjust = "BH"))
        et.normal.fdr <- cbind(et.normal$table, p.adjust(et.normal$table[, 3], method = "BH"))
        et.normal.fdr <- et.normal.fdr[et.normal.fdr[, 4] < pvt, ]

        write.table(et.normal.fdr, file = paste0(outdir, cName, "_", nrow(et.normal.fdr), "_edgeR_fdr.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        detags.normal <- rownames(de.normal)[as.logical(de.normal)]
        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotSmear")))
        plotSmear(et.normal, de.normal, tags = detags.normal, main = paste0(cName, "\nedgeR - plotSmear"))
        abline(h = c(-2, 2), col = "blue")
        dev.off()

        # edgeR 'GLM'

        y.glm <- DGEList(counts = counts, group = group)

        design <- model.matrix(~0 + group, data = y.glm$samples)
        design

        y.glm <- estimateGLMCommonDisp(y.glm, design)
        y.glm <- estimateGLMTrendedDisp(y.glm, design)

        ## Loading required package: splines
        y.glm <- estimateGLMTagwiseDisp(y.glm, design)

        png(filename=paste0(outdir,"plots/",paste0(cName, " edgeR - plotBCV - GLM")))
        plotBCV(y.glm, main = paste0(cName, "\nedgeR - plotBCV - GLM"))
        dev.off()

        y.fit <- glmFit(y.glm, design)
        y.lrt <- glmLRT(y.fit)

        FDR <- p.adjust(y.lrt$table$PValue, method = "BH")
        sum(FDR < pvt)

        topTags(y.lrt)

        summary(de.glm <- decideTestsDGE(y.lrt, p = pvt, adjust = "BH"))

    }


    DESeq2Function <- function(odir, DESeq2Dir, cName, RTableInFiles, pvtDESeq2, sampleNameI, sampleNameII, sampleNameList)
    {
        library(DESeq2)
        library(RColorBrewer)
        library(gplots)
        library(vsn)

        outdir <- paste0(odir, "/",DESeq2Dir,"/") # set outdir

        dir.create(outdir, showWarnings = FALSE)
        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"


        pvt <- pvtDESeq2



        #   DESeqDataSet(se, design, ignoreRank = FALSE)
        #   DESeqDataSetFromMatrix(countData, colData, design, tidy = FALSE, ignoreRank = FALSE, ...)
        #   DESeqDataSetFromHTSeqCount(sampleTable, directory = ".", design, ignoreRank = FALSE, ...)
        #   DESeqDataSetFromTximport(txi, colData, design, ...)
        coldata = data.frame(row.names = colnames(RTableInFiles), condition = sampleNameList)

        ddsHTSeq <- DESeqDataSetFromMatrix(countData = RTableInFiles, colData = coldata, design = ~ condition)

        colData(ddsHTSeq)$condition <- factor(colData(ddsHTSeq)$condition, levels = c(sampleNameI, sampleNameII))

        ddsHTSeq <- DESeq(ddsHTSeq)

        ddsHTSeq.res <- results(ddsHTSeq)
        ddsHTSeq.res <- ddsHTSeq.res[order(ddsHTSeq.res$padj), ]
        head(ddsHTSeq.res)

        # png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - plotMA")))
        #     plotMA(ddsHTSeq, main = paste0(cName, "\nDESeq2 - plotMA"), ylim = c(-2, 2))
        # dev.off()

        ddsHTSeq.res.fdr <- ddsHTSeq.res[!is.na(ddsHTSeq.res$padj), ]
        ddsHTSeq.res.fdr <- ddsHTSeq.res.fdr[ddsHTSeq.res.fdr$padj < pvt, ]

        write.table(as.matrix(ddsHTSeq.res.fdr), file = paste0(outdir, cName, "_", nrow(as.matrix(ddsHTSeq.res.fdr)), "_DESeq2_fdr.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        ddsHTSeq.rld <- rlogTransformation(ddsHTSeq, blind = TRUE)
        ddsHTSeq.vsd <- varianceStabilizingTransformation(ddsHTSeq, blind = TRUE)

        par(mfrow = c(1, 3))
        notAllZero <- (rowSums(counts(ddsHTSeq)) > 0)
        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.1")))
            meanSdPlot(log2(counts(ddsHTSeq, normalized = TRUE)[notAllZero, ] + 1)) # , ylim = c(0, 2.5)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.2"))) # , ylim = c(0, 2.5)
            meanSdPlot(assay(ddsHTSeq.rld[notAllZero, ]))
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - SdPlot.3"))) # , ylim = c(0, 2.5)
            meanSdPlot(assay(ddsHTSeq.vsd[notAllZero, ]))
        dev.off()

        hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
        distRL <- dist(t(assay(ddsHTSeq.rld)))
        mat <- as.matrix(distRL)
        rownames(mat) <- colnames(mat) <- with(colData(ddsHTSeq), paste0(condition))

        # png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - heatmap")))
        #     heatmap(mat, trace = "none", col = rev(hmcol), margin = c(13, 13))
        # dev.off()

        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - PCA")))
            print(plotPCA(ddsHTSeq.rld, intgroup = "condition"))
        dev.off()

        png(filename=paste0(outdir,"plots/",paste0(cName, " DESeq2 - DispEsts")))
            plotDispEsts(ddsHTSeq, main = paste0(cName, "\nDESeq2 - plotDispEsts"))
        dev.off()
    }

    NOISeqFunction <- function(odir, NOISeqDir, cName, RTableInFiles, pvtNOISeq, sampleNameI, sampleNameII, myLenPos, myGCPos, LenGCTable, columnPos, sampleNameList, SampleNames)
    {
        library(NOISeq)

        outdir <- paste0(odir, "/",NOISeqDir,"/") # set outdir

        dir.create(outdir, showWarnings = FALSE)
        dir.create(paste0(outdir,"plots"), showWarnings = FALSE) # create subfolder "plots"

        pvt <- pvtNOISeq

        counts <- RTableInFiles[, columnPos]
        head(counts)

        # must be changed if there are no triplicates
        # !! Please change this setting if needed !!
        myfactors <- data.frame(Treatment = sampleNameList, TreatmentRun = SampleNames)
        # sort counts by row names
        mycounts <- counts[order(rownames(counts)), ]
        # filter out low counts
        mycounts.filter <- filtered.data(mycounts, factor = myfactors$Treatment, norm = FALSE, depth = NULL, method = 1, cv.cutoff = 100, cpm = 1)
        mycounts.rowname <- rownames(mycounts.filter)

        # Extra file with length and GC content
        myLengthTable <- read.table(LenGCTable, sep = "\t", header = TRUE, stringsAsFactor = FALSE, row.names=1)
        myLengthTable <- myLengthTable[order(rownames(myLengthTable)),] # sort by rownames
        myLengthTable.filter <- myLengthTable[mycounts.rowname,]

        mylength <- myLengthTable.filter[,myLenPos]
        names(mylength) <- rownames(myLengthTable.filter)

        mygc <- myLengthTable.filter[,myGCPos]
        names(mygc) <- rownames(myLengthTable.filter)





        mydata <- readData(data = mycounts.filter, length = mylength, gc = mygc, factors = myfactors)
        mydata

        mydata.GCbias <- dat(mydata, type = "GCbias")
    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.1")))
    #-#        explo.plot(mydata.GCbias)
    #-#    dev.off()

        mydata.GCbias.group <- dat(mydata, factor = "Treatment", type = "GCbias")
    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - GCbias.2")))
    #-#      explo.plot(mydata.GCbias.group)
    #-#    dev.off()

        show(mydata.GCbias.group)

        mydata.lengthbias <- dat(mydata, type = "lengthbias")
    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.1")))
    #-#        explo.plot(mydata.lengthbias)
    #-#    dev.off()

        mydata.lengthbias.group <- dat(mydata, factor = "Treatment", type = "lengthbias")
    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - Lenbias.2")))
    #-#        explo.plot(mydata.lengthbias.group)
    #-#    dev.off()

        show(mydata.lengthbias.group)

        # looks different
        mydata.cd <- dat(mydata, type = "cd")
    #-#    png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - cd")))
    #-#        explo.plot(mydata.cd)
    #-#    dev.off()
        mylength

        myRPKM <- NOISeq::rpkm(assayData(mydata)$exprs)
        myUQUA <- NOISeq::uqua(assayData(mydata)$exprs, long = mylength, lc = 0.5, k = 0)
        myTMM <- NOISeq::tmm(assayData(mydata)$exprs, long = 1000, lc = 0)

        mynoiseq.rpkm <- noiseq(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")
        mynoiseq.nnorm <- noiseq(mydata, k = 0.5, norm = "n", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")
        mynoiseq.tmm <- noiseq(mydata, k = 0.5, norm = "tmm", factor = "Treatment", pnr = 0.2, nss = 5, v = 0.02, lc = 1, replicates = "biological")

        # RPKM
        mynoiseq.rpkm.prob <- mynoiseq.rpkm@results[[1]][mynoiseq.rpkm@results[[1]]$prob > pvt, ]
        mynoiseq.rpkm.prob <- mynoiseq.rpkm.prob[!is.na(mynoiseq.rpkm.prob$prob), ]
        write.table(mynoiseq.rpkm.prob, file = paste0(outdir, cName, "_", nrow(mynoiseq.rpkm.prob), "_NOISeq_rpkm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
        # none
        mynoiseq.nnorm.prob <- mynoiseq.nnorm@results[[1]][mynoiseq.nnorm@results[[1]]$prob > pvt, ]
        mynoiseq.nnorm.prob <- mynoiseq.nnorm.prob[!is.na(mynoiseq.nnorm.prob$prob), ]
        write.table(mynoiseq.nnorm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseq.nnorm.prob), "_NOISeq_nnorm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
        # TMM
        mynoiseq.tmm.prob <- mynoiseq.tmm@results[[1]][mynoiseq.tmm@results[[1]]$prob > pvt, ]
        mynoiseq.tmm.prob <- mynoiseq.tmm.prob[!is.na(mynoiseq.tmm.prob$prob), ]
        write.table(mynoiseq.tmm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseq.tmm.prob), "_NOISeq_tmm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        ############ NOISeq bio ################

        pvtnsb <- 0.995
        a0per_value <- 0.99

        iterations <- 5
        x5 <- c(sample(10000:99999, iterations, replace=F), 12345)
        x5

        collector.rpkm <- NULL
        collector.tmm <- NULL


        # for (iter in x5) {
        #   # RPKM
        #   mynoiseqbio.rpkm <- noiseqbio(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
        #   mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm@results[[1]][mynoiseqbio.rpkm@results[[1]]$prob > pvtnsb, ]
        #   mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm.prob[!is.na(mynoiseqbio.rpkm.prob$prob), ]
        #   collector.rpkm <- rbind(collector.rpkm, mynoiseqbio.rpkm.prob)
        #   #collector <- rbind(collector, mynoiseqbio.rpkm.prob[rownames(mynoiseqbio.rpkm.prob) %in% rownames(collector),])
        #   # TMM
        #   mynoiseqbio.tmm <- noiseqbio(mydata, k = 0.5, norm = "tmm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = iter, filter = 1)#, plot = TRUE)
        #   mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
        #   mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
        #   collector.tmm <- rbind(collector.tmm, mynoiseqbio.tmm.prob)
        # }
        #
        # # RPKM
        # print(sort(row.names(collector.rpkm)))
        # library(stringr)
        # mynoiseqbio.rpkm.prob.overlap <- collector.rpkm[str_split_fixed(row.names(collector.rpkm), "\\.",2)[,2]>=iterations+10,]
        # row.names(mynoiseqbio.rpkm.prob.overlap) <- substr(row.names(mynoiseqbio.rpkm.prob.overlap),1,str_length(row.names(mynoiseqbio.rpkm.prob.overlap))-1)
        #
        # write.table(mynoiseqbio.rpkm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.rpkm.prob.overlap),"_NOISeqbio_rpkm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
        #
        # #TMM
        # print(sort(row.names(collector.tmm)))
        # library(stringr)
        # mynoiseqbio.tmm.prob.overlap <- collector.tmm[str_split_fixed(row.names(collector.tmm), "\\.",2)[,2]>=iterations+10,]
        # row.names(mynoiseqbio.tmm.prob.overlap) <- substr(row.names(mynoiseqbio.tmm.prob.overlap),1,str_length(row.names(mynoiseqbio.tmm.prob.overlap))-1)
        #
        # write.table(mynoiseqbio.tmm.prob.overlap, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.tmm.prob.overlap),"_NOISeqbio_tmm_prob_seed_iteration_overlap.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        # # none
        # mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm@results[[1]][mynoiseqbio.nnorm@results[[1]]$prob > pvtnsb, ]
        # mynoiseqbio.nnorm.prob <- mynoiseqbio.nnorm.prob[!is.na(mynoiseqbio.nnorm.prob$prob), ]
        # write.table(mynoiseqbio.nnorm.prob, file = paste0(outdir, cName, "_NOISeqbio_nnorm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
        # # TMM
        # mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
        # mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
        # write.table(mynoiseqbio.tmm.prob, file = paste0(outdir, cName, "_NOISeqbio_tmm_prob.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)




        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbiorpkm")))
            mynoiseqbio.rpkm <- noiseqbio(mydata, k = 0.5, norm = "rpkm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
        dev.off()

        mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm@results[[1]][mynoiseqbio.rpkm@results[[1]]$prob > pvtnsb, ]
        mynoiseqbio.rpkm.prob <- mynoiseqbio.rpkm.prob[!is.na(mynoiseqbio.rpkm.prob$prob), ]
        write.table(mynoiseqbio.rpkm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.rpkm.prob),"_NOISeqbio_rpkm_prob_seed_12345.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbiontmm")))
            mynoiseqbio.tmm <- noiseqbio(mydata, k = 0.5, norm = "tmm", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
        dev.off()

        mynoiseqbio.tmm.prob <- mynoiseqbio.tmm@results[[1]][mynoiseqbio.tmm@results[[1]]$prob > pvtnsb, ]
        mynoiseqbio.tmm.prob <- mynoiseqbio.tmm.prob[!is.na(mynoiseqbio.tmm.prob$prob), ]
        write.table(mynoiseqbio.tmm.prob, file = paste0(outdir, cName,"_", nrow(mynoiseqbio.tmm.prob),"_NOISeqbio_tmm_prob_seed_12345.tsv"), sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)

        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - noiseqbionnorm")))
            mynoiseqbio.nnorm <- noiseqbio(mydata, k = 0.5, norm = "n", factor = "Treatment", lc = 1, r = 20, adj = 1.5, a0per = a0per_value, random.seed = 12345, filter = 1, plot = TRUE)
        dev.off()

        head(mynoiseqbio.rpkm@results[[1]])
        head(mynoiseqbio.nnorm@results[[1]])

        mynoiseqbio.rpkm.deg <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = NULL)

        mynoiseqbio.rpkm.deg.up <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = "up")
        mynoiseqbio.rpkm.deg.down <- degenes(mynoiseqbio.rpkm, q = pvtnsb, M = "down")
        mynoiseqbio.nnorm.deg <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = NULL)
        mynoiseqbio.nnorm.deg.up <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = "up")
        mynoiseqbio.nnorm.deg.down <- degenes(mynoiseqbio.nnorm, q = pvtnsb, M = "down")

        par(mfrow = c(3, 2))
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.9")))
            DE.plot(mynoiseq.rpkm, q = 0.9, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.8")))
            DE.plot(mynoiseq.rpkm, q = 0.8, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorn_0.9")))
            DE.plot(mynoiseq.nnorm, q = 0.9, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorn_0.8")))
            DE.plot(mynoiseq.nnorm, q = 0.8, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.tmm_0.9")))
            DE.plot(mynoiseq.tmm, q = 0.9, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.tmm_0.8")))
            DE.plot(mynoiseq.tmm, q = 0.8, graphic = "expr", log.scale = TRUE)
        dev.off()

        par(mfrow = c(2, 2))
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.rpkm_",pvt)))
            DE.plot(mynoiseqbio.rpkm, q = pvt, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.rpkm_",pvtnsb)))
            DE.plot(mynoiseqbio.rpkm, q = pvtnsb, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.nnorm_",pvt)))
            DE.plot(mynoiseqbio.nnorm, q = pvt, graphic = "expr", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeqbio - DE.nnorm_",pvtnsb)))
            DE.plot(mynoiseqbio.nnorm, q = pvtnsb, graphic = "expr", log.scale = TRUE)
        dev.off()

        par(mfrow = c(1, 2))
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.rpkm_0.9_MD")))
            DE.plot(mynoiseq.rpkm, q = 0.9, graphic = "MD", log.scale = TRUE)
        dev.off()
        png(filename=paste0(outdir,"plots/",paste0(cName, " NOISeq - DE.nnorm_0.9_MD")))
            DE.plot(mynoiseq.nnorm, q = 0.9, graphic = "MD", log.scale = TRUE)
        dev.off()
    }

    ###########################################
    ## additional function for more usability #
    ###########################################

    calcRPKM <- function(c,n,l)
    {
        RPKM <- (10^9* as.numeric(c)) / (as.numeric(n) * as.numeric(l))
        return(RPKM)
    }

    RPKMMatrix <- function(InTable,directory,LenGCTable,myLenPos)
    {
        myLengthTable <- read.table(paste0(directory,LenGCTable), sep = "\t", header = TRUE, stringsAsFactor = FALSE, row.names = 1)

        newtable <- matrix(data = c(1), nrow = nrow(InTable), ncol(InTable), byrow= TRUE)
        colnames(newtable) <- colnames(InTable)
        rownames(newtable) <- rownames(InTable)

        sumCol <- apply(InTable,2,sum) # list of the all col sums

        for(s in colnames(InTable)) {
            for(r in rownames(InTable)) {
                w <- InTable[r, s]
                len <- myLengthTable[r, myLenPos]
                sum <- as.numeric(sumCol[s])

                newtable[r, s] <- calcRPKM(w, sum, len)
            }
        }
        return(newtable)
    }


    # rmHTSeqLines removes the last five rows
    rmHTSeqLines <- function(counts)
    {
        counts <- head(counts, -5)
        return(counts)
    }


    ###############
    ## functions ##
    ###############

    # split list to a factor list
    split2list <- function(tmp)
    {
        xx <- c()
        for(i in 1:length(tmp)) {
            if(grepl("\\.", tmp[i])) {
                foo <- strsplit(tmp[i], '\\.')[[1]]
                fooo <- strsplit(foo, " ")[[1]]
                xx <- c(xx, fooo)
            } else {
                xx <- c(xx, tmp[i])
            }
        }
        return(as.factor(xx))
    }

    # makeGroups
    # https://github.com/drisso/RUVSeq/blob/master/R/helper.R
    makeGroups <- function(xs)
    {
        xs <- factor(xs)
        groups <- matrix(-1, nrow = length(levels(xs)), ncol = max(table(xs)))
        for (i in 1:length(levels(xs))) {
            idxs <- which(xs == levels(xs)[i])
            groups[i,1:length(idxs)] <- idxs
        }
        return(groups)
    }

    # The function makeSampleList creates a list of the columns(samples) you would like to compare
    makeSampleList <- function(A, Al, B, Bl)
    {
        l <- c()
        if(exists("Al", mode = "numeric")) {
            l <- makeSampleList.helper(A, Al, l)
            l <- makeSampleList.helper(B, Bl, l)
        }
        return(l)
    }
    makeSampleList.helper <- function(v,k,l)
    {
        for(h in 1:k) {
            if(h == 1) {
                l <- c(l, v)
            } else {
                l <- c(l, paste0(v,".",h-1))
            }
        }
        return(l)
    }

    # function creates a list of numbers out of another list. The List is the index of the given list.
    makeIndexList <- function(l)
    {
        n <- 1
        jj <- c()
        for(v in l) {
            jj <- c(jj, n)
            n <- n + 1
        }
        return(jj)
    }

    intersection <- function(l,check,t)
    {
      l.list <- list.files(path = l, pattern = "*(rpkm|edgeR|DESeq2).*tsv", recursive = TRUE)
      if(grepl("bio", l.list[4],fixed = TRUE)) {
        l.list.noiseq <- read.table(paste0(l,"/",l.list[3]), header=T, sep="\t", row.names=1)
        l.list.noiseqbio <- read.table(paste0(l,"/",l.list[4]), header=T, sep="\t", row.names=1)
      } else {
        l.list.noiseq <- read.table(paste0(l,"/",l.list[4]), header=T, sep="\t", row.names=1)
        l.list.noiseqbio <- read.table(paste0(l,"/",l.list[3]), header=T, sep="\t", row.names=1)
      }
      l.list.edgeR <- read.table(paste0(l,"/",l.list[2]), header=T, sep="\t", row.names=1)
      l.list.deseq2 <- read.table(paste0(l,"/",l.list[1]), header=T, sep="\t", row.names=1)


      print("DEGs edgeR")
      print(nrow(l.list.edgeR))
      print("DEGs deseq2")
      print(nrow(l.list.deseq2))
      print("DEGs noiseq")
      print(nrow(l.list.noiseq))
      print("DEGs noiseqbio")
      print(nrow(l.list.noiseqbio))

      interect.list <- ""
      interect.list.direct <- ""

      if(t==4) {
        # List of overlaps without direction check
        interect.list <- intersect(rownames(l.list.edgeR), intersect(rownames(l.list.noiseq), intersect(rownames(l.list.noiseqbio), rownames(l.list.deseq2))))

        print("DEGs intersection without direction check (four methods)")
        print(length(interect.list))
        print(interect.list)
        print("DEGs intersection with direction check")

        # List of overlaps with direction check
        if(length(interect.list)!=0) {
          interect.list.direct <- interDirect(l.list.edgeR, l.list.deseq2, l.list.noiseq, l.list.noiseqbio, interect.list)
          print(length(interect.list.direct))
          print(interect.list.direct)
        } else {
          print("0")
        }
      }
      if(t==3) {
        # List of overlaps without direction check
        interect.list <- intersect(rownames(l.list.edgeR), intersect(rownames(l.list.noiseq), rownames(l.list.deseq2)))

        print("DEGs intersection without direction check  (three methods)")
        print(length(interect.list))
        print(interect.list)
        print("DEGs intersection with direction check")

        # List of overlaps with direction check
        if(length(interect.list)!=0) {
          # fake data frame with 0 rows => nrow == 0
          df_fake <- data.frame(Date=as.Date(character()),
                           File=character(),
                           User=character(),
                           stringsAsFactors=FALSE)

          interect.list.direct <- interDirect(l.list.edgeR, l.list.deseq2, l.list.noiseq, df_fake, interect.list)
          print(length(interect.list.direct))
          print(interect.list.direct)
        } else {
          print("0")
        }
      }


      if(check == 1){
        return(l.list.noiseq[interect.list.direct,])
      } else {
        return(l.list.noiseq[interect.list,])
      }
    }

    interDirect <- function(e,d,n,nb,i)
    {

      if(nrow(nb) == 0) {
        nbx <- apply(n[i,], 2, function(x) {ifelse(x > 0, "-", "+")}) # use noiseq twice to simulate noiseqbio
        nby <- paste0(nbx[,"M"],rownames(nbx))
      } else {
        nbx <- apply(nb[i,], 2, function(x) {ifelse(x > 0, "-", "+")})
        nby <- paste0(nbx[,"log2FC"],rownames(nbx))
      }

      ex <- apply(e[i,], 2, function(x) {ifelse(x < 0, "-", "+")})
      dx <- apply(d[i,], 2, function(x) {ifelse(x < 0, "-", "+")})
      nx <- apply(n[i,], 2, function(x) {ifelse(x > 0, "-", "+")}) # twisted, because edgeR and DESeq2 are calculating vice versa

      ey <- paste0(ex[,"logFC"],rownames(ex))
      dy <- paste0(dx[,"log2FoldChange"],rownames(dx))
      ny <- paste0(nx[,"M"],rownames(nx))

      interect.list.direct <- intersect(ey, intersect(dy, intersect(ny, nby)))


      return(substr(interect.list.direct,2,100))
    }

    ###########################################
    ## executiv part of the script            #
    ###########################################
    for(SampleNames in sampleLists) {
       workflow(file, saveat, SampleNames, LenGCTable, myLenPos, myGCPos, rpkm, pvtedgeR, pvtDESeq2, pvtNOISeq, edgeRDir, DESeq2Dir, NOISeqDir, htseq)
    }
    print("Thank you for using the DEG pipeline :) \n")

    # fin

    ### compare DEGs analysis ###
    # used venny and Excel