diff --git a/_pre-ARC-legacy/190522_RNASeq_Report.docx b/_pre-ARC-legacy/190522_RNASeq_Report.docx
new file mode 100644
index 0000000000000000000000000000000000000000..2cc34e9ad4ef570d8eb6da2aee81954f089889e6
Binary files /dev/null and b/_pre-ARC-legacy/190522_RNASeq_Report.docx differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_FullPipeline.R b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_FullPipeline.R
new file mode 100644
index 0000000000000000000000000000000000000000..da65d1644c4cefc64569a50a6cda6e61199c3ca1
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_FullPipeline.R
@@ -0,0 +1,1372 @@
+### Setting up the expression.data / mother table from kallisto and
+### Analysis of differential gene expression using sleuth
+
+# Tutorial adapted from this online tutorial: https://pachterlab.github.io/sleuth_walkthroughs/trapnell/analysis.html
+
+
+required.packages <- c("Rmisc", "plyr", "ggplot2", "reshape2", "gridExtra", 'openxlsx', 'sleuth', 'amap', 'ggdendro', 'ellipse', 'UpSetR', 'VennDiagram')
+
+for(package in required.packages)
+{
+  print(package)
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package, repos ="https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = T)
+}
+
+nice.date <- format(Sys.time(), "%y%m%d")
+SampleInfo <- read.xlsx('../00_SampleInfo.xlsx')
+
+color.values <- brewer.pal(8, 'Dark2')[8:1]
+names(color.values) <- unique(SampleInfo$Cond_short)
+
+theme_dominik <-
+  theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+  theme(aspect.ratio = 1) +
+  theme(axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black")) +
+  theme(strip.background = element_blank(), strip.text = element_text(size = 8)) +
+  theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) + #, legend.key.size = unit(0.05, "npc")
+  theme(plot.margin=unit(c(1, 1, 1, 1),"cm")) +
+  theme(plot.title = element_text(hjust = 0, face = "bold", size = 15))
+
+
+########################################
+#### Annotation data
+########################################
+
+load('RNASeq_Annotation/Arabidopsis/02.0Araport11/02.0Araport.RData')
+load("RNASeq_Annotation/Arabidopsis/02.4GO/GOAnno2018-02-06.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/180924_MercatorMapman.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/02.3Mapman.RData")
+colnames(mapman)[2] <- 'MAPMAN.BIN'
+load("RNASeq_Annotation/Arabidopsis/02.9PlnTFDB/02.9PlnTFDB.RData")
+load("RNASeq_Annotation/Arabidopsis/02.12Lipids/02.12lipids.RData")
+
+# load("02_kmeans_clustering/190520_02_ClusterResults.RData")
+
+
+########################################
+#### Reading Kallisto results 
+########################################
+
+# sleuth_objects <- mother.tables <- s2c.list <- mapping_stats.list <- list()
+# for(type in c('not_trimmed', 'trimmed'))
+#     {
+#       base_dir <- paste0('kallisto_', type)
+#       
+#       # A list of paths to the kallisto results indexed by the sample IDs is collated with
+#       kal_dirs <- dir(base_dir, full.names = T) ## Sleuth requires full paths 
+#       kal_dirs
+#       
+#       s2c <- SampleInfo[order(SampleInfo$IlluminaID), c('IlluminaID', 'Cond_short')]
+#       colnames(s2c)[1] <- 'sample'
+#       
+#       s2c$path <- kal_dirs
+#       s2c.list[[type]] <- s2c
+#       
+#       # First build a sleuth object 
+#       sleuth_objects[[type]] <- so <- sleuth_prep(s2c, ~ Cond_short)
+#       
+#       expression.data <- kallisto_table(so)
+#       mother.tables[[type]] <- dcast(expression.data, target_id~sample, value.var = 'tpm')
+#       
+#       mapping_stats <- c()
+#       
+#       for(i in dir(kal_dirs, pattern = '.json', full.names = T))
+#       {
+#         
+#         x <- readLines(i)
+#         x <- x[2:(length(x)-1)]
+#         y <- t(do.call(rbind, strsplit(gsub(',', '', x), split = '": ')))
+#         
+#         colnames(y) <- y[1,]  
+#         z <- as.data.frame(t(y[-1,]))
+#         
+#         z$sample <- unlist(strsplit(i, split = '/'))[2]
+#         
+#         mapping_stats <- rbind(mapping_stats, z)
+#         
+#       }
+#       
+#       mapping_stats.list[[type]] <- mapping_stats
+#       
+# }
+# 
+# 
+# write.xlsx(mapping_stats.list, file = paste(nice.date, '01_mappingstats.xlsx', sep = '_'))
+# 
+# save(sleuth_objects, mother.tables, s2c.list, file = '01_Kallisto.RData')
+
+
+load(file = '01_Kallisto.RData')
+
+
+
+########################################
+#### PCA and HCL
+########################################
+
+pdf(paste(nice.date, '02_PCA_HCL.pdf', sep = '_'), width = 8, height = 8)
+for(type in c('not_trimmed', 'trimmed'))
+{
+
+  ### Prepare data
+      mother.table <- mother.tables[[type]]
+
+      for_clust <- mother.table[, 2:ncol(mother.table)]
+
+      # we'll filter to leave only, more abundant, always-above-0 genes
+      for_clust <- for_clust[apply(for_clust, 1, min) > 0, ]
+      # and log2 transform
+      for_clust <- t(log2(for_clust))
+
+  ### PCA
+      pca <- prcomp(for_clust, scale. = T, retx = T)
+      s <- summary(pca)
+
+      scores <- as.data.frame(pca$x)
+      scores$sample <-  row.names(scores)
+
+      scores <- merge(scores, SampleInfo, by.x = 'sample', by.y = 'IlluminaID')
+
+
+      pca12 <- ggplot(data=scores,aes(x = PC1, y = PC2)) + theme_bw() +
+        geom_point(aes(color=Cond_short), size=2.5) +
+        geom_text(aes(label=sample),size=3, vjust = 2) +
+        labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+             y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+        theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+        theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+        theme(plot.title = element_text(face= "bold")) +
+        theme(aspect.ratio = 1) +
+        scale_color_manual(values = color.values) +
+        theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+        ggtitle(type) +
+        theme(legend.position = "right",  legend.title = element_blank()) #legend.direction = "vertical",
+
+        print(pca12)
+        
+        
+        ### PCA with ellipses
+        
+        ellipses <- droplevels(scores[, c('PC1', 'PC2', 'Cond_short')])
+        colnames(ellipses) <- c('PC1', 'PC2', 'group')
+        
+        centroids <- aggregate(cbind(ellipses$PC1, ellipses$PC2)~group, ellipses, mean)
+        
+        conf.rgn  <- do.call(rbind,lapply(unique(ellipses$group),function(t)
+          data.frame(group=as.character(t),
+                     
+                     ellipse::ellipse(cov(ellipses[ellipses$group==t,1:2]),
+                                      centre=as.matrix(centroids[centroids$group==t,2:3]),
+                                      level=0.95),
+                     stringsAsFactors=FALSE)))
+        
+    pc.elli <- ggplot(ellipses, aes(x = PC1, y = PC2, col = group)) +
+          geom_point(size = 2.5) +
+          geom_path(data=conf.rgn) +
+          labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+               y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+          theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+          theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+          theme(plot.title = element_text(face= "bold")) +
+          theme(aspect.ratio = 1) +
+          scale_color_manual(values = color.values) +
+          theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+          theme(legend.position = "right",  legend.title = element_blank())
+        
+    print(pc.elli)
+    
+
+    ### HCL
+
+        clusters <- hcluster(for_clust, method = "pearson", link = "complete")
+
+        ### Extract the cluster data to use in gglot
+        ddata <- dendro_data(as.dendrogram(clusters), type = "rectangle") ### alternative: type = "triangle"
+
+        ### Extract the label data of the dendogram...
+        labelData <- label(ddata)
+        ### ... and match with the Sample info
+        colnames(labelData)[3] <- "IlluminaID"
+        labelData <- merge(labelData, SampleInfo, "IlluminaID")
+
+
+        p <- ggplot(segment(ddata)) +
+          ### The following line is the info for the indivSampleIDual segments (leaves) of the dendogram
+          geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
+          ### The following line is the info for the labels of the leaves
+          geom_text(data = labelData, aes(x = x, y = y, hjust = 0, label = IlluminaID, col = Cond_short)) +
+          scale_color_manual(values = color.values) +
+          coord_flip() +
+          scale_y_reverse(expand = c(0.4,0)) +
+          theme_dendro() +
+          # theme_dominik +
+          ggtitle(type)
+
+        print(p)
+
+
+}
+dev.off()
+
+
+########################################
+#### PCA and HCL of only single treatments
+########################################
+
+pdf(paste(nice.date, '02_PCA_HCL_singleTreatments.pdf', sep = '_'), width = 6, height = 6)
+
+    mother.table <- mother.tables$not_trimmed[,c('target_id', SampleInfo[SampleInfo$Cond_short %in% c('C', 'S', 'M', 'H'), 'IlluminaID'])]
+      for_clust <- mother.table[, 2:ncol(mother.table)]
+      for_clust <- for_clust[apply(for_clust, 1, min) > 0, ]
+      for_clust <- t(log2(for_clust))
+
+  ### PCA
+  pca <- prcomp(for_clust, retx=T, scale = T)
+    # pca <- prcomp(for_clust)
+      s <- summary(pca)
+
+      scores <- as.data.frame(pca$x)
+      scores$sample <-  row.names(scores)
+
+      scores <- merge(scores, SampleInfo, by.x = 'sample', by.y = 'IlluminaID')
+
+
+      pca12 <- ggplot(data=scores,aes(x = PC1, y = PC2)) + theme_bw() +
+        geom_point(aes(color=Cond_short), size=2.5) +
+        # geom_text(aes(label=sample),size=3, vjust = 2) +
+        labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+             y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+        theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+        theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+        theme(plot.title = element_text(face= "bold")) +
+        theme(aspect.ratio = 1) +
+        scale_color_manual(values = color.values) +
+        theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+        theme(legend.position = "right",  legend.title = element_blank()) #legend.direction = "vertical",
+
+        print(pca12)
+
+    ### PCA with ellipses
+
+
+      ellipses <- droplevels(scores[, c('PC1', 'PC2', 'Cond_short')])
+      colnames(ellipses) <- c('PC1', 'PC2', 'group')
+      ellipses$group <- as.factor(ellipses$group)
+      
+        centroids <- aggregate(cbind(ellipses$PC1, ellipses$PC2)~group, ellipses, mean)
+
+        conf.rgn  <- do.call(rbind,lapply(unique(ellipses$group),function(t)
+          data.frame(group=as.character(t),
+
+                     ellipse::ellipse(cov(ellipses[ellipses$group==t,1:2]),
+                             centre=as.matrix(centroids[centroids$group==t,2:3]),
+                             level=0.95),
+                     stringsAsFactors=FALSE)))
+
+        pc.elli <- ggplot(ellipses, aes(x = PC1, y = PC2, col = group)) +
+          geom_point(size = 2.5) +
+          geom_path(data=conf.rgn) +
+          labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+               y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+          theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+          theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+          theme(plot.title = element_text(face= "bold")) +
+          theme(aspect.ratio = 1) +
+          scale_color_manual(values = color.values) +
+          theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+          theme(legend.position = "right",  legend.title = element_blank())
+        
+        print(pc.elli)
+
+    ### HCL
+
+        clusters <- hcluster(for_clust, method = "pearson", link = "complete")
+
+        ### Extract the cluster data to use in gglot
+        ddata <- dendro_data(as.dendrogram(clusters), type = "rectangle") ### alternative: type = "triangle"
+
+        ### Extract the label data of the dendogram...
+        labelData <- label(ddata)
+        ### ... and match with the Sample info
+        colnames(labelData)[3] <- "IlluminaID"
+        labelData <- merge(labelData, SampleInfo, "IlluminaID")
+
+
+        p <- ggplot(segment(ddata)) +
+          ### The following line is the info for the indivSampleIDual segments (leaves) of the dendogram
+          geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
+          ### The following line is the info for the labels of the leaves
+          geom_text(data = labelData, aes(x = x, y = y, hjust = 0, label = IlluminaID, col = Cond_short)) +
+          scale_color_manual(values = color.values) +
+          coord_flip() +
+          scale_y_reverse(expand = c(0.4,0)) +
+          theme_dendro()
+
+        print(p)
+
+dev.off()
+
+
+########################################
+#### PCA separated with / without Heat
+########################################
+
+suppressWarnings(dir.create('02_PCA_results'))
+
+for(i in c('Heat', 'NoHeat', 'All'))
+{
+ 
+  if(i == 'Heat'){for_clust <- mother.tables$not_trimmed[,c('target_id', SampleInfo[grepl('H', SampleInfo$Cond_short), 'IlluminaID'])]}
+  if(i == 'NoHeat'){for_clust <- mother.tables$not_trimmed[,c('target_id', SampleInfo[!grepl('H', SampleInfo$Cond_short), 'IlluminaID'])]}
+  if(i == 'All'){for_clust <- mother.tables$not_trimmed}
+  
+  for_clust <- for_clust[, 2:ncol(for_clust)]
+  for_clust <- for_clust[apply(for_clust, 1, min) > 0, ]
+  for_clust <- t(log2(for_clust))
+  
+  pca <- prcomp(for_clust, retx=T, scale = T)
+  s <- summary(pca)
+  
+  scores <- as.data.frame(pca$x)
+  scores$sample <-  row.names(scores)
+  
+  scores <- merge(scores, SampleInfo, by.x = 'sample', by.y = 'IlluminaID')
+  
+  ellipses <- droplevels(scores[, c('PC1', 'PC2', 'Cond_short')])
+  colnames(ellipses) <- c('PC1', 'PC2', 'group')
+  ellipses$group <- as.factor(ellipses$group)
+  
+  centroids <- aggregate(cbind(ellipses$PC1, ellipses$PC2)~group, ellipses, mean)
+  
+  conf.rgn  <- do.call(rbind,lapply(unique(ellipses$group),function(t)
+    data.frame(group=as.character(t),
+               
+               ellipse::ellipse(cov(ellipses[ellipses$group==t,1:2]),
+                                centre=as.matrix(centroids[centroids$group==t,2:3]),
+                                level=0.95),
+               stringsAsFactors=FALSE)))
+  
+  pc.elli <- ggplot(ellipses, aes(x = PC1, y = PC2, col = group)) +
+    geom_point(size = 2.5) +
+    geom_path(data=conf.rgn) +
+    labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+         y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+    theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+    theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+    theme(plot.title = element_text(face= "bold")) +
+    theme(aspect.ratio = 1) +
+    scale_color_manual(values = color.values) +
+    theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+    theme(legend.position = "right",  legend.title = element_blank())
+  
+  png(paste('02_PCA_results/', nice.date, '02_PCA_', i, '.png', sep = ''), res = 300, width = 1600, height = 1600)
+  print(pc.elli)
+  dev.off()
+  
+}
+
+
+########################################
+#### Averaged PCA separated with / without Heat
+########################################
+
+suppressWarnings(dir.create('02_PCA_results'))
+
+for(i in c('Heat', 'NoHeat', 'All'))
+{
+ 
+  if(i == 'Heat'){for_clust <- mother.tables$not_trimmed[,c('target_id', SampleInfo[grepl('H', SampleInfo$Cond_short), 'IlluminaID'])]}
+  if(i == 'NoHeat'){for_clust <- mother.tables$not_trimmed[,c('target_id', SampleInfo[!grepl('H', SampleInfo$Cond_short), 'IlluminaID'])]}
+  if(i == 'All'){for_clust <- mother.tables$not_trimmed}
+  
+  averaged <- melt(for_clust)
+  averaged <- merge(averaged, SampleInfo, by.x = 'variable', by.y = 'IlluminaID')
+  averaged.su <- summarySE(averaged, groupvars = c('target_id', 'Cond_short'), measurevar = 'value')
+  for_clust <- dcast(averaged.su, target_id~Cond_short, value.var = 'value')
+  
+  for_clust <- for_clust[, 2:ncol(for_clust)]
+  for_clust <- for_clust[apply(for_clust, 1, min) > 0, ]
+  for_clust <- t(log2(for_clust))
+  
+  pca <- prcomp(for_clust, retx=T, scale = T)
+  s <- summary(pca)
+  
+  scores <- as.data.frame(pca$x)
+  scores$Cond_short <-  row.names(scores)
+  
+  pc <- ggplot(scores, aes(x = PC1, y = PC2, col = Cond_short)) +
+    geom_text(aes(label = Cond_short)) +
+    labs(x = paste0("PC1 (", round(s$importance[2,1], digits = 3) *100, "%)"),
+         y = paste0("PC2 (", round(s$importance[2,2], digits = 3) *100, "%)")) +
+    theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+    theme(axis.text = element_text(color = "black"), axis.ticks = element_line(color = "black"), axis.title = element_text(color = "black")) +
+    theme(plot.title = element_text(face= "bold")) +
+    theme(aspect.ratio = 1) +
+    scale_color_manual(values = color.values) +
+    theme(strip.background = element_blank(), strip.text = element_text(size = 10, face = "bold")) +
+    theme(legend.position = "none",  legend.title = element_blank())
+  
+  
+  png(paste('02_PCA_results/', nice.date, '02_PCA_Averaged', i, '.png', sep = ''), res = 300, width = 1600, height = 1600)
+  print(pc)
+  dev.off()
+  
+}
+
+
+
+
+
+# 
+# ########################################
+# #### Sleuth
+# ########################################
+# 
+s2c <- s2c.list$not_trimmed
+so <- sleuth_objects$not_trimmed
+# 
+# 
+# sleuth.results <- c()
+# 
+# # for(i in setdiff(s2c$Cond_short, 'C'))
+# for(i in c('S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH')) ##Nasser's Order
+# {
+# 
+#   sleuthData <- droplevels(rbind(s2c[s2c$Cond_short == "C", ], s2c[s2c$Cond_short == i, ]))
+#   sleuthData$Cond_short <- relevel(factor(sleuthData$Cond_short), i) ### makes sure that the control / wt comes last
+# 
+#   so <- sleuth_prep(sleuthData, ~Cond_short)
+#   so <- sleuth_fit(so)
+#   so <- sleuth_fit(so, ~1, 'reduced')
+#   so <- sleuth_lrt(so, 'reduced', 'full')
+#   lrt_results <- sleuth_results(so, 'reduced:full', test_type = 'lrt')
+# 
+#   so <- sleuth_wt(so, 'Cond_shortC')
+#   wt_results <- sleuth_results(so, test = 'Cond_shortC', test_type = 'wt')
+# 
+#   ### the 'relative to direction' is weird for sleuth...!
+#   wt_results$b <- -wt_results$b
+# 
+#   results <- merge(lrt_results[, c('target_id', 'qval')], wt_results[, c('target_id', 'b', 'se_b')], by = 'target_id')
+#   # results <- wt_results[, c('target_id', 'qval','b', 'se_b')]
+# 
+#   sleuth.results <- rbind(sleuth.results, cbind(results, Cond_short = i))
+# }
+# 
+# 
+# save(sleuth.results, means, sleuth.exprt, file = '01_Sleuth.RData')
+
+
+means <- summarySE(expression.data, groupvars = c('Cond_short', 'target_id'), measurevar = 'tpm')
+means$AGI <- do.call(rbind, strsplit(means$target_id, '.', fixed = T))[,1]
+
+
+
+for(LFC_cut in c(1, 1.5, 2))
+{
+  load('01_Sleuth.RData')
+ 
+  suppressWarnings(dir.create(paste0('03_ResultsWithLFC_CutOff_', LFC_cut)))
+  
+  sleuth.results[abs(sleuth.results$b) < LFC_cut & !is.na(sleuth.results$b), 'qval'] <- 1
+  
+  q.values <- dcast(sleuth.results, target_id~Cond_short, value.var = 'qval')
+  colnames(q.values)[2:ncol(q.values)] <- paste('qvalue', colnames(q.values)[2:ncol(q.values)], sep = '_')
+  
+  lfc_b <- dcast(sleuth.results, target_id~Cond_short, value.var = 'b')
+  colnames(lfc_b)[2:ncol(lfc_b)] <- paste('b', colnames(lfc_b)[2:ncol(lfc_b)], sep = '_')
+  
+  lfc_se_b <- dcast(sleuth.results, target_id~Cond_short, value.var = 'se_b')
+  colnames(lfc_se_b)[2:ncol(lfc_se_b)] <- paste('se_b', colnames(lfc_se_b)[2:ncol(lfc_se_b)], sep = '_')
+  
+  
+  sleuth.exprt <- merge(q.values, lfc_b, 'target_id')
+  sleuth.exprt <- merge(sleuth.exprt, lfc_se_b, 'target_id')
+  
+  
+  expression.data <- kallisto_table(sleuth_objects$not_trimmed)
+  
+  
+  ########################################
+  #### Sanity check top25 / bottom25 lfc
+  ########################################
+
+  
+  for(q_thresh in c(0.01, 0.05))
+  {
+
+    plotList <- list()
+    for(i in setdiff(s2c$Cond_short, 'C'))
+    {
+
+      current.sig <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh)
+
+      current.top25 <- head(current.sig[order(current.sig[,paste('b', i,sep =  '_')], decreasing = T),], 25)
+      if(nrow(current.top25) > 0)
+      {
+        plot.data <- merge(subset(means, Cond_short %in% c('C', i)), current.top25, 'target_id')
+        plot.data <- merge(plot.data, araport, 'AGI', all.x = T)
+
+        plot.data$label <- paste(plot.data$target_id, plot.data$NAME, sep = '\n')
+
+        plotList[[i]][['top25']] <- ggplot(plot.data, aes(x = Cond_short, y = tpm)) +
+          geom_bar(stat = 'identity') +
+          geom_errorbar(aes(ymin = (tpm - sd), ymax = (tpm + sd)), width = 0.1) +
+          theme_dominik +
+          facet_wrap('label', scales = 'free') +
+          ggtitle(paste('Top25 C vs.', i), subtitle = paste('q.value <', q_thresh))
+      }
+
+      current.bottom25 <- head(current.sig[order(current.sig[,paste('b', i,sep =  '_')], decreasing = F),], 25)
+      if(nrow(current.bottom25) > 0)
+      {
+
+        plot.data <- merge(subset(means, Cond_short %in% c('C', i)), current.bottom25, 'target_id')
+        plot.data <- merge(plot.data, araport, 'AGI', all.x = T)
+
+        plot.data$label <- paste(plot.data$target_id, plot.data$NAME, sep = '\n')
+
+        plotList[[i]][['bottom25']] <- ggplot(plot.data, aes(x = Cond_short, y = tpm)) +
+          geom_bar(stat = 'identity') +
+          geom_errorbar(aes(ymin = (tpm - sd), ymax = (tpm + sd)), width = 0.1) +
+          theme_dominik +
+          facet_wrap('label', scales = 'free') +
+          ggtitle(paste('Top25 C vs.', i), subtitle = paste('q.value <', q_thresh))
+
+      }
+    }
+
+    plotList <- unlist(plotList, recursive = F)
+    plotList <- lapply(plotList, ggplotGrob)
+    plotList <- adaptGGplotSize_depth1(plotList)
+
+    pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_03_q', q_thresh, 'SanityCheck.pdf'), width = 15, height = 15)
+    for(i in names(plotList))
+    {
+      grid.arrange(plotList[[i]])
+    }
+    dev.off()
+
+  }
+  
+  
+  ########################################
+  #### Venn diagrams and UpsetR type venn diagram for all treatments
+  ########################################
+  
+  for(q_thresh in c(0.01, 0.05))
+  {
+
+    ########################################
+    #### UpsetR type Venn diagram
+    ########################################
+
+    pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_04_q', q_thresh, 'UpsetR_Venn.pdf'), width = 10, height = 10)
+    #### 1. differentiating down and up
+
+  sleuth.upset <- subset(sleuth.results, qval <= q_thresh)
+  sleuth.upset[sleuth.upset$b < 0, 'direction'] <- 'down'
+  sleuth.upset[sleuth.upset$b > 0, 'direction'] <- 'up'
+  sleuth.upset$set <- paste(sleuth.upset$Cond_short, sleuth.upset$direction)
+
+  sleuth.upset$AGI <- do.call(rbind, strsplit(sleuth.upset$target_id, split = '.', fixed = T))[,1]
+
+  set.order <- c(grep('down', sort(unique(sleuth.upset$set)), value = T), grep('up', sort(unique(sleuth.upset$set)), value = T))
+  sleuth.venn <- dcast(sleuth.upset, target_id~set, value.var = 'set', fun.aggregate = length)
+  upset(sleuth.venn, sets = set.order, keep.order = F)
+
+
+    #### 2. Generally de-regulated
+
+    sleuth.upset2 <- subset(sleuth.results, qval <= q_thresh)
+    sleuth.upset2$set <- sleuth.upset2$Cond_short
+    sleuth.venn2 <- dcast(sleuth.upset2, target_id~set, value.var = 'set', fun.aggregate = length)
+    upset(sleuth.venn2)
+
+    count(sleuth.upset$set)
+    count(sleuth.upset2$set)
+
+    ########################################
+    #### Report some numbers
+    ########################################
+
+
+    grid.arrange(top = paste('q <=', q_thresh), tableGrob(count(sleuth.upset2$set), rows = NULL), tableGrob(count(sleuth.upset$set), rows = NULL), ncol = 2)
+
+
+
+    ########################################
+    #### Standard venn diagram
+    ########################################
+
+    venn.plots <- list()
+    ### 1. Generally de-regulated
+    venn.data <- list()
+    for(i in setdiff(s2c$Cond_short, 'C'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh, target_id, drop = T)
+    }
+
+    for(j in c('H', 'M', 'S'))
+    {
+      # grid.newpage()
+      venn.plots[['de']][[j]] <- gTree(children =
+                                         venn.diagram(
+                                           x = venn.data[grep(j, names(venn.data))],
+                                           sub = paste(paste('q <=', q_thresh)),
+                                           main = 'de-regulated',
+                                           inverted = F,
+                                           cat.fontface = 'bold',
+                                           filename = NULL,
+                                           cat.default.pos = "text",
+                                           cat.pos = 0
+                                         )
+      )
+    }
+
+
+    ### 2. Up-regulated
+    venn.data <- list()
+    for(i in setdiff(s2c$Cond_short, 'C'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh & sleuth.exprt[,paste('b', i, sep =  '_')] > 0, target_id, drop = T)
+    }
+
+    for(j in c('H', 'M', 'S'))
+    {
+      # grid.newpage()
+      venn.plots[['up']][[j]] <- gTree(children =
+                                         venn.diagram(
+                                           x = venn.data[grep(j, names(venn.data))],
+                                           sub = paste(paste('q <=', q_thresh)),
+                                           main = 'Up-regulated',
+                                           inverted = F,
+                                           cat.fontface = 'bold',
+                                           filename = NULL,
+                                           cat.default.pos = "text",
+                                           cat.pos = 0
+                                         )
+      )
+    }
+
+    ### 3. Down-regulated
+    venn.data <- list()
+    for(i in setdiff(s2c$Cond_short, 'C'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh & sleuth.exprt[,paste('b', i, sep =  '_')] < 0, target_id, drop = T)
+    }
+
+    for(j in c('H', 'M', 'S'))
+    {
+      # grid.newpage()
+      venn.plots[['down']][[j]] <- gTree(children =
+                                           venn.diagram(
+                                             x = venn.data[grep(j, names(venn.data))],
+                                             sub = paste(paste('q <=', q_thresh)),
+                                             main = 'Down-regulated',
+                                             inverted = F,
+                                             cat.fontface = 'bold',
+                                             filename = NULL,
+                                             cat.default.pos = "text",
+                                             cat.pos = 0
+                                           )
+      )
+    }
+
+
+
+    do.call(grid.arrange, unlist(venn.plots, recursive = F))
+
+
+    dev.off()
+
+  }
+
+  
+
+  ########################################
+  #### Venn diagrams and UpsetR type venn diagram for single treatments
+  ########################################
+
+  for(q_thresh in c(0.01, 0.05))
+  {
+
+    ########################################
+    #### UpsetR type Venn diagram
+    ########################################
+    pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_04_q', q_thresh, 'UpsetR_Venn_singleTreatments.pdf'), width = 10, height = 10)
+    
+
+    sleuth.upset <- subset(sleuth.results, qval <= q_thresh & Cond_short %in% c('H', 'M', 'S'))
+    sleuth.upset[sleuth.upset$b < 0, 'direction'] <- 'down'
+    sleuth.upset[sleuth.upset$b > 0, 'direction'] <- 'up'
+    sleuth.upset$set <- paste(sleuth.upset$Cond_short, sleuth.upset$direction)
+
+    sleuth.upset2 <- subset(sleuth.results, qval <= q_thresh  & Cond_short %in% c('H', 'M', 'S'))
+    sleuth.upset2$set <- sleuth.upset2$Cond_short
+    
+    sleuth.upset$AGI <- do.call(rbind, strsplit(sleuth.upset$target_id, split = '.', fixed = T))[,1]
+    
+    
+    ########################################
+    #### Report some numbers
+    ########################################
+
+
+    grid.arrange(top = paste('q <=', q_thresh), tableGrob(count(sleuth.upset2$set), rows = NULL), tableGrob(count(sleuth.upset$set), rows = NULL), ncol = 2)
+
+
+    ########################################
+    #### Standard venn diagram
+    ########################################
+
+    venn.plots <- list()
+    ### 1. Generally de-regulated
+    venn.data <- list()
+    for(i in c('H', 'M', 'S'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh, target_id, drop = T)
+    }
+
+    venn.plots[['de']] <- gTree(children =
+                                         venn.diagram(
+                                           x = venn.data,
+                                           sub = paste(paste('q <=', q_thresh)),
+                                           main = 'de-regulated',
+                                           inverted = F,
+                                           cat.fontface = 'bold',
+                                           filename = NULL,
+                                           cat.default.pos = "text",
+                                           cat.pos = 0
+                                         )
+      )
+
+
+    ### 2. Up-regulated
+    venn.data <- list()
+    for(i in c('H', 'M', 'S'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh & sleuth.exprt[,paste('b', i, sep =  '_')] > 0, target_id, drop = T)
+    }
+
+      venn.plots[['up']] <- gTree(children =
+                                         venn.diagram(
+                                           x = venn.data,
+                                           sub = paste(paste('q <=', q_thresh)),
+                                           main = 'Up-regulated',
+                                           inverted = F,
+                                           cat.fontface = 'bold',
+                                           filename = NULL,
+                                           cat.default.pos = "text",
+                                           cat.pos = 0
+                                         )
+      )
+
+    ### 3. Down-regulated
+    venn.data <- list()
+    for(i in c('H', 'M', 'S'))
+    {
+      venn.data[[i]] <- subset(sleuth.exprt, sleuth.exprt[,paste('qvalue', i, sep =  '_')] <= q_thresh & sleuth.exprt[,paste('b', i, sep =  '_')] < 0, target_id, drop = T)
+    }
+
+      venn.plots[['down']] <- gTree(children =
+                                           venn.diagram(
+                                             x = venn.data,
+                                             sub = paste(paste('q <=', q_thresh)),
+                                             main = 'Down-regulated',
+                                             inverted = F,
+                                             cat.fontface = 'bold',
+                                             filename = NULL,
+                                             cat.default.pos = "text",
+                                             cat.pos = 0
+                                           )
+      )
+
+
+
+
+    do.call(grid.arrange, c(venn.plots, ncol = 3, nrow = 3))
+
+
+    dev.off()
+
+  }
+
+  
+  
+
+
+  ############################################################################################################
+  ### GO-term enrichment using fisher's exact
+  ############################################################################################################
+
+  alphaEnrich <- 0.05
+  pAdjust <- "BH"
+  GO.type <- 'P'
+
+  q_thresh <- 0.05
+
+
+  fish.list <- list()
+    for(i in unique(sleuth.results$Cond_short))
+    {
+      print(i)
+
+        for(HighLow in c("higher", "lower"))
+        {
+          print(HighLow)
+
+          current.sleuth <- subset(sleuth.results, Cond_short == i,select = c(target_id, qval, b))
+
+          current.sleuth[is.na(current.sleuth$qval), "qval"] <- 1
+          current.sleuth[is.na(current.sleuth$b), "b"] <- 0
+
+          current.sleuth$HighLow <- as.character(cut(current.sleuth$b, breaks = c(min(current.sleuth$b), 0, max(current.sleuth$b)), labels = c("lower", "higher"), include.lowest = T))
+          current.sleuth[current.sleuth$qval > q_thresh, "HighLow"] <- "Neither"
+
+          ### Pick only genes, not transcripts
+          current.sleuth$AGI <-  do.call(rbind, strsplit(split = ".", as.character(current.sleuth$target_id), fixed = T))[,1]
+
+          GO.subset <- droplevels(merge(GOAnno[[GO.type]], current.sleuth, "AGI"))
+
+
+          fishers.results <-  data.frame(matrix(NA, 0,3))
+          for(GO.ID in unique(GO.subset[,"GO.ID"]))
+          {
+
+            ##############################################################################################################
+            ### Fisher's exact test based on all (GO tagged) genes
+            ##############################################################################################################
+            gene.count <- length(unique(GO.subset[GO.subset$GO.ID == GO.ID,"AGI"]))
+
+            InIn <- sum(GO.subset$HighLow == HighLow & GO.subset[,"GO.ID"] == GO.ID)
+            InOut <- sum(GO.subset$HighLow == HighLow & GO.subset[,"GO.ID"] != GO.ID)
+            OutIn <- sum((GO.subset$HighLow != HighLow & GO.subset[,"GO.ID"] == GO.ID))
+            OutOut <- sum((GO.subset$HighLow != HighLow & GO.subset[,"GO.ID"] != GO.ID))
+
+            p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+
+            if(InOut != 0 & InIn != 0){if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}}
+            
+            fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(GO.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+
+          }
+
+          fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+          # ### Remove non-significant
+          # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+          # ### Remove under-represented
+          fishers.results <- fishers.results[fishers.results$represented == "over",]
+          ### Add Term
+          fishers.results <- merge(fishers.results, subset(GO.IDtoGO.Term, Aspect == GO.type), "GO.ID")
+
+
+          fish.list[[GO.type]][[i]][[HighLow]] <- fishers.results
+
+          head(print(fishers.results))
+
+          print("Done")
+    }
+    }
+
+
+
+
+  fish.extraxt <- c()
+  for(i in names(fish.list$P))
+  {
+    test <- do.call(rbind, fish.list$P[[i]])
+    test <- subset(test, BH <= alphaEnrich)
+    if(nrow(test) > 0 )
+    {
+      test$direction <- do.call(rbind, strsplit(split = ".", row.names(test), fixed = T))[,1]
+      test$Cond_short <- i
+
+      fish.extraxt <- rbind(fish.extraxt, test)
+    }
+  }
+
+
+  fish.summary <- dcast(fish.extraxt, GO.ID+GO.Term+gene.count~Cond_short+direction, value.var = 'BH')
+  write.xlsx(fish.summary, file = paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_05_q', q_thresh, 'GO_enrichment.xlsx'))
+  
+  
+  
+
+  ############################################################################################################
+  ### Mapman enrichment using fisher's exact
+  ############################################################################################################
+
+  alphaEnrich <- 0.05
+  pAdjust <- "BH"
+
+  q_thresh <- 0.05
+
+
+
+  fish.list <- list()
+  for(i in unique(sleuth.results$Cond_short))
+  {
+    print(i)
+
+    for(HighLow in c("higher", "lower"))
+    {
+      print(HighLow)
+
+      current.sleuth <- subset(sleuth.results, Cond_short == i,select = c(target_id, qval, b))
+
+      current.sleuth[is.na(current.sleuth$qval), "qval"] <- 1
+      current.sleuth[is.na(current.sleuth$b), "b"] <- 0
+
+      current.sleuth$HighLow <- as.character(cut(current.sleuth$b, breaks = c(min(current.sleuth$b), 0, max(current.sleuth$b)), labels = c("lower", "higher"), include.lowest = T))
+      current.sleuth[current.sleuth$qval > q_thresh, "HighLow"] <- "Neither"
+
+      ### Pick only genes, not transcripts
+      current.sleuth$AGI <-  do.call(rbind, strsplit(split = ".", as.character(current.sleuth$target_id), fixed = T))[,1]
+
+      # mapman.subset <- droplevels(merge(mapman.X4, current.sleuth, "AGI"))
+      mapman.subset <- droplevels(merge(mapman, current.sleuth, "AGI"))
+
+
+      fishers.results <-  data.frame(matrix(NA, 0,3))
+      for(mapman.ID in unique(mapman.subset$BIN1))
+      {
+
+        ##############################################################################################################
+        ### Fisher's exact test based on all (mapman tagged) genes
+        ##############################################################################################################
+        gene.count <- length(unique(mapman.subset[mapman.subset$BIN1 == mapman.ID,"AGI"]))
+
+        InIn <- sum(mapman.subset$HighLow == HighLow & mapman.subset$BIN1 == mapman.ID)
+        InOut <- sum(mapman.subset$HighLow == HighLow & mapman.subset$BIN1 != mapman.ID)
+        OutIn <- sum((mapman.subset$HighLow != HighLow & mapman.subset$BIN1 == mapman.ID))
+        OutOut <- sum((mapman.subset$HighLow != HighLow & mapman.subset$BIN1 != mapman.ID))
+
+        p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+
+        if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+
+        fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(mapman.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+
+      }
+
+      fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+      # ### Remove non-significant
+      # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+      # ### Remove under-represented
+      fishers.results <- fishers.results[fishers.results$represented == "over",]
+
+      fish.list[[i]][[HighLow]] <- fishers.results
+
+      head(print(fishers.results))
+
+      print("Done")
+    }
+  }
+
+
+
+
+  fish.extraxt <- c()
+  for(i in names(fish.list))
+  {
+    test <- do.call(rbind, fish.list[[i]])
+    test <- subset(test, BH <= alphaEnrich)
+    if(nrow(test) > 0 )
+    {
+      test$direction <- do.call(rbind, strsplit(split = ".", row.names(test), fixed = T))[,1]
+      test$Cond_short <- i
+
+      fish.extraxt <- rbind(fish.extraxt, test)
+    }
+  }
+
+
+  fish.summary <- dcast(fish.extraxt, mapman.ID+gene.count~Cond_short+direction, value.var = 'BH')
+  write.xlsx(fish.summary, file = paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_05_q', q_thresh, 'mapman_enrichment.xlsx'))
+  
+  ############################################################################################################
+  ### TF family enrichment using fisher's exact
+  ############################################################################################################
+  
+  # 
+  # alphaEnrich <- 0.05
+  # pAdjust <- "BH"
+  # 
+  # q_thresh <- 0.05
+  # 
+  # 
+  # 
+  # fish.list <- list()
+  # for(i in unique(sleuth.results$Cond_short))
+  # {
+  #   print(i)
+  #   
+  #   for(HighLow in c("higher", "lower"))
+  #   {
+  #     print(HighLow)
+  #     
+  #     current.sleuth <- subset(sleuth.results, Cond_short == i,select = c(target_id, qval, b))
+  #     
+  #     current.sleuth[is.na(current.sleuth$qval), "qval"] <- 1
+  #     current.sleuth[is.na(current.sleuth$b), "b"] <- 0
+  #     
+  #     current.sleuth$HighLow <- as.character(cut(current.sleuth$b, breaks = c(min(current.sleuth$b), 0, max(current.sleuth$b)), labels = c("lower", "higher"), include.lowest = T))
+  #     current.sleuth[current.sleuth$qval > q_thresh, "HighLow"] <- "Neither"
+  #     
+  #     ### Pick only genes, not transcripts
+  #     current.sleuth$AGI <-  do.call(rbind, strsplit(split = ".", as.character(current.sleuth$target_id), fixed = T))[,1]
+  #     
+  #     TF.subset <- droplevels(merge(PlnTFDB, current.sleuth, "AGI"))
+  #     
+  #     
+  #     fishers.results <-  data.frame(matrix(NA, 0,3))
+  #     for(TF.ID in unique(TF.subset$TF_Family))
+  #     {
+  #       
+  #       ##############################################################################################################
+  #       ### Fisher's exact test based on all (TF tagged) genes
+  #       ##############################################################################################################
+  #       gene.count <- length(unique(TF.subset[TF.subset$TF_Family == TF.ID,"AGI"]))
+  #       
+  #       InIn <- sum(TF.subset$HighLow == HighLow & TF.subset$TF_Family == TF.ID)
+  #       InOut <- sum(TF.subset$HighLow == HighLow & TF.subset$TF_Family != TF.ID)
+  #       OutIn <- sum((TF.subset$HighLow != HighLow & TF.subset$TF_Family == TF.ID))
+  #       OutOut <- sum((TF.subset$HighLow != HighLow & TF.subset$TF_Family != TF.ID))
+  #       
+  #       p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+  #       
+  #       if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+  #       
+  #       fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(TF.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+  #       
+  #     }
+  #     
+  #     fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+  #     # ### Remove non-significant
+  #     # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+  #     # ### Remove under-represented
+  #     fishers.results <- fishers.results[fishers.results$represented == "over",]
+  #     
+  #     fish.list[[i]][[HighLow]] <- fishers.results
+  #     
+  #     head(print(fishers.results))
+  #     
+  #     print("Done")
+  #   }
+  # }
+  #   
+  # 
+  # 
+  # 
+  # fish.extraxt <- c()
+  # for(i in names(fish.list))
+  # {
+  #   test <- do.call(rbind, fish.list[[i]]) 
+  #   test <- subset(test, BH <= alphaEnrich)
+  #   if(nrow(test) > 0 )
+  #   {
+  #     test$direction <- do.call(rbind, strsplit(split = ".", row.names(test), fixed = T))[,1]
+  #     test$Cond_short <- i
+  #     
+  #     fish.extraxt <- rbind(fish.extraxt, test)
+  #   }
+  # }
+  # 
+  # 
+  # 
+  # fish.summary <- dcast(fish.extraxt, TF.ID+gene.count~Cond_short+direction, value.var = 'BH')
+  # write.xlsx(fish.summary, file = paste(nice.date, '05_q', q_thresh, 'TF_enrichment.xlsx', sep = '_'))
+  # 
+  # 
+  
+  
+  
+  ############################################################################################################
+  ### Extracting some numbers for how many genes are up / down in the individual mapman bins 
+  ############################################################################################################
+  q_thresh <- 0.05
+  
+  sleuth.upset <- subset(sleuth.results, qval <= q_thresh)
+  sleuth.upset[sleuth.upset$b < 0, 'direction'] <- 'down'
+  sleuth.upset[sleuth.upset$b > 0, 'direction'] <- 'up'
+  sleuth.upset$set <- paste(sleuth.upset$Cond_short, sleuth.upset$direction)
+  
+  sleuth.upset$AGI <- do.call(rbind, strsplit(sleuth.upset$target_id, split = '.', fixed = T))[,1]
+  
+  
+  
+  
+  
+  mapman_summary <- merge(sleuth.upset, mapman, 'AGI')
+  
+  mapman_summar2y <- list()
+  mapman_summar2y[['BIN1']] <- dcast(mapman_summary, BIN1~set)
+  mapman_summar2y[['BIN1.BIN2']] <- dcast(mapman_summary, BIN1+BIN2~set)
+  mapman_summar2y[['BIN1.BIN2.BIN3']] <- dcast(mapman_summary, BIN1+BIN2+BIN3~set)
+  mapman_summar2y[['BIN1.BIN2.BIN3.BIN4']] <- dcast(mapman_summary, BIN1+BIN2+BIN3+BIN4~set)
+  
+    write.xlsx(mapman_summar2y, file = paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_06_MapmanSummary.xlsx'))
+
+  
+    
+  ############################################################################################################
+  ### Extracting some numbers for how many genes are up / down across treatments
+  ############################################################################################################
+  
+  diff.dev <- list()
+  
+  diff.dev[['p1']] <- ggplot(sleuth.upset, aes(x = Cond_short, fill = direction)) + 
+    geom_bar(position = position_dodge()) + 
+    theme_dominik + 
+    scale_fill_manual(values = c(down =  "#1f78b4", up = "#e41a1c")) + 
+    geom_text(stat='count', aes(label=..count.., group = direction), vjust=-1, position = position_dodge(width = 1)) +
+    labs(y = 'Gene count', x = '') + theme(legend.position = c(0.1,0.8)) + ggtitle('Overview')
+  
+  
+  for(x in unique(sleuth.upset$set))
+  {
+    
+    current.sub <- subset(sleuth.upset, AGI %in% subset(sleuth.upset, set == x, AGI, drop = T) & set != x)
+    
+    diff.dev[[x]] <- ggplot(current.sub, aes(x = Cond_short, fill = direction)) + 
+      geom_bar(position = position_dodge()) + 
+      theme_dominik + 
+      scale_fill_manual(values = c(down =  "#1f78b4", up = "#e41a1c")) + 
+      geom_text(stat='count', aes(label=..count.., group = direction), vjust=-1, position = position_dodge(width = 1)) +
+      labs(y = 'Gene count', x = '') + 
+      ggtitle(x) + theme(legend.position = 'none')
+    
+    
+  }
+  
+  
+  pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_DEG_counts.pdf'), width = 30, height = 30)
+  do.call(grid.arrange, diff.dev)
+  dev.off()
+  
+  
+  ############################################################################################################
+  ### Extracting some numbers for how many genes are up / down across treatments
+  ############################################################################################################
+  
+  set.order <- rev(c(grep('down', sort(unique(sleuth.upset$set)), value = T), grep('up', sort(unique(sleuth.upset$set)), value = T)))
+  sleuth.venn <- dcast(sleuth.upset, target_id~set, value.var = 'set', fun.aggregate = length)
+  
+  
+  meta <- unique(sleuth.upset[, c('set', 'direction', 'Cond_short')])
+  
+  
+  pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_GeneSets_AllCombinations.pdf'), width = 60, height = 6)
+  upset(sleuth.venn, sets = set.order, keep.order = T, nintersects = NA, sets.bar.color=c(rep("#1f78b4", 7), rep("#e41a1c", 7)), set.metadata = list(data = meta, plots = list(list(type = "matrix_rows", column = "Cond_short", colors = color.values[names(color.values) != 'C'],  alpha = 0.5))))
+  # upset(sleuth.venn, sets = set.order, keep.order = T, nintersects = NA, set.metadata = list(data = meta, plots = list(list(type = "matrix_rows", column = "Cond_short", colors = color.values[names(color.values) != 'C'],  alpha = 0.5))))
+  dev.off()
+  
+  # 
+  # a <- sleuth.venn
+  # a$combinations <- apply(a, 1, function(x){paste(colnames(a)[which(x == 1)], collapse = '|')})
+  # gene.set.count <- count(a$combinations)
+  # 
+  
+  
+  
+  
+  gene.set.list <- data.frame(AGI = unique(sleuth.upset$AGI))
+  gene.set.plots <- list()
+  for(i in c('H', 'S', 'M'))
+  {
+    if(i == 'H'){label <- 'Heat'}
+    if(i == 'S'){label <- 'Salt'}
+    if(i == 'M'){label <- 'Mannitol'}
+    
+    upset.test <- subset(sleuth.upset, grepl(i, Cond_short))
+    
+    set.order <- rev(c(grep('down', sort(unique(upset.test$set)), value = T), grep('up', sort(unique(upset.test$set)), value = T)))
+    sleuth.venn <- dcast(upset.test, target_id~set, value.var = 'set', fun.aggregate = length)
+    # upset(sleuth.venn, sets = set.order, keep.order = T, nintersects = NA)
+    
+    upset(sleuth.venn, sets = set.order, sets.bar.color=c(rep("#1f78b4", 4), rep("#e41a1c", 4)), keep.order = T, nintersects = NA, set.metadata = list(data = meta, plots = list(list(type = "matrix_rows", column = "Cond_short", colors = color.values[grepl(i, names(color.values))],  alpha = 0.5))))
+    
+    
+    grid.edit('arrange', name = 'bla')
+    vp <- grid.grab()
+    
+    gene.set.plots[[label]] <- arrangeGrob(vp, top = label)
+    
+    a <- sleuth.venn
+    
+    a$combinations <- apply(a, 1, function(x){paste(colnames(a)[which(x == 1)], collapse = '|')})
+    
+    gene.set.count <- count(a$combinations)
+    
+    a <- merge(a, gene.set.count, by.x = 'combinations', by.y = 'x')
+    a[,label] <- paste(a$combinations, a$freq)
+    
+    a$AGI <-  do.call(rbind, strsplit(split = ".", as.character(a$target_id), fixed = T))[,1]
+    b <- a[, c('AGI', label)]
+    
+    gene.set.list <- merge(gene.set.list, b, all.x = T, by = 'AGI')
+    
+  }
+  
+  
+  pdf(paste0('03_ResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_GeneSets_byTreatment.pdf'), width = 16, height = 18)
+  do.call(grid.arrange, c(gene.set.plots, ncol = 1))
+  dev.off()
+  
+  
+}
+
+
+
+# 
+# 
+# 
+# ############################################################################################################
+# ### Gene categories / LFC tables
+# ############################################################################################################
+# 
+# #
+# # anno.cols <-  colnames(supp)[1:26]
+# #
+# # lfc.table <- melt(supp, id.vars = anno.cols, measure.vars = grep('^b_', colnames(supp)), value.name = 'lfc')
+# # q.table <- melt(supp, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(supp)), value.name = 'q')
+# # head(q.table)
+# #
+# # lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+# # lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ''), include.lowest = T)
+# #
+# # grep('cell wall', supp$MAPMAN.BIN, value = T)
+# 
+# anno.cols <-  colnames(supp)[1:26]
+# 
+# lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+# q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+# head(q.table)
+# 
+# lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+# lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+# 
+# 
+# sig.at.some <- unique(subset(q.table, q <= 0.05, AGI, drop = T))
+# 
+# 
+# count(mapman.X4[, c('BIN1', 'BIN2')])
+# 
+# 
+# cwllwall <- merge(subset(mapman.X4, BIN1 == 'Cell wall' & BIN2 == 'cellulose'), lfc.table, 'AGI')
+# cwllwall$label <- gsub('b_' , '', cwllwall$variable)
+# cwllwall$label <- factor(cwllwall$label, unique(cwllwall[order(cwllwall$variable), 'label']))
+# 
+# ggplot(cwllwall, aes(x = label, y = AGI, label = q.star, fill = lfc)) +
+#   # geom_tile() +
+#   geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+#   geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+#   geom_text(size = 3, color = "black", vjust = 0.7) +
+#   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+#                        # limits = limits.lfc.sleuth.GT,
+#                        midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+#   theme_dominik +
+#   theme(aspect.ratio = 5)
+# 
+# 
+# View(GOAnno$P)
+# 
+# unique(grep('pathogen', GOAnno$P$GO.Term, ignore.case = T, value = T))
+# 
+# unique(grep('heat', araport$NAME, ignore.case = T, value = T))
+# 
+# 
+# 
+# 
+# 
+# candidates <- subset(lfc.table, AGI %in% subset(araport, grepl("heat shock protein", NAME, ignore.case = T), AGI, drop = T))
+# category <- "HSPs"
+# 
+# 
+# View(subset(araport, grepl("late embryogenesis", NAME, ignore.case = T)))
+# View(subset(araport, grepl("LEA", name, ignore.case = T)))
+# 
+# View(subset(mapman, grepl("embryo", MAPMAN.BIN, ignore.case = T)))
+# 
+# View(subset(mapman.X4, grepl("embryo", MAPMAN.BIN, ignore.case = T)))
+# 
+# 
+# 
+# unique(grep('HSP', mapman.X4$MAPMAN.BIN, ignore.case = T, value = T))
+# 
+# 
+# 
+# candidates <- subset(lfc.table, AGI %in% subset(mapman.X4, grepl("heat-shock", MAPMAN.BIN, ignore.case = T), AGI, drop = T))
+# category <- "HSPs"
+# 
+# View(subset(araport, grepl("late embryogenesis", NAME, ignore.case = T)))
+# 
+# 
+# candidates <- subset(lfc.table, AGI %in% subset(araport,  grepl("late embryogenesis", NAME, ignore.case = T), AGI, drop = T))
+# category <- "LEAs"
+# 
+# 
+# candidates <- merge(subset(GOAnno$P, GO.Term =='response to heat'), lfc.table, 'AGI')
+# category <- "response to heat"
+# 
+# candidates <- merge(subset(GOAnno$P, GO.Term =='pathogenesis'), lfc.table, 'AGI')
+# category <- "pathogen"
+# 
+# #
+# #
+# #
+# # for(i in unique(cluster.results$`14`$km.new.order))
+# # {
+# #
+# #   current.cluster <- subset(cluster.results$`14`, km.new.order == i)
+# #
+# #   tfs.cluster <- merge(current.cluster, PlnTFDB, 'AGI')
+# #
+# #
+# #
+# # }
+# 
+# 
+# 
+# candidates <- merge(candidates, araport, 'AGI')
+# candidates$label <- gsub('b_' , '', candidates$variable)
+# candidates$label <- factor(candidates$label, unique(candidates[order(candidates$variable), 'label']))
+# # candidates$gene.label <- paste0(candidates$name, ' (', candidates$AGI, ')')
+# candidates$gene.label <- paste0(candidates$NAME, ' (', candidates$AGI, ')')
+# 
+# 
+# pdf(paste0(category, '.pdf'), width = nrow(candidates)/50, height = nrow(candidates)/50 * 2)
+# 
+# ggplot(candidates, aes(x = label, y = gene.label, label = q.star, fill = lfc)) +
+#   geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+#   geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+#   geom_text(size = 3, color = "black", vjust = 0.7) +
+#   theme_dominik +
+#   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+#                        # limits = limits.lfc.sleuth.GT,
+#                        midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / Ctrl)", sep = ""))) +
+#   theme(legend.position = 'right', legend.direction = 'vertical', legend.title = element_text(), axis.ticks = element_blank(), panel.background = element_blank()) +
+#   theme(aspect.ratio = nrow(candidates)/50) +
+#   labs(x = '', y = '') +
+#   scale_x_discrete(position = 'top') +
+#   ggtitle(category)
+# 
+# dev.off()
+# 
+# 
+# 
+# 
+# 
+# load('02_kmeans_clustering/190520_02_FishList.RData')
+# 
+# View(fish.list$mapman$`14`)
+# View(fish.list$go$`14`)
+
+
+
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Kallisto.RData b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Kallisto.RData
new file mode 100644
index 0000000000000000000000000000000000000000..0673a90dbec915f62d28b495256fa905a01ec7be
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Kallisto.RData differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Sleuth.RData b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Sleuth.RData
new file mode 100644
index 0000000000000000000000000000000000000000..eacfe0d76191502d97127dd357fd402cb063e0af
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/01_Sleuth.RData differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_All.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_All.png
new file mode 100644
index 0000000000000000000000000000000000000000..90dd03e0b26e0d7a4607269a7055559cd30f504f
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_All.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedAll.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedAll.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a2c0dd5ec63922b4e11102eaa62c43649042652
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedAll.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedHeat.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedHeat.png
new file mode 100644
index 0000000000000000000000000000000000000000..ba1b3d3138ff832b7a78d05d1bfff9c0b6f4a5b6
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedHeat.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedNoHeat.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedNoHeat.png
new file mode 100644
index 0000000000000000000000000000000000000000..c1f0eb3985510e89ccf31832960f670852a0ef15
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_AveragedNoHeat.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_Heat.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_Heat.png
new file mode 100644
index 0000000000000000000000000000000000000000..1937291d029381135e7489304824deffb3d8afed
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_Heat.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_NoHeat.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_NoHeat.png
new file mode 100644
index 0000000000000000000000000000000000000000..8354e0eebb39ec5a93231d6fdf66ab0f000e0391
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/02_PCA_results/19073102_PCA_NoHeat.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.01SanityCheck.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.01SanityCheck.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..04d669f8cd986c43247964d12f7844f08b9c4629
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.01SanityCheck.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.05SanityCheck.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.05SanityCheck.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..0bcbe5f47284868b848987da1662d17e9d4325a2
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_03_q0.05SanityCheck.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..2d7546e414bc37b8cc497ae0519aa3736a2bf795
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn_singleTreatments.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn_singleTreatments.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..508ee50741d30ef2c8f8de0bc6a298dbed84c492
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.01UpsetR_Venn_singleTreatments.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..0c6e1db8b94bcb3273e4c4f3be42cce192f2b735
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn_singleTreatments.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn_singleTreatments.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..aecd8d8b821be015057e22585d8ca64d36e1e9e0
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_04_q0.05UpsetR_Venn_singleTreatments.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05GO_enrichment.xlsx b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05GO_enrichment.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..0fe2d2245ff8e6d597e9dc27bb64fc5da24a95bf
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05GO_enrichment.xlsx differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05mapman_enrichment.xlsx b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05mapman_enrichment.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..407efcb5440b679f6db46a4eb3473b5547eab042
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_05_q0.05mapman_enrichment.xlsx differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_06_MapmanSummary.xlsx b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_06_MapmanSummary.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..731f1c5c37241bc41b851b64ee49f29bc1cdbc3a
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_06_MapmanSummary.xlsx differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_DEG_counts.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_DEG_counts.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..54ab76aa614cae0467dd833141786dc6126f3b38
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_DEG_counts.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_AllCombinations.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_AllCombinations.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..e782a2bb34e76db9e1ec066d14583ac8f031591d
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_AllCombinations.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_byTreatment.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_byTreatment.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..fc1ba719fb19ad15d8b7ec5ff85a345651d8e447
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/03_ResultsWithLFC_CutOff_1/190731_GeneSets_byTreatment.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_CLusterIteration.pdf b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_CLusterIteration.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..94859954c9634103dd8b00fc4b374ed81b9fc596
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_CLusterIteration.pdf differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData
new file mode 100644
index 0000000000000000000000000000000000000000..1e3fbe3ff407ef1171effbe23db97e4f2d49f9c8
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_kmeans.R b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_kmeans.R
new file mode 100644
index 0000000000000000000000000000000000000000..22de0db6f2798a33a1279164352c2ac9b089bc92
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/04_kmeans.R
@@ -0,0 +1,369 @@
+
+
+required.packages <- c("Rmisc", "plyr", "ggplot2", "reshape2", "gridExtra", 'openxlsx', 'sleuth', 'amap', 'ggdendro', 'ellipse', 'UpSetR', 'VennDiagram')
+
+for(package in required.packages)
+{
+  print(package)
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package, repos ="https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = T)
+}
+
+
+theme_dominik <-
+  theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+  theme(aspect.ratio = 1) +
+  theme(axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black")) +
+  theme(strip.background = element_blank(), strip.text = element_text(size = 8)) +
+  theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) + #, legend.key.size = unit(0.05, "npc")
+  theme(plot.margin=unit(c(1, 1, 1, 1),"cm")) +
+  theme(plot.title = element_text(hjust = 0, face = "bold", size = 8))
+
+
+########################################
+#### Annotation data
+########################################
+
+load('RNASeq_Annotation/Arabidopsis/02.0Araport11/02.0Araport.RData')
+load("RNASeq_Annotation/Arabidopsis/02.4GO/GOAnno2018-02-06.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/180924_MercatorMapman.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/02.3Mapman.RData")
+colnames(mapman)[2] <- 'MAPMAN.BIN'
+load("RNASeq_Annotation/Arabidopsis/02.9PlnTFDB/02.9PlnTFDB.RData")
+load("RNASeq_Annotation/Arabidopsis/02.12Lipids/02.12lipids.RData")
+
+
+########################################
+#### Expression data
+########################################
+
+load('01_Kallisto.RData')
+
+
+nice.date <- format(Sys.time(), "%y%m%d")
+SampleInfo <- read.xlsx('../00_SampleInfo.xlsx')
+
+color.values <- brewer.pal(8, 'Dark2')[8:1]
+names(color.values) <- unique(SampleInfo$Cond_short)
+
+
+#### Pull count / tpm data 
+expression.data <- kallisto_table(sleuth_objects$not_trimmed)
+expression.data$AGI <-  do.call(rbind, strsplit(split = ".", as.character(expression.data$target_id), fixed = T))[,1]
+expression.data <- merge(expression.data, SampleInfo, by.x = 'sample', by.y = 'IlluminaID')
+
+#### Calculate means and sd
+means <- summarySE(expression.data, groupvars = c('Cond_short.y', 'AGI'), measurevar = 'tpm')
+mean.tpm <- dcast(means, AGI~Cond_short.y, value.var = 'tpm')
+mean.tpm <- mean.tpm[, c('AGI', 'C', 'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH')]
+colnames(mean.tpm)[2:ncol(mean.tpm)] <- paste('mean.tpm', colnames(mean.tpm)[2:ncol(mean.tpm)], sep = '_')
+
+kmeans.elbows <- list()
+for(LFC_cut in c(1, 1.5, 2, 3))
+{
+  load('01_Sleuth.RData')
+  sleuth.results[abs(sleuth.results$b) < LFC_cut & !is.na(sleuth.results$b), 'qval'] <- 1
+
+
+########################################
+#### k-means of only genes with at least one sig diff
+########################################
+
+sleuth.exprt$AGI    <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.exprt$target_id), fixed = T))[,1]
+sleuth.results$AGI  <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.results$target_id), fixed = T))[,1]
+
+
+########################################
+#### Option b) k-means of LFCs (only genes with at least one sig diff)
+
+lfcs <- sleuth.exprt[,grep('^b_', colnames(sleuth.exprt))]
+row.names(lfcs) <- sleuth.exprt$AGI
+
+sig.lfcs <- lfcs[row.names(lfcs) %in% unique(subset(sleuth.results, qval <= 0.05, AGI, drop = T)), ]
+sig.lfcs[is.na(sig.lfcs)] <- 0
+
+sub_to_cluster <- sig.lfcs
+########################################
+
+k_to_test <- 2:30
+km_k2up <- sapply(k_to_test,function(x) kmeans(sub_to_cluster, x, nstart=3)$tot.withinss)
+to_plot <- data.frame(ss=km_k2up, k=k_to_test, run="real")
+
+n_negative_controls <- 7
+for (i in 1:n_negative_controls){
+  scrambled_to_cluster <- t(apply(sub_to_cluster, 1, sample))
+  km_k2tok24negative <- sapply(k_to_test,
+                               function(x) kmeans(scrambled_to_cluster, x)$tot.withinss)
+  to_plot <- rbind(to_plot, data.frame(ss=km_k2tok24negative, k=k_to_test, run=paste0("neg", i)))
+  
+}
+
+choose.k <- ggplot(data=to_plot, aes(x=k, y=ss, color=run, group=run)) + geom_line() +
+  geom_point(size=1, shape=20) + scale_color_manual(values=brewer.pal(length(unique(to_plot$run)), "Set1"))
+choose.k
+
+
+neg.minus.real <- sapply(paste0("neg", 1:n_negative_controls), function(x) to_plot$ss[to_plot$run==x])
+neg.minus.real <- to_plot[to_plot$run=="neg1", "ss"] - to_plot[to_plot$run=="real", "ss"]
+neg.minus.real <- data.frame(neg.minus.real, k=k_to_test)
+
+
+choose.k.by.diff <- ggplot(neg.minus.real, aes(y=neg.minus.real, x=k))+ geom_bar(stat="identity") + ggtitle(LFC_cut)
+
+kmeans.elbows[[LFC_cut]] <- choose.k.by.diff 
+
+}
+
+
+
+for(LFC_cut in c(1, 1.5, 2))
+{
+  load('01_Sleuth.RData')
+  
+  suppressWarnings(dir.create(paste0('04_ClusterResultsWithLFC_CutOff_', LFC_cut)))
+  
+  sleuth.results[abs(sleuth.results$b) < LFC_cut & !is.na(sleuth.results$b), 'qval'] <- 1
+  
+  
+  ########################################
+  #### k-means of only genes with at least one sig diff
+  ########################################
+  
+  sleuth.exprt$AGI    <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.exprt$target_id), fixed = T))[,1]
+  sleuth.results$AGI  <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.results$target_id), fixed = T))[,1]
+  
+  
+  ########################################
+  #### Option b) k-means of LFCs (only genes with at least one sig diff)
+  
+  lfcs <- sleuth.exprt[,grep('^b_', colnames(sleuth.exprt))]
+  row.names(lfcs) <- sleuth.exprt$AGI
+  
+  sig.lfcs <- lfcs[row.names(lfcs) %in% unique(subset(sleuth.results, qval <= 0.05, AGI, drop = T)), ]
+  sig.lfcs[is.na(sig.lfcs)] <- 0
+  
+  sub_to_cluster <- sig.lfcs
+  
+  cluster.results <- c()
+  for(i in 3:10)
+  {
+    
+    km <- kmeans(sub_to_cluster, i, nstart=100)
+    
+    cluster.mship <- as.data.frame(km$cluster)
+    colnames(cluster.mship) <- 'km'
+    cluster.mship$AGI <-  rownames(cluster.mship)
+    
+    plot.d <- as.data.frame(sub_to_cluster)
+    plot.d$AGI <- row.names(plot.d)
+    
+    scaled.tpm <- melt(plot.d)
+    colnames(scaled.tpm) <- c("AGI", "Condition", "z.score")
+    scaled.tpm$Condition <- gsub('b_', '', scaled.tpm$Condition)
+    scaled.tpm$Condition <- factor(gsub('mean.tpm_', '', scaled.tpm$Condition), levels = c('C' ,'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+    
+    ###Building correlations between the center means for resorting
+    
+    ## 1. Calculate mean scores
+    mean.scores <- merge(scaled.tpm, cluster.mship, 'AGI')
+    mean.scores <- summarySE(mean.scores, measurevar = 'z.score', groupvars = c('km', 'Condition'))
+    ## 2. Pick the cluster with the least deviation from zero (most boring cluster) as a reference
+    
+    min.dev <- order(abs(summarySE(mean.scores, measurevar = 'z.score', groupvars = c('km'))$z.score))[1]
+    
+    cor.mat <- c()
+    for(j in unique(mean.scores$km))
+    {
+      cor.mat[j] <- cor(x = subset(mean.scores, km == min.dev, z.score), y = subset(mean.scores, km == j, z.score))
+    }
+    
+    re.ordercluster  <- cbind(km.new.order = sort(unique(mean.scores$km)), km = order(cor.mat, decreasing = T))
+    
+    cluster.mship <- merge(cluster.mship, re.ordercluster, 'km')
+    cluster.mship <- merge(cluster.mship, count(cluster.mship$km.new.order), by.x = 'km.new.order', by.y = 'x')
+    
+    cluster.mship$label <- paste0('Cluster ', cluster.mship$km.new.order, ' (', cluster.mship$freq, ' genes)')
+    
+    cluster.results[[as.character(i)]] <- cluster.mship[,c(1, 3, 5)]
+    
+    
+  }
+  
+  save(cluster.results, file = paste0('04_ClusterResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_ClusterResults.RData'))
+  
+  
+  
+  
+  ############################################################################################################
+  ### Category enrichment using fisher's exact
+  ############################################################################################################
+  
+  
+  alphaEnrich <- 0.05
+  pAdjust <- "BH"
+  
+  GO.type <- 'P'
+  
+  q_thresh <- 0.05
+  
+  fish.list <- list()
+  for(k in names(cluster.results))
+  {
+    
+    current.cluster.appr <- cluster.results[[k]]
+    
+    # mapman.subset <- droplevels(merge(mapman.X4, current.cluster.appr, "AGI"))
+    mapman.subset <- droplevels(merge(mapman, current.cluster.appr, "AGI"))
+    GO.subset <- droplevels(merge(GOAnno[[GO.type]], current.cluster.appr, "AGI"))
+    
+    for(i in unique(current.cluster.appr$km.new.order))
+    {
+      
+      ############################################################################################################
+      ### Mapman 
+      ############################################################################################################
+      
+      
+      fishers.results <-  data.frame(matrix(NA, 0,3))
+      for(mapman.ID in unique(mapman.subset$BIN1))
+      {
+        gene.count <- length(unique(mapman.subset[mapman.subset$BIN1 == mapman.ID,"AGI"]))
+        
+        InIn <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 == mapman.ID)
+        InOut <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 != mapman.ID)
+        OutIn <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 == mapman.ID))
+        OutOut <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 != mapman.ID))
+        
+        p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+        
+        if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+        
+        fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(mapman.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+        
+      }
+      
+      fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+      # ### Remove non-significant
+      # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+      # ### Remove under-represented
+      fishers.results <- fishers.results[fishers.results$represented == "over",]
+      
+      fishers.results$km.new.order <- i
+      
+      fish.list[['mapman']][[k]] <- rbind(fish.list[['mapman']][[k]], fishers.results)
+      
+      
+      ############################################################################################################
+      ### GO terms
+      ############################################################################################################
+      
+      
+      fishers.results <-  data.frame(matrix(NA, 0,3))
+      for(GO.Term in unique(GO.subset$GO.Term))
+      {
+        
+        gene.count <- length(unique(GO.subset[GO.subset$GO.Term == GO.Term,"AGI"]))
+        
+        InIn <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term == GO.Term)
+        InOut <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term != GO.Term)
+        OutIn <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term == GO.Term))
+        OutOut <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term != GO.Term))
+        
+        p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+        
+        
+        if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+        
+        fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(GO.Term, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+        
+      }
+      
+      fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+      # ### Remove non-significant
+      # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+      # ### Remove under-represented
+      fishers.results <- fishers.results[fishers.results$represented == "over",]
+      
+      
+      fishers.results$km.new.order <- i
+      
+      fish.list[['go']][[k]] <- rbind(fish.list[['go']][[k]], fishers.results)
+      
+      
+    }
+  }
+  
+  # save(fish.list, file = paste0(nice.date, '_02_FishList.RData'))
+  
+  
+  
+  
+  pdf(paste0('04_ClusterResultsWithLFC_CutOff_', LFC_cut, '/', nice.date, '_CLusterIteration.pdf'), width = 20, height = 20)
+  
+  for(k in names(cluster.results))
+  {
+    
+    go.ex <- drop.levels(subset(fish.list[['go']][[k]], BH <= alphaEnrich))
+    go.ex$GO.Term <-  factor(go.ex$GO.Term, levels = rev(sort(unique(go.ex$GO.Term))))
+    
+    go.plot <- ggplot(go.ex, aes(x=as.factor(km.new.order), y=GO.Term, label  = InIn)) + 
+      geom_label() + 
+      xlab('k-means cluster') + 
+      theme_dominik + theme(aspect.ratio = 2) + 
+      theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+    
+    
+    mm.ex <- drop.levels(subset(fish.list[['mapman']][[k]], BH <= alphaEnrich))
+    mm.ex$mapman.ID <-  factor(mm.ex$mapman.ID, levels = rev(sort(unique(mm.ex$mapman.ID))))
+    
+    
+    mm.plot <- ggplot(mm.ex, aes(x=as.factor(km.new.order), y=mapman.ID, label  = InIn)) + 
+      geom_label() + 
+      xlab('k-means cluster') + 
+      theme_dominik + theme(aspect.ratio = 2) + 
+      theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+    
+    
+    plot.data <- merge(scaled.tpm, cluster.results[[k]], 'AGI')
+    plot.data$label <- factor(plot.data$label, levels = unique(plot.data[order(plot.data$km.new.order), 'label']))
+    
+    cluster.plot <- ggplot(plot.data, aes(x=Condition, y=z.score, group=AGI)) + 
+      # geom_boxplot(aes(group = Condition)) +
+      geom_violin(aes(group = Condition), scale = 'width') +
+      geom_hline(yintercept = 0, color = 'red') +
+      labs(x="Treatment", y="log-FC (Trtmt / Control)") +
+      facet_wrap(~label, scales = 'free', nrow = 2) +
+      # facet_wrap(~km, scales = 'free') +
+      theme_dominik +
+      theme(strip.background = element_blank(), strip.text = element_text(size = 12))
+    
+    grid.arrange(cluster.plot, gtable_cbind(ggplotGrob(mm.plot), ggplotGrob(go.plot)), nrow = 2, 
+                 top = 'k-means clustering  of relative expression', 
+                 bottom = 'A, log2-FC (Trtmt / Control) as violin diagram; Red line: control reference\n
+                 B, mapman terms over-represented in each cluster (according to FisherÂ´s Exact Test, q < 0.05); Numbers: gene count\n
+                 C, gene ontology terms over-represented in each cluster (according to FisherÂ´s Exact Test, q < 0.05); Numbers: gene count')
+    
+  }
+  
+  dev.off()
+  
+  
+  
+}
+
+
+
+
+
+load("04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData")
+cluster.results <- cluster.results$`8`[,1:2]
+
+colnames(cluster.results) <- c('kmeans_cluster', 'AGI')
+save(cluster.results, file = '04_190731_ClusterResults_final.RData')
+
+
+
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_190731_ClusterResults_final.RData b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_190731_ClusterResults_final.RData
new file mode 100644
index 0000000000000000000000000000000000000000..4100f4628d8fdcd49937be013685d6dbe7393194
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_190731_ClusterResults_final.RData differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans.R b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans.R
new file mode 100644
index 0000000000000000000000000000000000000000..0418dc1ec29aa90beddb1320697bd8f878a4e6b5
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans.R
@@ -0,0 +1,357 @@
+
+
+required.packages <- c("Rmisc", "plyr", "ggplot2", "reshape2", "gridExtra", 'openxlsx', 'sleuth', 'amap', 'ggdendro', 'ellipse', 'UpSetR', 'VennDiagram')
+
+for(package in required.packages)
+{
+  print(package)
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package, repos ="https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = T)
+}
+
+
+theme_dominik <-
+  theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+  theme(aspect.ratio = 1) +
+  theme(axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black")) +
+  theme(strip.background = element_blank(), strip.text = element_text(size = 8)) +
+  theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) + #, legend.key.size = unit(0.05, "npc")
+  theme(plot.margin=unit(c(1, 1, 1, 1),"cm")) +
+  theme(plot.title = element_text(hjust = 0, face = "bold", size = 8))
+
+
+load("04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData")
+
+cluster.resultsexport <- cluster.results$`8`
+
+# colnames(cluster.resultsexport) <- c('kmeans_cluster', 'AGI')
+save(cluster.resultsexport, file = '05_190731_ClusterResults_final.RData')
+
+  LFC_cut <- 1
+  sleuth.results[abs(sleuth.results$b) < LFC_cut & !is.na(sleuth.results$b), 'qval'] <- 1
+  
+  
+  ########################################
+  #### k-means of only genes with at least one sig diff
+  ########################################
+  
+  sleuth.exprt$AGI    <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.exprt$target_id), fixed = T))[,1]
+  sleuth.results$AGI  <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.results$target_id), fixed = T))[,1]
+  
+  lfcs <- sleuth.exprt[,grep('^b_', colnames(sleuth.exprt))]
+  row.names(lfcs) <- sleuth.exprt$AGI
+  
+  sig.lfcs <- lfcs[row.names(lfcs) %in% unique(subset(sleuth.results, qval <= 0.05, AGI, drop = T)), ]
+  sig.lfcs[is.na(sig.lfcs)] <- 0
+  
+  sub_to_cluster <- sig.lfcs
+  
+  plot.d <- as.data.frame(sub_to_cluster)
+  plot.d$AGI <- row.names(plot.d)
+    
+    scaled.tpm <- melt(plot.d)
+    colnames(scaled.tpm) <- c("AGI", "Condition", "z.score")
+    scaled.tpm$Condition <- gsub('b_', '', scaled.tpm$Condition)
+    scaled.tpm$Condition <- factor(gsub('mean.tpm_', '', scaled.tpm$Condition), levels = c('C' ,'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+    
+  
+  
+  ############################################################################################################
+  ### Category enrichment using fisher's exact
+  ############################################################################################################
+  
+  
+  alphaEnrich <- 0.05
+  pAdjust <- "BH"
+  
+  GO.type <- 'P'
+  
+  q_thresh <- 0.05
+  
+  fish.list <- list()
+    
+    current.cluster.appr <- cluster.resultsexport
+    
+    # mapman.subset <- droplevels(merge(mapman.X4, current.cluster.appr, "AGI"))
+    mapman.subset <- droplevels(merge(mapman, current.cluster.appr, "AGI"))
+    GO.subset <- droplevels(merge(GOAnno[[GO.type]], current.cluster.appr, "AGI"))
+    
+    for(i in unique(current.cluster.appr$km.new.order))
+    {
+      
+      ############################################################################################################
+      ### Mapman 
+      ############################################################################################################
+      
+      
+      fishers.results <-  data.frame(matrix(NA, 0,3))
+      for(mapman.ID in unique(mapman.subset$BIN1))
+      {
+        gene.count <- length(unique(mapman.subset[mapman.subset$BIN1 == mapman.ID,"AGI"]))
+        
+        InIn <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 == mapman.ID)
+        InOut <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 != mapman.ID)
+        OutIn <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 == mapman.ID))
+        OutOut <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 != mapman.ID))
+        
+        p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+        
+        if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+        
+        fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(mapman.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+        
+      }
+      
+      fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+      # ### Remove non-significant
+      # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+      # ### Remove under-represented
+      fishers.results <- fishers.results[fishers.results$represented == "over",]
+      
+      fishers.results$km.new.order <- i
+      
+      fish.list[['mapman']] <- rbind(fish.list[['mapman']], fishers.results)
+      
+      
+      ############################################################################################################
+      ### GO terms
+      ############################################################################################################
+      
+      
+      fishers.results <-  data.frame(matrix(NA, 0,3))
+      for(GO.Term in unique(GO.subset$GO.Term))
+      {
+        
+        gene.count <- length(unique(GO.subset[GO.subset$GO.Term == GO.Term,"AGI"]))
+        
+        InIn <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term == GO.Term)
+        InOut <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term != GO.Term)
+        OutIn <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term == GO.Term))
+        OutOut <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term != GO.Term))
+        
+        p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+        
+        
+        if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+        
+        fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(GO.Term, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+        
+      }
+      
+      fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+      # ### Remove non-significant
+      # fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+      # ### Remove under-represented
+      fishers.results <- fishers.results[fishers.results$represented == "over",]
+      
+      
+      fishers.results$km.new.order <- i
+      
+      fish.list[['go']] <- rbind(fish.list[['go']], fishers.results)
+      
+      
+    }
+  
+  
+  
+    
+    ############################################################################################################
+    ### Plot stuff
+    ############################################################################################################
+  
+    
+    
+    
+    
+  
+  
+    go.ex <- drop.levels(subset(fish.list[['go']], BH <= alphaEnrich))
+    go.ex$GO.Term <-  factor(go.ex$GO.Term, levels = rev(sort(unique(go.ex$GO.Term))))
+    
+    go.plot <- ggplot(go.ex, aes(x=as.factor(km.new.order), y=GO.Term, label  = InIn)) + 
+      geom_label() + 
+      xlab('k-means cluster') + 
+      theme_dominik + theme(aspect.ratio = 2) + 
+      theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+    
+    
+    mm.ex <- drop.levels(subset(fish.list[['mapman']], BH <= alphaEnrich))
+    mm.ex$mapman.ID <-  factor(mm.ex$mapman.ID, levels = rev(sort(unique(mm.ex$mapman.ID))))
+    
+    
+    mm.plot <- ggplot(mm.ex, aes(x=as.factor(km.new.order), y=mapman.ID, label  = InIn)) + 
+      geom_label() + 
+      xlab('k-means cluster') + 
+      theme_dominik + theme(aspect.ratio = 2) + 
+      theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+    
+    
+    plot.data <- merge(scaled.tpm, cluster.resultsexport, 'AGI')
+    plot.data$label <- factor(plot.data$label, levels = unique(plot.data[order(plot.data$km.new.order), 'label']))
+    
+    cluster.plot <- ggplot(plot.data, aes(x=Condition, y=z.score, group=AGI)) + 
+      geom_violin(aes(group = Condition), scale = 'width') +
+      geom_hline(yintercept = 0, color = 'red') +
+      labs(x="Treatment", y="log-FC (Trtmt / Control)") +
+      facet_wrap(~label, scales = 'free', nrow = 2) +
+      theme_dominik +
+      theme(strip.background = element_blank(), strip.text = element_text(size = 12))
+    
+    
+    
+    
+    dir.create('05_kmeans_final')
+    setwd('05_kmeans_final')  
+    
+    
+    png(paste(nice.date, 'go.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+    print(go.plot)
+    dev.off()
+    
+    png(paste(nice.date, 'mm.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+    print(mm.plot)
+    dev.off()
+    
+    png(paste(nice.date, 'cluster.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+    print(cluster.plot)
+    dev.off()
+    
+
+    
+    
+    
+    ############################################################################################################
+    ### Plot heat maps with LFCs
+    ############################################################################################################
+  
+
+    lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+    q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+
+    lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+    lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+
+    lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+    lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+
+    lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+    lfc.table[is.na(lfc.table$name), 'name'] <- lfc.table[is.na(lfc.table$name), 'AGI']
+
+    
+    lfc.table[!lfc.table$name == lfc.table$AGI, 'gene.label'] <- paste0(lfc.table[!lfc.table$name == lfc.table$AGI, 'name'], ' (', lfc.table[!lfc.table$name == lfc.table$AGI, 'AGI'], ')')
+    lfc.table[lfc.table$name == lfc.table$AGI, 'gene.label'] <- lfc.table[lfc.table$name == lfc.table$AGI, 'AGI']
+
+    
+    #### Cluster 4
+    
+    km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+    km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+
+    png(paste(nice.date, 'heatmap_lfc_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+    
+    ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+      # geom_tile() +
+      geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+      geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+      geom_text(size = 3, color = "black", vjust = 0.7) +
+      scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+                           midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+      theme_dominik +
+      theme(aspect.ratio = 7) + 
+      theme(axis.title = element_blank())
+    
+    
+    dev.off()
+    
+
+    
+    #### Cluster 7
+    
+    km7 <- merge(subset(cluster.resultsexport, km.new.order == '7'), lfc.table, 'AGI')
+    km7$gene.label <- factor(km7$gene.label, levels = sort(unique(km7$gene.label), decreasing = T))
+
+    png(paste(nice.date, 'heatmap_lfc_cluster7', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+    
+    ggplot(km7, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+      # geom_tile() +
+      geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+      geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+      geom_text(size = 3, color = "black", vjust = 0.7) +
+      scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+                           midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+      theme_dominik +
+      theme(aspect.ratio = 7) + 
+      theme(axis.title = element_blank())
+    
+    
+    dev.off()
+    
+    # 
+    # 
+    # ############################################################################################################
+    # ### Plot heat maps with means
+    # ############################################################################################################
+    # 
+    # q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+    # 
+    # mean.table <- merge(means, q.table, 'AGI')
+    # mean.table$q.star <- cut(mean.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+    # 
+    # mean.table$condition <- factor(mean.table$Cond_short.y,  c('C', 'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+    # 
+    # mean.table <- merge(mean.table, araport,'AGI', all.x = T)
+    # mean.table[is.na(mean.table$name), 'name'] <- mean.table[is.na(mean.table$name), 'AGI']
+    # 
+    # mean.table[!mean.table$name == mean.table$AGI, 'gene.label'] <- paste0(mean.table[!mean.table$name == mean.table$AGI, 'name'], ' (', mean.table[!mean.table$name == mean.table$AGI, 'AGI'], ')')
+    # mean.table[mean.table$name == mean.table$AGI, 'gene.label'] <- mean.table[mean.table$name == mean.table$AGI, 'AGI']
+    # 
+    # 
+    # #### Cluster 4
+    # 
+    # km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), mean.table, 'AGI')
+    # km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+    # 
+    # png(paste(nice.date, 'heatmap_means_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+    # 
+    # ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = tpm)) +
+    #   # geom_tile() +
+    #   geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+    #   geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+    #   geom_text(size = 3, color = "black", vjust = 0.7) +
+    #   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c") +
+    #   theme_dominik +
+    #   theme(aspect.ratio = 7) + 
+    #   theme(axis.title = element_blank())
+    # 
+    # 
+    # dev.off()
+    # 
+    # 
+    # 
+    # #### Cluster 7
+    # 
+    # km7 <- merge(subset(cluster.resultsexport, km.new.order == '7'), mean.table, 'AGI')
+    # km7$gene.label <- factor(km7$gene.label, levels = sort(unique(km7$gene.label), decreasing = T))
+    # 
+    # png(paste(nice.date, 'heatmap_means_cluster7', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+    # 
+    # ggplot(km7, aes(x = condition, y = gene.label, label = q.star, fill = tpm)) +
+    #   # geom_tile() +
+    #   geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+    #   geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+    #   geom_text(size = 3, color = "black", vjust = 0.7) +
+    #   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c") +
+    #   theme_dominik +
+    #   theme(aspect.ratio = 7) + 
+    #   theme(axis.title = element_blank())
+    # 
+    # 
+    # dev.off()
+    # 
+    # 
+    
+    
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/.Rhistory b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/.Rhistory
new file mode 100644
index 0000000000000000000000000000000000000000..1c4ed75bf74ac6e7629921ea405b3f383604da12
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/.Rhistory
@@ -0,0 +1,512 @@
+sig.lfcs <- lfcs[row.names(lfcs) %in% unique(subset(sleuth.results, qval <= 0.05, AGI, drop = T)), ]
+sig.lfcs[is.na(sig.lfcs)] <- 0
+sub_to_cluster <- sig.lfcs
+plot.d <- as.data.frame(sub_to_cluster)
+plot.d$AGI <- row.names(plot.d)
+scaled.tpm <- melt(plot.d)
+colnames(scaled.tpm) <- c("AGI", "Condition", "z.score")
+scaled.tpm$Condition <- gsub('b_', '', scaled.tpm$Condition)
+scaled.tpm$Condition <- factor(gsub('mean.tpm_', '', scaled.tpm$Condition), levels = c('C' ,'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+cluster.resultsexport
+alphaEnrich <- 0.05
+pAdjust <- "BH"
+GO.type <- 'P'
+q_thresh <- 0.05
+plot.data <- merge(scaled.tpm, cluster.resultsexport, 'AGI')
+plot.data$label <- factor(plot.data$label, levels = unique(plot.data[order(plot.data$kmeans_cluster), 'label']))
+order(plot.data$kmeans_cluster)
+plot.data$kmeans_cluster
+cluster.resultsexport <- cluster.results$`8`
+# colnames(cluster.resultsexport) <- c('kmeans_cluster', 'AGI')
+save(cluster.resultsexport, file = '05_190731_ClusterResults_final.RData')
+load("04_ClusterResultsWithLFC_CutOff_1/190731_ClusterResults.RData")
+cluster.resultsexport <- cluster.results$`8`
+# colnames(cluster.resultsexport) <- c('kmeans_cluster', 'AGI')
+save(cluster.resultsexport, file = '05_190731_ClusterResults_final.RData')
+current.cluster.appr <- cluster.resultsexport
+# mapman.subset <- droplevels(merge(mapman.X4, current.cluster.appr, "AGI"))
+mapman.subset <- droplevels(merge(mapman, current.cluster.appr, "AGI"))
+GO.subset <- droplevels(merge(GOAnno[[GO.type]], current.cluster.appr, "AGI"))
+unique(current.cluster.appr$km.new.order)
+names(cluster.results)
+alphaEnrich <- 0.05
+pAdjust <- "BH"
+GO.type <- 'P'
+q_thresh <- 0.05
+fish.list <- list()
+current.cluster.appr <- cluster.resultsexport
+# mapman.subset <- droplevels(merge(mapman.X4, current.cluster.appr, "AGI"))
+mapman.subset <- droplevels(merge(mapman, current.cluster.appr, "AGI"))
+GO.subset <- droplevels(merge(GOAnno[[GO.type]], current.cluster.appr, "AGI"))
+for(i in unique(current.cluster.appr$km.new.order))
+{
+############################################################################################################
+### Mapman
+############################################################################################################
+fishers.results <-  data.frame(matrix(NA, 0,3))
+for(mapman.ID in unique(mapman.subset$BIN1))
+{
+gene.count <- length(unique(mapman.subset[mapman.subset$BIN1 == mapman.ID,"AGI"]))
+InIn <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 == mapman.ID)
+InOut <- sum(mapman.subset$km.new.order == i & mapman.subset$BIN1 != mapman.ID)
+OutIn <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 == mapman.ID))
+OutOut <- sum((mapman.subset$km.new.order != i & mapman.subset$BIN1 != mapman.ID))
+p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(mapman.ID, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+}
+fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+# ### Remove non-significant
+# fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+# ### Remove under-represented
+fishers.results <- fishers.results[fishers.results$represented == "over",]
+fishers.results$km.new.order <- i
+fish.list[['mapman']] <- rbind(fish.list[['mapman']], fishers.results)
+############################################################################################################
+### GO terms
+############################################################################################################
+fishers.results <-  data.frame(matrix(NA, 0,3))
+for(GO.Term in unique(GO.subset$GO.Term))
+{
+gene.count <- length(unique(GO.subset[GO.subset$GO.Term == GO.Term,"AGI"]))
+InIn <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term == GO.Term)
+InOut <- sum(GO.subset$km.new.order == i & GO.subset$GO.Term != GO.Term)
+OutIn <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term == GO.Term))
+OutOut <- sum((GO.subset$km.new.order != i & GO.subset$GO.Term != GO.Term))
+p <- fisher.test(matrix(c(InIn, InOut, OutIn, OutOut), nrow = 2))$p.value
+if(InIn/InOut > OutIn/OutOut){represented  = "over"}else{represented  = "under"}
+fishers.results <- rbind.data.frame(fishers.results, cbind.data.frame(GO.Term, p, represented, gene.count, InIn, InOut, OutIn, OutOut))
+}
+fishers.results[, pAdjust] <- p.adjust(fishers.results$p, method = pAdjust)
+# ### Remove non-significant
+# fishers.results <- fishers.results[fishers.results[,pAdjust] <= alphaEnrich,]
+# ### Remove under-represented
+fishers.results <- fishers.results[fishers.results$represented == "over",]
+fishers.results$km.new.order <- i
+fish.list[['go']] <- rbind(fish.list[['go']], fishers.results)
+}
+plot.data <- merge(scaled.tpm, cluster.resultsexport, 'AGI')
+plot.data$label <- factor(plot.data$label, levels = unique(plot.data[order(plot.data$km.new.order), 'label']))
+cluster.plot <- ggplot(plot.data, aes(x=Condition, y=z.score, group=AGI)) +
+geom_violin(aes(group = Condition), scale = 'width') +
+geom_hline(yintercept = 0, color = 'red') +
+labs(x="Treatment", y="log-FC (Trtmt / Control)") +
+facet_wrap(~label, scales = 'free', nrow = 2) +
+theme_dominik +
+theme(strip.background = element_blank(), strip.text = element_text(size = 12))
+cluster.plot
+dir.create('05_kmeans_final')
+go.ex <- drop.levels(subset(fish.list[['go']], BH <= alphaEnrich))
+go.ex$GO.Term <-  factor(go.ex$GO.Term, levels = rev(sort(unique(go.ex$GO.Term))))
+go.plot <- ggplot(go.ex, aes(x=as.factor(km.new.order), y=GO.Term, label  = InIn)) +
+geom_label() +
+xlab('k-means cluster') +
+theme_dominik + theme(aspect.ratio = 2) +
+theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+go.plot
+setwd('05_kmeans_final')
+png(paste(nice.date, 'go.plot', '.png', sep = ''), res = 300, width = 1600, height = 1600)
+print(go.plot)
+dev.off()
+png(paste(nice.date, 'go.plot', '.png', sep = ''), res = 300, width = 1600, height = 800)
+print(go.plot)
+dev.off()
+png(paste(nice.date, 'go.plot', '.png', sep = ''), res = 300, width = 3200, height = 1600)
+print(go.plot)
+dev.off()
+png(paste(nice.date, 'go.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+print(go.plot)
+dev.off()
+png(paste(nice.date, 'mm.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+print(mm.plot)
+dev.off()
+mm.ex <- drop.levels(subset(fish.list[['mapman']], BH <= alphaEnrich))
+mm.ex$mapman.ID <-  factor(mm.ex$mapman.ID, levels = rev(sort(unique(mm.ex$mapman.ID))))
+mm.plot <- ggplot(mm.ex, aes(x=as.factor(km.new.order), y=mapman.ID, label  = InIn)) +
+geom_label() +
+xlab('k-means cluster') +
+theme_dominik + theme(aspect.ratio = 2) +
+theme(panel.grid = element_line(color = 'grey70'), panel.background = element_blank(), panel.border = element_rect(fill = NA))
+plot.data <- merge(scaled.tpm, cluster.resultsexport, 'AGI')
+plot.data$label <- factor(plot.data$label, levels = unique(plot.data[order(plot.data$km.new.order), 'label']))
+cluster.plot <- ggplot(plot.data, aes(x=Condition, y=z.score, group=AGI)) +
+geom_violin(aes(group = Condition), scale = 'width') +
+geom_hline(yintercept = 0, color = 'red') +
+labs(x="Treatment", y="log-FC (Trtmt / Control)") +
+facet_wrap(~label, scales = 'free', nrow = 2) +
+theme_dominik +
+theme(strip.background = element_blank(), strip.text = element_text(size = 12))
+png(paste(nice.date, 'mm.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+print(mm.plot)
+dev.off()
+png(paste(nice.date, 'cluster.plot', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+print(cluster.plot)
+dev.off()
+cluster.plot
+View(plot.data)
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+View(lfc.table)
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+head(q.table)
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+sig.at.some <- unique(subset(q.table, q <= 0.05, AGI, drop = T))
+count(mapman.X4[, c('BIN1', 'BIN2')])
+cwllwall <- merge(subset(mapman.X4, BIN1 == 'Cell wall' & BIN2 == 'cellulose'), lfc.table, 'AGI')
+cwllwall$label <- gsub('b_' , '', cwllwall$variable)
+cwllwall$label <- factor(cwllwall$label, unique(cwllwall[order(cwllwall$variable), 'label']))
+ggplot(cwllwall, aes(x = label, y = AGI, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+candidates <- merge(candidates, araport, 'AGI')
+candidates$label <- gsub('b_' , '', candidates$variable)
+candidates <- merge(subset(GOAnno$P, GO.Term =='pathogenesis'), lfc.table, 'AGI')
+category <- "pathogen"
+candidates <- merge(candidates, araport, 'AGI')
+candidates$label <- gsub('b_' , '', candidates$variable)
+candidates$label <- factor(candidates$label, unique(candidates[order(candidates$variable), 'label']))
+# candidates$gene.label <- paste0(candidates$name, ' (', candidates$AGI, ')')
+candidates$gene.label <- paste0(candidates$NAME, ' (', candidates$AGI, ')')
+pdf(paste0(category, '.pdf'), width = nrow(candidates)/50, height = nrow(candidates)/50 * 2)
+ggplot(candidates, aes(x = label, y = gene.label, label = q.star, fill = lfc)) +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+theme_dominik +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / Ctrl)", sep = ""))) +
+theme(legend.position = 'right', legend.direction = 'vertical', legend.title = element_text(), axis.ticks = element_blank(), panel.background = element_blank()) +
+theme(aspect.ratio = nrow(candidates)/50) +
+labs(x = '', y = '') +
+scale_x_discrete(position = 'top') +
+ggtitle(category)
+dev.off()
+getwd()
+cwllwall <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+cwllwall$variable
+lfc.table$label <- gsub('b_' , '', lfc.table$variable)
+lfc.table$label <- factor(lfc.table$label, unique(lfc.table[order(lfc.table$variable), 'label']))
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+View(cluster.resultsexport)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+head(q.table)
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+ggplot(km4, aes(x = condition, y = AGI, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+View(km4)
+lfc.table <- merge(lfc.table, araport,'AGI')
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+head(q.table)
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+head(q.table)
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+View(lfc.table)
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+# ggplot(km4, aes(x = condition, y = AGI, label = q.star, fill = lfc)) +
+ggplot(km4, aes(x = condition, y = name, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+lfc.table[is.na(lfc.table$name), 'name'] <- lfc.table[is.na(lfc.table$name), 'AGI']
+paste0(lfc.table[!is.na(lfc.table$name), 'name'], ' (', lfc.table[!is.na(lfc.table$name), 'AGI'], ')')
+lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+lfc.table[is.na(lfc.table$name), 'name'] <- lfc.table[is.na(lfc.table$name), 'AGI']
+lfc.table$name == lfc.table$AGI
+lfc.table[!lfc.table$name == lfc.table$AGI, 'gene.label'] <- paste0(lfc.table[!lfc.table$name == lfc.table$AGI, 'name'], ' (', lfc.table[!lfc.table$name == lfc.table$AGI, 'AGI'], ')')
+lfc.table$gene.label
+lfc.table[lfc.table$name == lfc.table$AGI, 'gene.label'] <- lfc.table[!lfc.table$name == lfc.table$AGI, 'AGI']
+lfc.table[!lfc.table$name == lfc.table$AGI, 'gene.label'] <- paste0(lfc.table[!lfc.table$name == lfc.table$AGI, 'name'], ' (', lfc.table[!lfc.table$name == lfc.table$AGI, 'AGI'], ')')
+lfc.table[lfc.table$name == lfc.table$AGI, 'gene.label'] <- lfc.table[lfc.table$name == lfc.table$AGI, 'AGI']
+lfc.table$gene.label
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+# ggplot(km4, aes(x = condition, y = AGI, label = q.star, fill = lfc)) +
+ggplot(km4, aes(x = condition, y = name, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+km4$gene.label
+factor(km4$gene.label, levels = rev(unique(km4$gene.label)))
+km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+km4$gene.label
+# ggplot(km4, aes(x = condition, y = AGI, label = q.star, fill = lfc)) +
+ggplot(km4, aes(x = condition, y = name, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+# ggplot(km4, aes(x = condition, y = AGI, label = q.star, fill = lfc)) +
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5)
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 3200, height = 2000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 3200, height = 6400)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 600, height = 3000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 3000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 3500)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 5) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 6) +
+theme(axis.title = element_blank())
+dev.off()
+png(paste(nice.date, 'heatmap_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
+km7 <- merge(subset(cluster.resultsexport, km.new.order == '7'), lfc.table, 'AGI')
+km7$gene.label <- factor(km7$gene.label, levels = sort(unique(km7$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_cluster7', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km7, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
+View(means)
+View(q.table)
+q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+mean.table <- merge(means, q.table, 'AGI')
+mean.table$q.star <- cut(mean.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+View(mean.table)
+order(lfc.table$variable)
+lfc.table$condition
+order(mean.table$Cond_short.y)
+mean.table$condition <- factor(mean.table$condition, unique(mean.table[order(mean.table$variable), 'Cond_short.y']))
+mean.table$condition <- factor(mean.table$Cond_short.y, unique(mean.table[order(mean.table$variable), 'Cond_short.y']))
+mean.table$condition
+mean.table$condition <- factor(mean.table$Cond_short.y,  c('C', 'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+mean.table$condition
+mean.table <- merge(mean.table, araport,'AGI', all.x = T)
+mean.table[is.na(mean.table$name), 'name'] <- mean.table[is.na(mean.table$name), 'AGI']
+mean.table[!mean.table$name == mean.table$AGI, 'gene.label'] <- paste0(mean.table[!mean.table$name == mean.table$AGI, 'name'], ' (', mean.table[!mean.table$name == mean.table$AGI, 'AGI'], ')')
+mean.table[mean.table$name == mean.table$AGI, 'gene.label'] <- mean.table[mean.table$name == mean.table$AGI, 'AGI']
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_means_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+# limits = limits.lfc.sleuth.GT,
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
+View(km4)
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), mean.table, 'AGI')
+km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_means_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = tpm)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c") +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
+View(scaled.tpm)
+View(scaled.tpm)
+#### Cluster 4
+km4 <- merge(subset(cluster.resultsexport, km.new.order == '4'), lfc.table, 'AGI')
+km4$gene.label <- factor(km4$gene.label, levels = sort(unique(km4$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_lfc_cluster4', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km4, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
+#### Cluster 7
+km7 <- merge(subset(cluster.resultsexport, km.new.order == '7'), lfc.table, 'AGI')
+km7$gene.label <- factor(km7$gene.label, levels = sort(unique(km7$gene.label), decreasing = T))
+png(paste(nice.date, 'heatmap_lfc_cluster7', '.png', sep = ''), res = 300, width = 1200, height = 4000)
+ggplot(km7, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+# geom_tile() +
+geom_point(color = "black", stroke = 1, fill = NA, size = 6, shape = 22) +
+geom_point(alpha = 1, stroke = 1, size = 6, shape = 22) +
+geom_text(size = 3, color = "black", vjust = 0.7) +
+scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+theme_dominik +
+theme(aspect.ratio = 7) +
+theme(axis.title = element_blank())
+dev.off()
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731cluster.plot.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731cluster.plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..47f7c0614622a9db3a145610dc386c1cc5abfe30
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731cluster.plot.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731go.plot.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731go.plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..46a52cbca177d460ed95fbcc301348e716518cba
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731go.plot.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster4.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster4.png
new file mode 100644
index 0000000000000000000000000000000000000000..a60a84f8200f67297126a8884babca7e2a175190
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster4.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster7.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster7.png
new file mode 100644
index 0000000000000000000000000000000000000000..16ae9ac940ea811543c0491e3e03ba7c034e4b93
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731heatmap_lfc_cluster7.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731mm.plot.png b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731mm.plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..0959e87e88fa18f78bb279b05550e4d2a5e841b6
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/05_kmeans_final/190731mm.plot.png differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_HeatMaps.R b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_HeatMaps.R
new file mode 100644
index 0000000000000000000000000000000000000000..08626bbd48e4e218b453c19512b1dfc7045997d1
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_HeatMaps.R
@@ -0,0 +1,193 @@
+
+required.packages <- c("Rmisc", "plyr", "ggplot2", "reshape2", "gridExtra", 'openxlsx', 'sleuth', 'amap', 'ggdendro', 'ellipse', 'UpSetR', 'VennDiagram')
+
+for(package in required.packages)
+{
+  print(package)
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package, repos ="https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = T)
+}
+
+
+theme_dominik <-
+  theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA, color = NA)) +
+  theme(aspect.ratio = 1) +
+  theme(axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black")) +
+  # theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) + #, legend.key.size = unit(0.05, "npc")
+  theme(plot.margin=unit(c(1, 1, 1, 1),"cm")) +
+  theme(plot.title = element_text(hjust = 0, face = "bold", size = 8))
+
+
+load('01_Sleuth.RData')
+load('RNASeq_Annotation/Arabidopsis/02.0Araport11/02.0Araport.RData')
+
+  # LFC_cut <- 1
+  # sleuth.results[abs(sleuth.results$b) < LFC_cut & !is.na(sleuth.results$b), 'qval'] <- 1
+  sleuth.results[is.na(sleuth.results$b), 'qval'] <- 1
+  
+  sleuth.exprt$AGI    <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.exprt$target_id), fixed = T))[,1]
+  sleuth.results$AGI  <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.results$target_id), fixed = T))[,1]
+  
+  lfcs <- sleuth.exprt[,grep('^b_', colnames(sleuth.exprt))]
+  row.names(lfcs) <- sleuth.exprt$AGI
+  
+  sig.lfcs <- lfcs[row.names(lfcs) %in% unique(subset(sleuth.results, qval <= 0.05, AGI, drop = T)), ]
+  sig.lfcs[is.na(sig.lfcs)] <- 0
+  # 
+  # plot.d <- as.data.frame(sig.lfcs)
+  # plot.d$AGI <- row.names(plot.d)
+  #   
+  #   scaled.tpm <- melt(plot.d)
+  #   colnames(scaled.tpm) <- c("AGI", "Condition", "z.score")
+  #   scaled.tpm$Condition <- gsub('b_', '', scaled.tpm$Condition)
+  #   scaled.tpm$Condition <- factor(gsub('mean.tpm_', '', scaled.tpm$Condition), levels = c('C' ,'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH'))
+  #   
+  # 
+    
+    ############################################################################################################
+    ### Plot heat maps with LFCs
+    ############################################################################################################
+  
+
+    lfc.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('^b_', colnames(sleuth.exprt)), value.name = 'lfc')
+    q.table <- melt(sleuth.exprt, id.vars = 'AGI', measure.vars = grep('qvalue_', colnames(sleuth.exprt)), value.name = 'q')
+
+    lfc.table <- cbind.data.frame(lfc.table, q.table[,2:3])
+    lfc.table$q.star <- cut(lfc.table$q, breaks = c(0, 0.001, 0.01, 0.05, 1), labels = c('***', '**', '*', ' '), include.lowest = T)
+
+    lfc.table$condition <- gsub('b_' , '', lfc.table$variable)
+    lfc.table$condition <- factor(lfc.table$condition, unique(lfc.table[order(lfc.table$variable), 'condition']))
+
+    lfc.table <- merge(lfc.table, araport,'AGI', all.x = T)
+    lfc.table[is.na(lfc.table$name), 'name'] <- lfc.table[is.na(lfc.table$name), 'AGI']
+
+    
+    lfc.table[!lfc.table$name == lfc.table$AGI, 'gene.label'] <- paste0(lfc.table[!lfc.table$name == lfc.table$AGI, 'name'], ' (', lfc.table[!lfc.table$name == lfc.table$AGI, 'AGI'], ')')
+    lfc.table[lfc.table$name == lfc.table$AGI, 'gene.label'] <- lfc.table[lfc.table$name == lfc.table$AGI, 'AGI']
+
+    
+    
+    
+    # 
+    # current.set <- sample(lfc.table$AGI, 10)
+    # plot.data <- subset(lfc.table, AGI %in% current.set)
+    # 
+    # 
+    # p1 <- ggplot(plot.data, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+    #   geom_point(color = "black", stroke = 0.5, fill = NA, size = 6, shape = 22) +
+    #   geom_point(alpha = 1, stroke = 0.5, size = 6, shape = 22) +
+    #   geom_text(size = 3, color = "black", vjust = 0.7) +
+    #   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+    #                        midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+    #   theme_dominik +
+    #   theme(aspect.ratio = length(unique(plot.data$AGI))/4) + 
+    #   theme(axis.title = element_blank())
+    # 
+    # 
+    # 
+    # current.set <- sample(lfc.table$AGI, 30)
+    # plot.data <- subset(lfc.table, AGI %in% current.set)
+    # 
+    # p2 <- ggplot(plot.data, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+    #   geom_point(color = "black", stroke = 0.5, fill = NA, size = 6, shape = 22) +
+    #   geom_point(alpha = 1, stroke = 0.5, size = 6, shape = 22) +
+    #   geom_text(size = 3, color = "black", vjust = 0.7) +
+    #   scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+    #                        midpoint = 0, name = expression(paste(log["2"], "-FC (trtmt / WT)", sep = ""))) +
+    #   theme_dominik +
+    #   theme(aspect.ratio = length(unique(plot.data$AGI))/4) + 
+    #   theme(axis.title = element_blank())
+    # 
+    # 
+    # ggplotGrob(p1)
+    # 
+    # 
+    # 
+    # png('test.png', res = 300, height = 4000, width = 1600)
+    # grid.arrange(gtable_rbind(ggplotGrob(p1), ggplotGrob(p2)))
+    # dev.off()
+    
+    
+    
+    
+    gene.cats <- read.xlsx('06_NasserHeatMaps.xlsx')
+    plot.cats <- merge(lfc.table, gene.cats)
+    
+    
+    plotlist <- list()
+    for(i in sort(unique(plot.cats$Table)))
+    {
+      
+      plot.data <- subset(plot.cats, Table %in% i)
+      
+      plotlist[[i]] <- ggplot(plot.data, aes(x = condition, y = gene.label, label = q.star, fill = lfc)) +
+        geom_point(color = "black", stroke = 0.5, fill = NA, size = 6, shape = 22) +
+        geom_point(alpha = 1, stroke = 0.5, size = 6, shape = 22) +
+        geom_text(size = 3, color = "black", vjust = 0.7) +
+        scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+                             midpoint = 0, name = expression(paste(log["2"], "-FC", sep = ""))) +
+        theme_dominik +
+        theme(aspect.ratio = length(unique(plot.data$AGI))/5) +
+        theme(plot.margin = unit(c(2.5, 0, 2.5, 0), 'lines')) + 
+        theme(axis.title = element_blank()) + 
+        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
+      
+    }
+    
+    plotlist <- lapply(plotlist, ggplotGrob)
+    
+
+    png('test.png', res = 500, height = 24000, width = 3000)
+    
+    grid.arrange(do.call(gtable_rbind, plotlist))
+    
+    dev.off()
+    
+    
+    
+    
+    
+    gene.cats <- read.xlsx('06_NasserHeatMaps.xlsx', sheet = 2)
+    gene.cats$yposition <- -1*gene.cats$yposition
+    
+    plot.cats <- merge(lfc.table, gene.cats)
+    
+    i <- "T4_WRKY"
+    
+    
+    plotlist <- list()
+    for(i in sort(unique(plot.cats$Table)))
+    {
+      
+    
+    plot.data <- subset(plot.cats, Table %in% i)
+   
+    
+    plot.data$Subcategory <- factor(plot.data$Subcategory, levels = unique(plot.data[order(plot.data$Group), 'Subcategory']))
+    plot.data$LongLabel <- factor(plot.data$LongLabel, levels = unique(plot.data[order(plot.data$yposition), 'LongLabel']))
+    plot.data$SuperLongLabel <- factor(plot.data$SuperLongLabel, levels = unique(plot.data[order(plot.data$yposition), 'SuperLongLabel']))
+    
+    
+    plotlist[[i]] <- 
+  ggplot(plot.data, aes(x = condition, y = SuperLongLabel, label = q.star, fill = lfc)) +
+      geom_point(color = "black", stroke = 0.5, fill = NA, size = 6, shape = 22) +
+      geom_point(alpha = 1, stroke = 0.5, size = 6, shape = 22) +
+      geom_text(size = 3, color = "black", vjust = 0.7) +
+      scale_fill_gradient2(low =  "#1f78b4", high = "#e41a1c",
+                           midpoint = 0, name = expression(paste(log["2"], "-FC", sep = ""))) +
+      theme_dominik +
+      facet_grid(Subcategory~. , scales = "free_y", space = "free", switch = 'y',  labeller = label_wrap_gen(width=25)) +
+      theme(strip.background = element_rect(fill = NA, color = 'black'), strip.placement = 'outside', strip.text = element_text(face = 'bold')) +
+      theme(axis.title = element_blank(), axis.text.y = element_text(hjust =1), axis.ticks = element_blank()) + 
+      scale_x_discrete(position = 'top') +
+      theme(axis.text.x.top = element_text(angle = 90, vjust = 0.5, hjust = 0))
+    
+    
+    }
+
+plotlist <- lapply(plotlist, ggplotGrob)
+
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_NasserHeatMaps.xlsx b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_NasserHeatMaps.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..cd191479c4191de63554b25f12f10613dc21619b
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/06_NasserHeatMaps.xlsx differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/10_CollectSupplement.R b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/10_CollectSupplement.R
new file mode 100644
index 0000000000000000000000000000000000000000..b87081bc80e4c7bd76bd1aaa2b7445e146effca4
--- /dev/null
+++ b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/10_CollectSupplement.R
@@ -0,0 +1,125 @@
+
+required.packages <- c("Rmisc", "plyr", "ggplot2", "reshape2", "gridExtra", 'openxlsx', 'sleuth', 'amap', 'ggdendro', 'ellipse', 'UpSetR', 'VennDiagram')
+
+for(package in required.packages)
+{
+  print(package)
+  ## Check if package is installed. If not, install
+  if(!package %in% row.names(installed.packages())){install.packages(package, repos ="https://cran.uni-muenster.de/")}
+  # ## Check if package is up to date. If not, update
+  # update.packages(package, repos = "https://cran.uni-muenster.de/")
+  ## Load package
+  library(package, character.only = T)
+}
+
+nice.date <- format(Sys.time(), "%y%m%d")
+SampleInfo <- read.xlsx('../00_SampleInfo.xlsx')
+
+color.values <- brewer.pal(8, 'Dark2')[8:1]
+names(color.values) <- unique(SampleInfo$Cond_short)
+
+theme_dominik <-
+  theme(panel.grid = element_blank(), panel.background = element_blank(), panel.border = element_rect(fill = NA)) +
+  theme(aspect.ratio = 1) +
+  theme(axis.ticks = element_line(color = "black"), axis.text = element_text(color = "black")) +
+  theme(strip.background = element_blank(), strip.text = element_text(size = 8)) +
+  theme(legend.position = "bottom", legend.direction = "horizontal", legend.title = element_blank()) + #, legend.key.size = unit(0.05, "npc")
+  theme(plot.margin=unit(c(1, 1, 1, 1),"cm")) +
+  theme(plot.title = element_text(hjust = 0, face = "bold", size = 15))
+
+
+########################################
+#### Annotation data
+########################################
+
+load('RNASeq_Annotation/Arabidopsis/02.0Araport11/02.0Araport.RData')
+load("RNASeq_Annotation/Arabidopsis/02.4GO/GOAnno2018-02-06.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/180924_MercatorMapman.RData")
+load("RNASeq_Annotation/Arabidopsis/02.3Mapman/02.3Mapman.RData")
+colnames(mapman)[2] <- 'MAPMAN.BIN'
+load("RNASeq_Annotation/Arabidopsis/02.9PlnTFDB/02.9PlnTFDB.RData")
+load("RNASeq_Annotation/Arabidopsis/02.12Lipids/02.12lipids.RData")
+
+load(file = '01_Kallisto.RData')
+load(file = '01_Sleuth.RData')
+
+
+load('05_190731_ClusterResults_final.RData')
+
+
+############################################################################################################
+### Writing the supplementary data file
+############################################################################################################
+
+#### Prepare mapman as a one line to prevent gene duplication
+mapman_edit <- unique(mapman[,c(1,2,4,6)])
+
+mapman_edit$pivot <- 'MAPMAN.BIN'
+mapman_OneLiner.term <- dcast(mapman_edit, AGI~pivot, value.var = 'MAPMAN.BIN', fun.aggregate = 
+                           function(x){paste(unique(x), collapse = '|')})
+
+mapman_edit$pivot <- 'MAPMAN.DESCRIPTION'
+mapman_OneLiner.Desc <- dcast(mapman_edit, AGI~pivot, value.var = 'DESCRIPTION', fun.aggregate = 
+                           function(x){paste(unique(x), collapse = '|')})
+
+mapman_OneLiner <- merge(mapman_OneLiner.term, mapman_OneLiner.Desc, 'AGI')
+
+#### Pull count / tpm data 
+expression.data <- kallisto_table(sleuth_objects$not_trimmed)
+expression.data$AGI <-  do.call(rbind, strsplit(split = ".", as.character(expression.data$target_id), fixed = T))[,1]
+expression.data <- merge(expression.data, SampleInfo, by.x = 'sample', by.y = 'IlluminaID')
+
+est_counts <- dcast(expression.data, AGI+target_id~SampleID, value.var = 'est_counts')
+colnames(est_counts)[3:ncol(est_counts)] <- paste('est_counts', colnames(est_counts)[3:ncol(est_counts)], sep = '_')
+
+est_counts <- est_counts[, 
+                         c(1:2, grep('_C_', colnames(est_counts)), grep('_S_', colnames(est_counts)), grep('_M_', colnames(est_counts)), 
+                           grep('_H_', colnames(est_counts)), grep('_SM_', colnames(est_counts)), grep('_SH_', colnames(est_counts)), 
+                           grep('_MH_', colnames(est_counts)), grep('_SMH_', colnames(est_counts)))]
+
+
+tpm <- dcast(expression.data, AGI~SampleID, value.var = 'tpm')
+colnames(tpm)[2:ncol(tpm)] <- paste('tpm', colnames(tpm)[2:ncol(tpm)], sep = '_')
+
+tpm <- tpm[, c(1, grep('_C_', colnames(tpm)), grep('_S_', colnames(tpm)), grep('_M_', colnames(tpm)), 
+               grep('_H_', colnames(tpm)), grep('_SM_', colnames(tpm)), grep('_SH_', colnames(tpm)), 
+               grep('_MH_', colnames(tpm)), grep('_SMH_', colnames(tpm)))]
+
+
+#### Calculate means and sd
+means <- summarySE(expression.data, groupvars = c('Cond_short.y', 'AGI'), measurevar = 'tpm')
+
+mean.tpm <- dcast(means, AGI~Cond_short.y, value.var = 'tpm')
+mean.tpm <- mean.tpm[, c('AGI', 'C', 'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH')]
+colnames(mean.tpm)[2:ncol(mean.tpm)] <- paste('mean.tpm', colnames(mean.tpm)[2:ncol(mean.tpm)], sep = '_')
+
+
+sd.tpm <- dcast(means, AGI~Cond_short.y, value.var = 'sd')
+sd.tpm <- sd.tpm[, c('AGI', 'C', 'S', 'M', 'H', 'SM', 'SH', 'MH', 'SMH')]
+colnames(sd.tpm)[2:ncol(sd.tpm)] <- paste('sd.tpm', colnames(sd.tpm)[2:ncol(sd.tpm)], sep = '_')
+
+sleuth.exprt$AGI <-  do.call(rbind, strsplit(split = ".", as.character(sleuth.exprt$target_id), fixed = T))[,1]
+
+
+#### Combine everything together 
+# supp <- merge(unique(araport[,c(1:3, 5, 7:9)]), unique(mapman.X4[,c(1:3, 5)]), 'AGI', all.x = F, all.y = F)
+supp <- merge(unique(araport[,c(1:3, 5, 7:9)]), mapman_OneLiner, 'AGI', all.x = F, all.y = F)
+supp <- merge(supp, unique(GO.export), 'AGI', all.x = T, all.y = F)
+supp <- merge(supp, unique(PlnTFDB[!duplicated(PlnTFDB$AGI) ,c(2, 4, 5)]), 'AGI', all.x = T, all.y = F)
+supp <- merge(supp, unique(lipids), 'AGI', all.x = T, all.y = F)
+
+supp <- merge(supp, est_counts, 'AGI', all.y = T)
+supp <- merge(supp, tpm, 'AGI', all.y = T)
+supp <- merge(supp, mean.tpm, 'AGI', all.y = T)
+supp <- merge(supp, sd.tpm, 'AGI', all.y = T)
+
+supp <- merge(supp, sleuth.exprt[,!colnames(sleuth.exprt) == 'target_id' & !grepl('se_b_', colnames(sleuth.exprt))], 'AGI', all.y = T)
+
+supp <- merge(supp, cluster.results, 'AGI', all = T)
+# supp <- merge(supp, gene.set.list, 'AGI', all = T)
+
+
+write.table(supp, file = paste(nice.date, 'SupplementalDataset.txt', sep = '_'), row.names = F, sep = '\t', na = '')
+
+
+
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.txt b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6174e0f74aff8f467e69e5dd259659ede7298849
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.txt differ
diff --git a/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.xlsx b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..dc3e8b11a00d278a29909d803ab16dcc9dccb261
Binary files /dev/null and b/_pre-ARC-legacy/190731_PipelineWithlogCutOff/190731_SupplementalDataset.xlsx differ
diff --git a/_pre-ARC-legacy/200225_GEO_Submission/00_SampleInfo.xlsx b/_pre-ARC-legacy/200225_GEO_Submission/00_SampleInfo.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..222ca446c463d8a79742fc8c291fc4a615906d6b
Binary files /dev/null and b/_pre-ARC-legacy/200225_GEO_Submission/00_SampleInfo.xlsx differ
diff --git a/_pre-ARC-legacy/200225_GEO_Submission/200302_GEO_MetaData_DB.xlsx b/_pre-ARC-legacy/200225_GEO_Submission/200302_GEO_MetaData_DB.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..2fc83ffcff017d6f4fc94a364c56dee4ac2573a7
Binary files /dev/null and b/_pre-ARC-legacy/200225_GEO_Submission/200302_GEO_MetaData_DB.xlsx differ
diff --git a/_pre-ARC-legacy/200225_GEO_Submission/SupplementalDatasetS1.xlsx b/_pre-ARC-legacy/200225_GEO_Submission/SupplementalDatasetS1.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..bdacae683c060e4d4869b9960691be0ace438935
Binary files /dev/null and b/_pre-ARC-legacy/200225_GEO_Submission/SupplementalDatasetS1.xlsx differ
diff --git a/_pre-ARC-legacy/200225_GEO_Submission/md5sums.xlsx b/_pre-ARC-legacy/200225_GEO_Submission/md5sums.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..66dccf50b8d4cdeea153c8232ac32b323b0b5433
Binary files /dev/null and b/_pre-ARC-legacy/200225_GEO_Submission/md5sums.xlsx differ
diff --git a/_pre-ARC-legacy/Kallisto-Pipeline.sh b/_pre-ARC-legacy/Kallisto-Pipeline.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5bf0fc7821143736a1998af002d347d4affbf126
--- /dev/null
+++ b/_pre-ARC-legacy/Kallisto-Pipeline.sh
@@ -0,0 +1,71 @@
+#### 04.04.2019
+#### RNASeq Analysis for AG Maurino
+
+############################################
+#### Prepare genome reference for Kallisto
+
+GENOME=~/190404_NasserJessica/2015_Nasser_JessicaSchmitz_AGMaurino_DB/Araport11_genes.201606.cds.repr.fasta
+/home/bin/kallisto_linux-v0.45.1/kallisto index -i $GENOME'_index' $GENOME
+
+############################################
+#### One Script for everything 
+#### (See comments for details)
+
+#### Specify the genome reference
+GENOME=Araport11_genes.201606.cds.repr.fasta
+
+#### prep some directories to collect data 
+mkdir fastqc_results
+mkdir kallisto_trimmed
+mkdir kallisto_not_trimmed
+
+#### Loop through all samples one after the other
+for j in Sample_*/;
+	do
+		
+		cd $j 	
+			
+		sampleName=$(echo $j | sed -e 's/\///g')
+		echo $sampleName
+				
+		############################################
+		### Concatenate data from multiple fastq.gz files originating from the same library (and split for size limit reasons)
+
+		### Unzip files
+		gunzip *fastq.gz
+
+		### Concatenate by direction (fwd or rev)
+		cat *R1_00* > $sampleName'_R1.fastq'		
+		cat *R2_00* > $sampleName'_R2.fastq'
+
+
+		### Remove original files
+		rm *_001.fastq *_002.fastq
+		
+		### Gzip back to save space
+		gzip *fastq
+		
+		############################################
+		### Trimmomatic
+		java -jar /home/bin/Trimmomatic-0.33/trimmomatic-0.33.jar PE $sampleName'_R1.fastq.gz' $sampleName'_R2.fastq.gz' $sampleName'_fwd_paired.fastq.gz' $sampleName'_fwd_unpaired.fastq.gz' $sampleName'_rev_paired.fastq.gz' $sampleName'_rev_unpaired.fastq.gz' ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 MAXINFO:50:0.8 MINLEN:36
+		
+		echo 'Trimmomatic done'
+
+		############################################
+		### Run FastQC
+		
+		fastqc *fastq.gz -o '../fastqc_results/'$sampleName
+
+		echo 'FastQC done'
+
+		############################################
+		### Run Kallisto on trimmed and non-trimmed reads 
+		
+		/home/bin/kallisto_linux-v0.45.1/kallisto quant -b 50 -t 10 -i $GENOME'_index' -o '../kallisto_trimmed/'$sampleName $sampleName'_fwd_paired.fastq.gz' $sampleName'_rev_paired.fastq.gz'
+		/home/bin/kallisto_linux-v0.45.1/kallisto quant -b 50 -t 10 -i $GENOME'_index' -o '../kallisto_not_trimmed/'$sampleName $sampleName'_R1.fastq.gz' $sampleName'_R2.fastq.gz'
+
+		echo 'Kallisto done'
+
+		cd ..
+
+done