diff --git a/runs/deseq2-run/job.yml b/runs/deseq2-run/job.yml index b0386bd4bbf18d2b229848e339880d5f9477e99e..25f66f6d26c6cdd6179212db52d906332973f912 100644 --- a/runs/deseq2-run/job.yml +++ b/runs/deseq2-run/job.yml @@ -5,5 +5,5 @@ inMetadataFile: class: File path: ../../runs/merged_isa_metadata/out/merged_isa.tsv inMetadataSample: "Source.Name" -inMetadataFactor: +inMetadataFactorList: - "Factor..Photosynthesis.mode." \ No newline at end of file diff --git a/workflows/deseq2/deseq2-dev.R b/workflows/deseq2/deseq2-dev.R deleted file mode 100644 index 565142189e96783199acbd5fa1df5573a2f21601..0000000000000000000000000000000000000000 --- a/workflows/deseq2/deseq2-dev.R +++ /dev/null @@ -1,76 +0,0 @@ -# DESeq2 - -## Libraries - -library("DESeq2") -library("tximport") -library("rhdf5") -library("ggplot2") - -## In-and-out - -# inKallistoResults <- "../../runs/kallisto/kallisto_results" -# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" -# inMetadataSample <- "Source.Name" -# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") - -### Read arguments from CLI - -args <- commandArgs(trailingOnly = T) - -inKallistoResults <- args[1] -inMetadataFile <- args[2] -inMetadataSample <- args[3] -inMetadataFactorList <- args[4] - -## Import kallisto count data - -files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5") -names(files) <- dir(inKallistoResults) - -txi <- tximport(files, type = "kallisto", txOut = TRUE) - -head(txi$counts) - -## Read sample metadata - -samples_metadata <- read.table(file = inMetadataFile, sep = "\t") -samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] -rownames(samples) <- samples[,inMetadataSample] - -factors <- sapply(inMetadataFactorList, function(x) x[[1]]) -design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) - - -## DESeq - -dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design_formula) - -dds <- DESeq(dds) - -## Outputs - -### Extract results - -res <- results(dds) -write.csv(res, file = "results_stats.csv", quote = TRUE) - -### Generate and save default plots - -png("results_ma-plot.png") - plotMA(res, ylim=c(-2,2)) -dev.off() - -vsd <- vst(dds, blind=FALSE) -pcaData <- plotPCA(vsd, intgroup=factors, returnData=TRUE) -percentVar <- round(100 * attr(pcaData, "percentVar")) - -p2 <- ggplot(pcaData, aes(PC1, PC2, color=factors[[1]])) + - geom_point(size=3) + - xlab(paste0("PC1: ",percentVar[1],"% variance")) + - ylab(paste0("PC2: ",percentVar[2],"% variance")) + - coord_fixed() - -png("results_pca-plot.png") - print(p2) -dev.off() diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index e9ad42163f586644a1916a7707003d5b4a316791..565142189e96783199acbd5fa1df5573a2f21601 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -12,7 +12,7 @@ library("ggplot2") # inKallistoResults <- "../../runs/kallisto/kallisto_results" # inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" # inMetadataSample <- "Source.Name" -# inMetadataFactor <- "Factor..Photosynthesis.mode." +# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") ### Read arguments from CLI @@ -21,7 +21,7 @@ args <- commandArgs(trailingOnly = T) inKallistoResults <- args[1] inMetadataFile <- args[2] inMetadataSample <- args[3] -inMetadataFactor <- args[4] +inMetadataFactorList <- args[4] ## Import kallisto count data @@ -35,15 +35,16 @@ head(txi$counts) ## Read sample metadata samples_metadata <- read.table(file = inMetadataFile, sep = "\t") +samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] +rownames(samples) <- samples[,inMetadataSample] -samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)] -colnames(samples)[1:2] <- c("sampleID", "condition") +factors <- sapply(inMetadataFactorList, function(x) x[[1]]) +design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) -rownames(samples) <- samples$sampleID ## DESeq -dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition) +dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design_formula) dds <- DESeq(dds) @@ -52,7 +53,7 @@ dds <- DESeq(dds) ### Extract results res <- results(dds) -write.csv(res, file = "results_stats.csv", append = FALSE, quote = TRUE) +write.csv(res, file = "results_stats.csv", quote = TRUE) ### Generate and save default plots @@ -61,10 +62,10 @@ png("results_ma-plot.png") dev.off() vsd <- vst(dds, blind=FALSE) -pcaData <- plotPCA(vsd, intgroup=c("condition"), returnData=TRUE) +pcaData <- plotPCA(vsd, intgroup=factors, returnData=TRUE) percentVar <- round(100 * attr(pcaData, "percentVar")) -p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) + +p2 <- ggplot(pcaData, aes(PC1, PC2, color=factors[[1]])) + geom_point(size=3) + xlab(paste0("PC1: ",percentVar[1],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + @@ -73,8 +74,3 @@ p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) + png("results_pca-plot.png") print(p2) dev.off() - - - - - diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl index 4cdda2fe7fcd33a9ca6c8559527ee1bbcce68ea4..c3dd72790b6493ced407c5525754856d7ab4b441 100644 --- a/workflows/deseq2/deseq2.cwl +++ b/workflows/deseq2/deseq2.cwl @@ -27,7 +27,7 @@ inputs: type: string inputBinding: position: 3 - inMetadataFactor: + inMetadataFactorList: type: string[] inputBinding: position: 4