diff --git a/workflows/deseq2/Dockerfile b/workflows/deseq2/Dockerfile deleted file mode 100644 index fb3edf185ab1df36a475ff87f2507ecb1e537a68..0000000000000000000000000000000000000000 --- a/workflows/deseq2/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM rstudio/r-base:4.3-jammy - -RUN apt-get update && \ - R -e "install.packages(c('BiocManager'), repos='https://cloud.r-project.org/'); BiocManager::install('DESeq2'); BiocManager::install('tximport'); BiocManager::install('rhdf5'); \\" && \ - apt-get clean -y \ No newline at end of file diff --git a/workflows/deseq2/README.md b/workflows/deseq2/README.md index 673fb4dc63ef924c2e57283cfad6a7d09eed71cd..39cf2dcfacc76eeaee434d0f5d1abc07809fbe5d 100644 --- a/workflows/deseq2/README.md +++ b/workflows/deseq2/README.md @@ -12,10 +12,36 @@ Workflow used for **differential gene expression analysis** ## Run pure script (to test) +### Install R dependencies for deseq2 + +```R +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("DESeq2") +library("DESeq2") + +BiocManager::install("tximport") +library("tximport") + +BiocManager::install("rhdf5") +library("rhdf5") +``` + +### test + ```bash Rscript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." 
``` + ## Run CWL-wrapped script -see [runs/deseq2-run](../../runs/deseq2-run) \ No newline at end of file +see [runs/deseq2-run](../../runs/deseq2-run) + + +## Multi-package containers + +- R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) +- Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` +- and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: diff --git a/workflows/deseq2/dependencies.R b/workflows/deseq2/dependencies.R deleted file mode 100644 index d010bbb7caa76ff4fbb114655812cb7ccea20dfb..0000000000000000000000000000000000000000 --- a/workflows/deseq2/dependencies.R +++ /dev/null @@ -1,14 +0,0 @@ - -# Install dependencies for deseq2 - -if (!require("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - -BiocManager::install("DESeq2") -library("DESeq2") - -BiocManager::install("tximport") -library("tximport") - -BiocManager::install("rhdf5") -library("rhdf5") diff --git a/workflows/deseq2/deseq2-dev.R b/workflows/deseq2/deseq2-dev.R new file mode 100644 index 0000000000000000000000000000000000000000..565142189e96783199acbd5fa1df5573a2f21601 --- /dev/null +++ b/workflows/deseq2/deseq2-dev.R @@ -0,0 +1,76 @@ +# DESeq2 + +## Libraries + +library("DESeq2") +library("tximport") +library("rhdf5") +library("ggplot2") + +## In-and-out + +# inKallistoResults <- "../../runs/kallisto/kallisto_results" +# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +# inMetadataSample <- "Source.Name" +# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") + +### Read arguments from CLI + +args <- commandArgs(trailingOnly = TRUE) + +inKallistoResults <- args[1] +inMetadataFile <- args[2] +inMetadataSample <- args[3] +inMetadataFactorList <- args[4] + +## Import kallisto count data + +files <- 
dir(inKallistoResults, pattern = "abundance.h5", recursive = TRUE, full.names = TRUE) +names(files) <- basename(dirname(files)) # name each file after its parent (sample) directory so labels cannot drift from file order + +txi <- tximport(files, type = "kallisto", txOut = TRUE) + +head(txi$counts) + +## Read sample metadata + +samples_metadata <- read.table(file = inMetadataFile, sep = "\t") +samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] +rownames(samples) <- samples[,inMetadataSample] + +factors <- vapply(inMetadataFactorList, function(x) x[[1]], character(1)) +design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) + + +## DESeq + +dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design_formula) + +dds <- DESeq(dds) + +## Outputs + +### Extract results + +res <- results(dds) +write.csv(res, file = "results_stats.csv", quote = TRUE) + +### Generate and save default plots + +png("results_ma-plot.png") + plotMA(res, ylim=c(-2,2)) +dev.off() + +vsd <- vst(dds, blind=FALSE) +pcaData <- plotPCA(vsd, intgroup=factors, returnData=TRUE) +percentVar <- round(100 * attr(pcaData, "percentVar")) + +p2 <- ggplot(pcaData, aes(PC1, PC2, color=.data[[factors[[1]]]])) + # colour by the pcaData column named in factors[[1]], not by the literal string + geom_point(size=3) + + xlab(paste0("PC1: ",percentVar[1],"% variance")) + + ylab(paste0("PC2: ",percentVar[2],"% variance")) + + coord_fixed() + +png("results_pca-plot.png") + print(p2) +dev.off() diff --git a/workflows/deseq2/deseq2-localDocker-test.cwl b/workflows/deseq2/deseq2-localDocker-test.cwl deleted file mode 100644 index 3e26cb86e3d2c4fc6e3d77895a879fdce58e17ed..0000000000000000000000000000000000000000 --- a/workflows/deseq2/deseq2-localDocker-test.cwl +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool -hints: - DockerRequirement: - dockerFile: {$include: "./Dockerfile"} - dockerImageId: "deseq-docker" -requirements: - - class: NetworkAccess - networkAccess: true -baseCommand: [Rscript, --help] - -inputs: [] - -outputs: [] diff --git a/workflows/deseq2/deseq2.R 
b/workflows/deseq2/deseq2.R index 0384bc5d321b4d2ab18b59322be90aaf47d770d3..e9ad42163f586644a1916a7707003d5b4a316791 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -9,10 +9,10 @@ library("ggplot2") ## In-and-out -inKallistoResults <- "../../runs/kallisto/kallisto_results" -inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" -inMetadataSample <- "Source.Name" -inMetadataFactor <- "Factor..Photosynthesis.mode." +# inKallistoResults <- "../../runs/kallisto/kallisto_results" +# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +# inMetadataSample <- "Source.Name" +# inMetadataFactor <- "Factor..Photosynthesis.mode." ### Read arguments from CLI diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl index b35f8a48f071f8711ea7d56c05ee59defe3ef4f5..4cdda2fe7fcd33a9ca6c8559527ee1bbcce68ea4 100644 --- a/workflows/deseq2/deseq2.cwl +++ b/workflows/deseq2/deseq2.cwl @@ -3,11 +3,8 @@ cwlVersion: v1.2 class: CommandLineTool hints: - # DockerRequirement: - # dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2 DockerRequirement: - dockerFile: {$include: "./Dockerfile"} - dockerImageId: "deseq-docker" + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 requirements: - class: InitialWorkDirRequirement listing: diff --git a/workflows/deseq2/r-docker-test.cwl b/workflows/deseq2/mutli-docker-test.cwl similarity index 56% rename from workflows/deseq2/r-docker-test.cwl rename to workflows/deseq2/mutli-docker-test.cwl index b964a83d8051ce4ff1e1093d9daec29e793deb3d..136d87e72742f52c9ac1cdfe287273e5462b541a 100644 --- a/workflows/deseq2/r-docker-test.cwl +++ b/workflows/deseq2/mutli-docker-test.cwl @@ -5,7 +5,7 @@ class: CommandLineTool requirements: - class: DockerRequirement - dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2 + dockerPull: 
quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 baseCommand: [Rscript, --help]