From ffdece03c88225752dce1f59d1979c128c663f70 Mon Sep 17 00:00:00 2001 From: Dominik Brilhaus <brilhaus@nfdi4plants.org> Date: Thu, 7 Nov 2024 10:12:35 +0100 Subject: [PATCH] multi-docker works, cleanup --- workflows/deseq2/Dockerfile | 5 -- workflows/deseq2/README.md | 28 ++++++- workflows/deseq2/dependencies.R | 14 ---- workflows/deseq2/deseq2-dev.R | 76 +++++++++++++++++++ workflows/deseq2/deseq2-localDocker-test.cwl | 16 ---- workflows/deseq2/deseq2.R | 8 +- workflows/deseq2/deseq2.cwl | 5 +- ...-docker-test.cwl => mutli-docker-test.cwl} | 2 +- 8 files changed, 109 insertions(+), 45 deletions(-) delete mode 100644 workflows/deseq2/Dockerfile delete mode 100644 workflows/deseq2/dependencies.R create mode 100644 workflows/deseq2/deseq2-dev.R delete mode 100644 workflows/deseq2/deseq2-localDocker-test.cwl rename workflows/deseq2/{r-docker-test.cwl => mutli-docker-test.cwl} (56%) diff --git a/workflows/deseq2/Dockerfile b/workflows/deseq2/Dockerfile deleted file mode 100644 index fb3edf1..0000000 --- a/workflows/deseq2/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM rstudio/r-base:4.3-jammy - -RUN apt-get update && \ - R -e "install.packages(c('BiocManager'), repos='https://cloud.r-project.org/'); BiocManager::install('DESeq2'); BiocManager::install('tximport'); BiocManager::install('rhdf5'); \\" && \ - apt-get clean -y \ No newline at end of file diff --git a/workflows/deseq2/README.md b/workflows/deseq2/README.md index 673fb4d..39cf2dc 100644 --- a/workflows/deseq2/README.md +++ b/workflows/deseq2/README.md @@ -12,10 +12,36 @@ Workflow used for **differential gene expression analysis** ## Run pure script (to test) +### Install R dependencies for deseq2 + +```R +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("DESeq2") +library("DESeq2") + +BiocManager::install("tximport") +library("tximport") + +BiocManager::install("rhdf5") +library("rhdf5") +``` + +### test + ```bash RScript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." ``` + ## Run CWL-wrapped script -see [runs/deseq2-run](../../runs/deseq2-run) \ No newline at end of file +see [runs/deseq2-run](../../runs/deseq2-run) + + +## Multi-package containers + +- R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers) +- Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5` +- and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada: diff --git a/workflows/deseq2/dependencies.R b/workflows/deseq2/dependencies.R deleted file mode 100644 index d010bbb..0000000 --- a/workflows/deseq2/dependencies.R +++ /dev/null @@ -1,14 +0,0 @@ - -# Install dependencies for deseq2 - -if (!require("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - -BiocManager::install("DESeq2") -library("DESeq2") - -BiocManager::install("tximport") -library("tximport") - -BiocManager::install("rhdf5") -library("rhdf5") diff --git a/workflows/deseq2/deseq2-dev.R b/workflows/deseq2/deseq2-dev.R new file mode 100644 index 0000000..5651421 --- /dev/null +++ b/workflows/deseq2/deseq2-dev.R @@ -0,0 +1,76 @@ +# DESeq2 + +## Libraries + +library("DESeq2") +library("tximport") +library("rhdf5") +library("ggplot2") + +## In-and-out + +# inKallistoResults <- "../../runs/kallisto/kallisto_results" +# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +# inMetadataSample <- "Source.Name" +# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") + +### Read arguments from CLI + +args <- commandArgs(trailingOnly = T) + +inKallistoResults <- args[1] +inMetadataFile <- args[2] +inMetadataSample <- args[3] +inMetadataFactorList <- args[4] + +## Import kallisto count data + +files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5") +names(files) <- dir(inKallistoResults) + +txi <- tximport(files, type = "kallisto", txOut = TRUE) + +head(txi$counts) + +## Read sample metadata + +samples_metadata <- read.table(file = inMetadataFile, sep = "\t") +samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] +rownames(samples) <- samples[,inMetadataSample] + +factors <- sapply(inMetadataFactorList, function(x) x[[1]]) +design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + "))) + + +## DESeq + +dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design_formula) + +dds <- DESeq(dds) + +## Outputs + +### Extract results + +res <- results(dds) +write.csv(res, file = "results_stats.csv", quote = TRUE) + +### Generate and save default plots + +png("results_ma-plot.png") + plotMA(res, ylim=c(-2,2)) +dev.off() + +vsd <- vst(dds, blind=FALSE) +pcaData <- plotPCA(vsd, intgroup=factors, returnData=TRUE) +percentVar <- round(100 * attr(pcaData, "percentVar")) + +p2 <- ggplot(pcaData, aes(PC1, PC2, color=factors[[1]])) + + geom_point(size=3) + + xlab(paste0("PC1: ",percentVar[1],"% variance")) + + ylab(paste0("PC2: ",percentVar[2],"% variance")) + + coord_fixed() + +png("results_pca-plot.png") + print(p2) +dev.off() diff --git a/workflows/deseq2/deseq2-localDocker-test.cwl b/workflows/deseq2/deseq2-localDocker-test.cwl deleted file mode 100644 index 3e26cb8..0000000 --- a/workflows/deseq2/deseq2-localDocker-test.cwl +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env cwl-runner - -cwlVersion: v1.2 -class: CommandLineTool -hints: - DockerRequirement: - dockerFile: {$include: "./Dockerfile"} - dockerImageId: "deseq-docker" -requirements: - - class: NetworkAccess - networkAccess: true -baseCommand: [Rscript, --help] - -inputs: [] - -outputs: [] diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index 0384bc5..e9ad421 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -9,10 +9,10 @@ library("ggplot2") ## In-and-out -inKallistoResults <- "../../runs/kallisto/kallisto_results" -inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" -inMetadataSample <- "Source.Name" -inMetadataFactor <- "Factor..Photosynthesis.mode." +# inKallistoResults <- "../../runs/kallisto/kallisto_results" +# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +# inMetadataSample <- "Source.Name" +# inMetadataFactor <- "Factor..Photosynthesis.mode." ### Read arguments from CLI diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl index b35f8a4..4cdda2f 100644 --- a/workflows/deseq2/deseq2.cwl +++ b/workflows/deseq2/deseq2.cwl @@ -3,11 +3,8 @@ cwlVersion: v1.2 class: CommandLineTool hints: - # DockerRequirement: - # dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2 DockerRequirement: - dockerFile: {$include: "./Dockerfile"} - dockerImageId: "deseq-docker" + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 requirements: - class: InitialWorkDirRequirement listing: diff --git a/workflows/deseq2/r-docker-test.cwl b/workflows/deseq2/mutli-docker-test.cwl similarity index 56% rename from workflows/deseq2/r-docker-test.cwl rename to workflows/deseq2/mutli-docker-test.cwl index b964a83..136d87e 100644 --- a/workflows/deseq2/r-docker-test.cwl +++ b/workflows/deseq2/mutli-docker-test.cwl @@ -5,7 +5,7 @@ class: CommandLineTool requirements: - class: DockerRequirement - dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2 + dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0 baseCommand: [Rscript, --help] -- GitLab