Skip to content
Snippets Groups Projects
Commit ffdece03 authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

multi-docker works, cleanup

parent 6efab19f
No related branches found
No related tags found
1 merge request!4fix docker
Pipeline #5011 passed
This commit is part of merge request !4. Comments created here will be created in the context of that merge request.
# Image with R 4.3 plus the Bioconductor packages loaded by the DESeq2 script
# (DESeq2, tximport, rhdf5).
FROM rstudio/r-base:4.3-jammy

# Install BiocManager from CRAN, then the Bioconductor dependencies in one call.
# The R expression must not end in a backslash: inside the shell's double
# quotes `\\` is handed to R as a literal `\`, which R cannot parse.
RUN apt-get update && \
    R -e "install.packages('BiocManager', repos='https://cloud.r-project.org/'); BiocManager::install(c('DESeq2', 'tximport', 'rhdf5'))" && \
    apt-get clean -y
\ No newline at end of file
......@@ -12,10 +12,36 @@ Workflow used for **differential gene expression analysis**
## Run pure script (to test)
### Install R dependencies for deseq2
```R
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
library("DESeq2")
BiocManager::install("tximport")
library("tximport")
BiocManager::install("rhdf5")
library("rhdf5")
```
### test
```bash
Rscript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
```
## Run CWL-wrapped script
see [runs/deseq2-run](../../runs/deseq2-run)
\ No newline at end of file
see [runs/deseq2-run](../../runs/deseq2-run)
## Multi-package containers
- R and combinations of library dependencies are available as multi-package containers from [BioContainers](https://github.com/BioContainers/multi-package-containers)
- Searched for `repo:BioContainers/multi-package-containers deseq2 tximport rhdf5`
- and found `quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0` :tada:
# Install dependencies for deseq2
#
# Installs BiocManager from CRAN when it is not available yet, installs the
# Bioconductor packages used by the DESeq2 workflow, and then attaches each of
# them so that a failed installation stops the script with an error.

# requireNamespace() only tests availability; unlike require() it does not
# attach the package, which is all this guard needs (later calls use `::`).
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

deseq2_dependencies <- c("DESeq2", "tximport", "rhdf5")
BiocManager::install(deseq2_dependencies)

# library() errors when a package is unavailable, so this doubles as a check.
for (pkg in deseq2_dependencies) {
  library(pkg, character.only = TRUE)
}
# DESeq2
#
# Differential gene expression analysis of kallisto quantifications.
#
# Command-line arguments (positional):
#   1. directory searched recursively for kallisto `abundance.h5` files
#   2. tab-separated sample metadata file
#   3. name of the metadata column holding the sample identifiers
#   4. name of the metadata column used as the design factor
#
# Outputs (written to the working directory):
#   results_stats.csv, results_ma-plot.png, results_pca-plot.png

## Libraries

library("DESeq2")
library("tximport")
library("rhdf5")
library("ggplot2")

## In-and-out

# inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
# inMetadataSample <- "Source.Name"
# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.")

### Read arguments from CLI

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  stop(
    "Expected 4 arguments: <kallisto results dir> <metadata file> ",
    "<sample column> <factor column>",
    call. = FALSE
  )
}

inKallistoResults <- args[1]
inMetadataFile <- args[2]
inMetadataSample <- args[3]
inMetadataFactorList <- args[4]

## Import kallisto count data

# List the abundance files relative to the results directory so that each
# sample name can be taken from the top-level folder the file lives in. Naming
# the files from a separate, non-recursive directory listing can mislabel
# samples when that listing has extra entries or sorts differently.
abundance_paths <- dir(
  inKallistoResults,
  pattern = "^abundance\\.h5$",
  recursive = TRUE,
  full.names = FALSE
)
if (length(abundance_paths) == 0) {
  stop("No abundance.h5 files found in ", inKallistoResults, call. = FALSE)
}

files <- file.path(inKallistoResults, abundance_paths)
names(files) <- vapply(
  strsplit(abundance_paths, "/", fixed = TRUE),
  function(parts) parts[[1]],
  character(1)
)

txi <- tximport(files, type = "kallisto", txOut = TRUE)

head(txi$counts)

## Read sample metadata

# Columns are selected by name below, so the file must be read with a header.
samples_metadata <- read.table(file = inMetadataFile, sep = "\t", header = TRUE)

required_columns <- c(inMetadataSample, unlist(inMetadataFactorList))
missing_columns <- setdiff(required_columns, colnames(samples_metadata))
if (length(missing_columns) > 0) {
  stop(
    "Column(s) not found in ", inMetadataFile, ": ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the sample and factor columns, sorted by sample identifier.
sample_order <- order(samples_metadata[[inMetadataSample]])
samples <- samples_metadata[sample_order, required_columns]
rownames(samples) <- samples[, inMetadataSample]

# vapply() guarantees a character vector regardless of the input length.
factors <- vapply(inMetadataFactorList, function(x) x[[1]], character(1))
design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))

## DESeq

dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design_formula)
dds <- DESeq(dds)

## Outputs

### Extract results

res <- results(dds)
write.csv(res, file = "results_stats.csv", quote = TRUE)

### Generate and save default plots

png("results_ma-plot.png")
plotMA(res, ylim = c(-2, 2))
dev.off()

vsd <- vst(dds, blind = FALSE)
pcaData <- plotPCA(vsd, intgroup = factors, returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

# Map colour to the *column* named by the first factor. Passing the bare
# string would colour every point by the same constant label.
p2 <- ggplot(pcaData, aes(PC1, PC2, color = .data[[factors[[1]]]])) +
  geom_point(size = 3) +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
  labs(color = factors[[1]]) +
  coord_fixed()

png("results_pca-plot.png")
print(p2)
dev.off()
#!/usr/bin/env cwl-runner

# Minimal CommandLineTool that runs `Rscript --help` in an image built from the
# Dockerfile next to this file. It takes no inputs and declares no outputs.

cwlVersion: v1.2
class: CommandLineTool

hints:
  DockerRequirement:
    # Inline the Dockerfile contents and tag the built image.
    dockerFile: {$include: "./Dockerfile"}
    dockerImageId: "deseq-docker"

requirements:
  # NOTE(review): presumably declared for steps that need network access at
  # runtime -- confirm whether `Rscript --help` requires it.
  - class: NetworkAccess
    networkAccess: true

baseCommand: [Rscript, --help]

inputs: []
outputs: []
......@@ -9,10 +9,10 @@ library("ggplot2")
## In-and-out
inKallistoResults <- "../../runs/kallisto/kallisto_results"
inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
inMetadataSample <- "Source.Name"
inMetadataFactor <- "Factor..Photosynthesis.mode."
# inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
# inMetadataSample <- "Source.Name"
# inMetadataFactor <- "Factor..Photosynthesis.mode."
### Read arguments from CLI
......
......@@ -3,11 +3,8 @@
cwlVersion: v1.2
class: CommandLineTool
hints:
# DockerRequirement:
# dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2
DockerRequirement:
dockerFile: {$include: "./Dockerfile"}
dockerImageId: "deseq-docker"
dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0
requirements:
- class: InitialWorkDirRequirement
listing:
......
......@@ -5,7 +5,7 @@ class: CommandLineTool
requirements:
- class: DockerRequirement
dockerPull: quay.io/biocontainers/bioconductor-deseq2:1.42.0--r43hf17093f_2
dockerPull: quay.io/biocontainers/mulled-v2-05fd88b9ac812a9149da2f2d881d62f01cc49835:a10f0e3a7a70fc45494f8781d33901086d2214d0-0
baseCommand: [Rscript, --help]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment