Skip to content
Snippets Groups Projects
Commit 95b5b5bd authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

towards wrap deseq in cwl

parent 28e57415
No related branches found
No related tags found
1 merge request!3Deseq2
Pipeline #4962 passed
```bash
cd runs/deseq2
```
```bash
cwltool ../../workflows/deseq2/deseq2.cwl job.yml
```
# Job parameters for the DESeq2 CWL tool (run with: cwltool ../../workflows/deseq2/deseq2.cwl job.yml).
# Each key corresponds to an input of the same name declared in the tool description.

# Path to the ARC root; the two paths below are joined onto it by deseq2.R.
# NOTE(review): this is passed as a plain string, so it is resolved relative to the
# directory the tool process runs in - confirm this still points at the ARC root under cwltool.
arcPath: "../../"
# Folder (relative to arcPath) searched recursively for kallisto "abundance.h5" files.
inKallistoResults: "runs/kallisto/kallisto_results"
# Tab-separated sample metadata table (relative to arcPath).
inMetadataFile: "runs/merged_isa_metadata/out/merged_isa.tsv"
# Metadata column holding the sample identifiers.
inMetadataSample: "Source.Name"
# Metadata column(s) used as the experimental factor; deseq2.R reads only the first one.
inMetadataFactor:
- "Factor..Photosynthesis.mode."
\ No newline at end of file
...@@ -9,3 +9,14 @@ Workflow used for **differential gene expression analysis** ...@@ -9,3 +9,14 @@ Workflow used for **differential gene expression analysis**
## Importing kallisto output with tximport ## Importing kallisto output with tximport
- https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto
## Run pure script
```bash
Rscript deseq2.R "../../" "runs/kallisto/kallisto_results" "runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
```
## Run CWL
File added
# DESeq2
#
# Differential gene expression analysis of kallisto quantifications.
#
# Usage:
#   Rscript deseq2.R <arcPath> <inKallistoResults> <inMetadataFile> <inMetadataSample> <inMetadataFactor>
#
# Arguments (positional):
#   arcPath            root directory that the two relative paths below are joined onto
#   inKallistoResults  folder (relative to arcPath) searched recursively for "abundance.h5" files
#   inMetadataFile     tab-separated sample metadata table with a header row (relative to arcPath)
#   inMetadataSample   name of the metadata column holding the sample identifiers
#   inMetadataFactor   name of the metadata column used as the design factor ("condition")
#
# Outputs (written to the current working directory):
#   ma-plot.png   MA plot of the DESeq2 results
#   pca-plot.png  PCA plot (PC1 vs. PC2) of the variance-stabilised data

## Libraries

library("DESeq2")
library("tximport")
library("rhdf5")
library("ggplot2")

## In-and-out

# Example values for interactive use:
# arcPath <- "../../"
# inKallistoResults <- "runs/kallisto/kallisto_results"
# inMetadataFile <- "runs/merged_isa_metadata/out/merged_isa.tsv"
# inMetadataSample <- "Source.Name"
# inMetadataFactor <- "Factor..Photosynthesis.mode."

### Read arguments from CLI

args <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage hint instead of propagating NA paths / column names.
if (length(args) < 5) {
  stop(
    "Expected 5 arguments: <arcPath> <inKallistoResults> <inMetadataFile> ",
    "<inMetadataSample> <inMetadataFactor>, got ", length(args), ".",
    call. = FALSE
  )
}

arcPath <- args[1]
inKallistoResults <- args[2]
inMetadataFile <- args[3]
inMetadataSample <- args[4]
inMetadataFactor <- args[5]

## Import kallisto count data

kallistoDir <- file.path(arcPath, inKallistoResults)

# The pattern is a regular expression: anchor it and escape the dot so that
# only files named exactly "abundance.h5" are picked up.
files <- dir(
  kallistoDir,
  pattern = "^abundance\\.h5$",
  recursive = TRUE,
  full.names = TRUE
)

if (length(files) == 0) {
  stop("No 'abundance.h5' files found below '", kallistoDir, "'.", call. = FALSE)
}

# Name every file after the folder that contains it. Deriving the names from
# the files themselves keeps names and paths paired even when the results
# folder holds additional entries.
names(files) <- basename(dirname(files))

txi <- tximport(files, type = "kallisto", txOut = TRUE)

head(txi$counts)

## Read sample metadata

# header = TRUE: the sample and factor columns are looked up by name below,
# so the first row of the table must be read as column names.
samples_metadata <- read.table(
  file = file.path(arcPath, inMetadataFile),
  sep = "\t",
  header = TRUE
)

missingColumns <- setdiff(c(inMetadataSample, inMetadataFactor), colnames(samples_metadata))
if (length(missingColumns) > 0) {
  stop(
    "Column(s) not found in metadata file: ",
    paste(missingColumns, collapse = ", "),
    call. = FALSE
  )
}

samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)]
colnames(samples)[1:2] <- c("sampleID", "condition")
rownames(samples) <- samples$sampleID

# When every quantified sample is described in the metadata, put the metadata
# rows into the column order of the count data instead of relying on two
# independent sorts agreeing with each other.
if (all(names(files) %in% rownames(samples))) {
  samples <- samples[names(files), , drop = FALSE]
}

## DESeq

dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
dds <- DESeq(dds)

## Extract results

res <- results(dds)
res

## Outputs

### Generate and save default plots

png("ma-plot.png")
plotMA(res, ylim = c(-2, 2))
dev.off()

vsd <- vst(dds, blind = FALSE)
pcaData <- plotPCA(vsd, intgroup = c("condition"), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))

p2 <- ggplot(pcaData, aes(PC1, PC2, color = condition)) +
  geom_point(size = 3) +
  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
  coord_fixed()

png("pca-plot.png")
print(p2)
dev.off()
# CWL wrapper around deseq2.R: stages the script into the working directory and
# runs it with the five positional arguments the script reads from the command line.
cwlVersion: v1.2
class: CommandLineTool
# hints:
#   DockerRequirement:
#     dockerPull: r-base:4.4.2
requirements:
  # Copy the R script next to the tool so it can be called by its bare file name.
  - class: InitialWorkDirRequirement
    listing:
      - entryname: deseq2.R
        entry:
          $include: deseq2.R
  - class: NetworkAccess
    networkAccess: true
# The R front-end executable is spelled "Rscript"; "RScript" is not found on
# case-sensitive file systems.
baseCommand: [Rscript, deseq2.R]
inputs:
  # Positions 1-5 mirror args[1]..args[5] in deseq2.R.
  arcPath:
    type: string
    inputBinding:
      position: 1
  inKallistoResults:
    type: string
    inputBinding:
      position: 2
  inMetadataFile:
    type: string
    inputBinding:
      position: 3
  inMetadataSample:
    type: string
    inputBinding:
      position: 4
  # Declared as an array, but deseq2.R only reads the first value (args[5]).
  inMetadataFactor:
    type: string[]
    inputBinding:
      position: 5
outputs:
  # Collects every file left in the output directory.
  output:
    type: File[]
    outputBinding:
      glob:
        - "*"
# Minimal smoke-test tool: calls the R scripting front-end with --help.
# It takes no inputs and collects no outputs.
cwlVersion: v1.2
class: CommandLineTool
requirements:
  - class: NetworkAccess
    networkAccess: true
  # - class: DockerRequirement
  #   dockerPull: r-base:4.4.2
# The R front-end executable is spelled "Rscript"; "RScript" is not found on
# case-sensitive file systems.
baseCommand: [Rscript, --help]
inputs: []
outputs: []
\ No newline at end of file
```bash ```bash
dotnet fsi isa-sampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
``` ```
...@@ -6,16 +6,16 @@ hints: ...@@ -6,16 +6,16 @@ hints:
requirements: requirements:
- class: InitialWorkDirRequirement - class: InitialWorkDirRequirement
listing: listing:
- entryname: isa-sampleToRawDataSeq.fsx - entryname: isaSampleToRawDataSeq.fsx
entry: entry:
$include: isa-sampleToRawDataSeq.fsx $include: isaSampleToRawDataSeq.fsx
- class: EnvVarRequirement - class: EnvVarRequirement
envDef: envDef:
- envName: DOTNET_NOLOGO - envName: DOTNET_NOLOGO
envValue: "true" envValue: "true"
- class: NetworkAccess - class: NetworkAccess
networkAccess: true networkAccess: true
baseCommand: [dotnet, fsi, isa-sampleToRawDataSeq.fsx] baseCommand: [dotnet, fsi, isaSampleToRawDataSeq.fsx]
inputs: inputs:
arcPath: arcPath:
type: Directory type: Directory
......
...@@ -3,15 +3,13 @@ ...@@ -3,15 +3,13 @@
// Dependencies // Dependencies
#r "nuget: ARCtrl.NET, 2.0.2" #r "nuget: ARCtrl.NET, 2.0.2"
#r "nuget: ARCtrl.QueryModel, 1.0.5" #r "nuget: ARCtrl.QueryModel, 2.0.2"
#r "nuget: FsSpreadsheet.CsvIO, 6.2.0"
open FsSpreadsheet.CsvIO // open FsSpreadsheet.CsvIO
open FsSpreadsheet.Net open FsSpreadsheet.Net
open System.IO open System.IO
open ARCtrl.NET open ARCtrl.NET
open ARCtrl open ARCtrl
open ARCtrl.ISA
open ARCtrl.QueryModel open ARCtrl.QueryModel
// input parameters // input parameters
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment