Skip to content
Snippets Groups Projects
Commit 8a4c41b4 authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

deseq cwl works (without docker)

parent 95b5b5bd
No related branches found
No related tags found
1 merge request!3Deseq2
Pipeline #4966 failed
This commit is part of merge request !3. Comments created here will be created in the context of that merge request.
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
```bash ```bash
cd runs/deseq2 cd runs/deseq2-run
``` ```
```bash ```bash
......
arcPath: "../../" inKallistoResults:
inKallistoResults: "runs/kallisto/kallisto_results" class: Directory
inMetadataFile: "runs/merged_isa_metadata/out/merged_isa.tsv" path: ../../runs/kallisto/kallisto_results
inMetadataFile:
class: File
path: ../../runs/merged_isa_metadata/out/merged_isa.tsv
inMetadataSample: "Source.Name" inMetadataSample: "Source.Name"
inMetadataFactor: inMetadataFactor:
- "Factor..Photosynthesis.mode." - "Factor..Photosynthesis.mode."
\ No newline at end of file
...@@ -11,11 +11,10 @@ Workflow used for **differential gene expression analysis** ...@@ -11,11 +11,10 @@ Workflow used for **differential gene expression analysis**
- https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto
## Run pure script ## Run pure script
```bash ```bash
Rscript deseq2.R "../../" "runs/kallisto/kallisto_results" "runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." Rscript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
``` ```
## Run CWL ## Run CWL
......
File deleted
---
title: "Install dependencies"
author: "Dominik Brilhaus"
date: "`r Sys.Date()`"
output: html_document
---
# Install dependencies for deseq2
```{r}
if (!require("BiocManager", quietly = TRUE)) if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager") install.packages("BiocManager")
BiocManager::install("DESeq2") BiocManager::install("DESeq2")
library("DESeq2") library("DESeq2")
...@@ -19,6 +12,3 @@ library("tximport") ...@@ -19,6 +12,3 @@ library("tximport")
BiocManager::install("rhdf5") BiocManager::install("rhdf5")
library("rhdf5") library("rhdf5")
```
...@@ -9,26 +9,24 @@ library("ggplot2") ...@@ -9,26 +9,24 @@ library("ggplot2")
## In-and-out ## In-and-out
# arcPath <- "../../" inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inKallistoResults <- "runs/kallisto/kallisto_results" inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
# inMetadataFile <- "runs/merged_isa_metadata/out/merged_isa.tsv" inMetadataSample <- "Source.Name"
# inMetadataSample <- "Source.Name" inMetadataFactor <- "Factor..Photosynthesis.mode."
# inMetadataFactor <- "Factor..Photosynthesis.mode."
### Read arguments from CLI ### Read arguments from CLI
args <- commandArgs(trailingOnly = T) args <- commandArgs(trailingOnly = T)
arcPath <- args[1] inKallistoResults <- args[1]
inKallistoResults <- args[2] inMetadataFile <- args[2]
inMetadataFile <- args[3] inMetadataSample <- args[3]
inMetadataSample <- args[4] inMetadataFactor <- args[4]
inMetadataFactor <- args[5]
## Import kallisto count data ## Import kallisto count data
files <- dir(file.path(arcPath, inKallistoResults) , recursive = T, full.names = T ,"abundance.h5") files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5")
names(files) <- dir(file.path(arcPath, inKallistoResults)) names(files) <- dir(inKallistoResults)
txi <- tximport(files, type = "kallisto", txOut = TRUE) txi <- tximport(files, type = "kallisto", txOut = TRUE)
...@@ -36,7 +34,7 @@ head(txi$counts) ...@@ -36,7 +34,7 @@ head(txi$counts)
## Read sample metadata ## Read sample metadata
samples_metadata <- read.table(file = file.path(arcPath, inMetadataFile), sep = "\t") samples_metadata <- read.table(file = inMetadataFile, sep = "\t")
samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)] samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)]
colnames(samples)[1:2] <- c("sampleID", "condition") colnames(samples)[1:2] <- c("sampleID", "condition")
...@@ -49,16 +47,16 @@ dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition) ...@@ -49,16 +47,16 @@ dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
dds <- DESeq(dds) dds <- DESeq(dds)
## Extract results ## Outputs
res <- results(dds) ### Extract results
res
## Outputs res <- results(dds)
# Save the DESeq2 result table. `append` is not passed: write.csv() always
# overwrites and only emits a warning when the argument is supplied.
write.csv(res, file = "results_stats.csv", quote = TRUE)
### Generate and save default plots ### Generate and save default plots
png("ma-plot.png") png("results_ma-plot.png")
plotMA(res, ylim=c(-2,2)) plotMA(res, ylim=c(-2,2))
dev.off() dev.off()
...@@ -72,7 +70,7 @@ p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) + ...@@ -72,7 +70,7 @@ p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) + ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() coord_fixed()
png("pca-plot.png") png("results_pca-plot.png")
print(p2) print(p2)
dev.off() dev.off()
......
---
title: "deseq2"
author: "Dominik Brilhaus"
date: "`r Sys.Date()`"
output: html_document
---
## Libraries
```{r}
library("DESeq2")
library("tximport")
library("rhdf5")
library("ggplot2")
```
## In-and-out
```{r}
# Path prefix prepended to both input locations below
# (presumably the ARC root relative to this workflow folder -- confirm).
arc <- "../../"
# Directory that is searched recursively for kallisto "abundance.h5" files.
inKallistoResults <- file.path(arc, "runs/kallisto/kallisto_results")
# Tab-separated sample metadata table.
inMetadataFile <- file.path(arc, "runs/merged_isa_metadata/out/merged_isa.tsv")
# Metadata column with the sample identifier; used for ordering and as row names.
inMetadataSample <- "Source.Name"
# Metadata column that becomes `condition` in the DESeq2 design.
inMetadataFactor <- "Factor..Photosynthesis.mode."
```
## Import kallisto count data
```{r}
# Collect every kallisto abundance file below the results directory.
# The pattern is a regular expression, so it is anchored and the dot escaped
# to match only files literally named "abundance.h5".
files <- list.files(
  inKallistoResults,
  pattern = "^abundance\\.h5$",
  recursive = TRUE,
  full.names = TRUE
)
if (length(files) == 0) {
  stop("No abundance.h5 files found below: ", inKallistoResults)
}

# Name each file after the directory that contains it. Deriving the names from
# the file paths themselves keeps names and files aligned even when the results
# directory holds additional entries or sorts differently.
names(files) <- basename(dirname(files))

# Import transcript-level estimates (txOut = TRUE: no gene-level summarisation).
txi <- tximport(files, type = "kallisto", txOut = TRUE)
head(txi$counts)
```
## Read sample metadata
```{r}
# Load the tab-separated metadata table.
samples_metadata <- read.table(inMetadataFile, sep = "\t")

# Keep only the sample-ID and factor columns, with rows sorted by sample ID.
row_order <- order(samples_metadata[[inMetadataSample]])
keep_cols <- c(inMetadataSample, inMetadataFactor)
samples <- samples_metadata[row_order, keep_cols]

# Standardise the column names expected downstream and index rows by sample ID.
colnames(samples)[1:2] <- c("sampleID", "condition")
rownames(samples) <- samples[["sampleID"]]
```
## DESeq
```{r}
# Build the DESeq2 data set from the tximport object (design: ~ condition)
# and run the DESeq pipeline on it.
dds <- DESeq(
  DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
)

# Extract the result table and display it in the rendered document.
res <- results(dds)
res

# MA plot with the y-axis limited to [-2, 2].
plotMA(res, ylim = c(-2, 2))
```
```{r}
# Variance-stabilising transformation (blind = FALSE) followed by PCA data
# grouped by `condition`.
vsd <- vst(dds, blind = FALSE)
pcaData <- plotPCA(vsd, intgroup = "condition", returnData = TRUE)

# Percent variance per component, rounded for the axis labels.
percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData, aes(PC1, PC2, color = condition)) +
  geom_point(size = 3) +
  labs(
    x = paste0("PC1: ", percentVar[1], "% variance"),
    y = paste0("PC2: ", percentVar[2], "% variance")
  ) +
  coord_fixed()
```
...@@ -13,30 +13,27 @@ requirements: ...@@ -13,30 +13,27 @@ requirements:
networkAccess: true networkAccess: true
baseCommand: [Rscript, deseq2.R] baseCommand: [Rscript, deseq2.R]
inputs: inputs:
arcPath:
type: string
inputBinding:
position: 1
inKallistoResults: inKallistoResults:
type: string type: Directory
inputBinding: inputBinding:
position: 2 position: 1
inMetadataFile: inMetadataFile:
type: string type: File
inputBinding: inputBinding:
position: 3 position: 2
inMetadataSample: inMetadataSample:
type: string type: string
inputBinding: inputBinding:
position: 4 position: 3
inMetadataFactor: inMetadataFactor:
type: string[] type: string[]
inputBinding: inputBinding:
position: 5 position: 4
outputs: outputs:
output: output:
type: File[] type: File[]
outputBinding: outputBinding:
glob: glob:
- "*" - "*.png"
- "*.csv"
```bash ```bash
dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
``` ```
...@@ -5,8 +5,6 @@ ...@@ -5,8 +5,6 @@
#r "nuget: ARCtrl.NET, 2.0.2" #r "nuget: ARCtrl.NET, 2.0.2"
#r "nuget: ARCtrl.QueryModel, 2.0.2" #r "nuget: ARCtrl.QueryModel, 2.0.2"
// open FsSpreadsheet.CsvIO
open FsSpreadsheet.Net
open System.IO open System.IO
open ARCtrl.NET open ARCtrl.NET
open ARCtrl open ARCtrl
...@@ -17,19 +15,17 @@ open ARCtrl.QueryModel ...@@ -17,19 +15,17 @@ open ARCtrl.QueryModel
let args : string array = fsi.CommandLineArgs |> Array.tail let args : string array = fsi.CommandLineArgs |> Array.tail
let arcPath = args.[0] let arcPath = args.[0]
let assayName = args.[1] let assayName = args.[1]
let outName = args.[2] let startingNodeNum = args.[2] |> int
let outName = args.[3]
let startingNodeNum = args.[3] |> int
// test parameters // test parameters
// Hard-coded values for interactive testing. Kept commented out: when active
// they redefine arcPath, assayName, startingNodeNum and outName, which are
// read from the command-line arguments above.
// let source = __SOURCE_DIRECTORY__
// let arcPath = Path.Combine(source, "../../")
// let assayName = "Talinum_RNASeq_minimal"
// let startingNodeNum = 1
// let outName = "rnaseq-samples"
// let source = __SOURCE_DIRECTORY__
// let arcPath = Path.Combine(source, "../../")
// let assayName = "pick2012_illumina_rnaseq"
// let outName = "out.csv"
// Load ARC // Load ARC
...@@ -55,7 +51,7 @@ let headers = [ ...@@ -55,7 +51,7 @@ let headers = [
CompositeHeader.Component v.Category CompositeHeader.Component v.Category
else failwithf "what the f is %O" v else failwithf "what the f is %O" v
CompositeHeader.Output IOType.RawDataFile CompositeHeader.Output IOType.Data
] ]
// Create rows // Create rows
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment