Skip to content
Snippets Groups Projects
Commit 8a4c41b4 authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

deseq cwl works (without docker)

parent 95b5b5bd
No related branches found
No related tags found
1 merge request: !3 Deseq2
Pipeline #4966 failed
......@@ -2,7 +2,7 @@
```bash
cd runs/deseq2
cd runs/deseq2-run
```
```bash
......
arcPath: "../../"
inKallistoResults: "runs/kallisto/kallisto_results"
inMetadataFile: "runs/merged_isa_metadata/out/merged_isa.tsv"
inKallistoResults:
class: Directory
path: ../../runs/kallisto/kallisto_results
inMetadataFile:
class: File
path: ../../runs/merged_isa_metadata/out/merged_isa.tsv
inMetadataSample: "Source.Name"
inMetadataFactor:
- "Factor..Photosynthesis.mode."
\ No newline at end of file
......@@ -11,11 +11,10 @@ Workflow used for **differential gene expression analysis**
- https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto
## Run pure script
```bash
RScript deseq2.R "../../" "runs/kallisto/kallisto_results" "runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
RScript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
```
## Run CWL
......
File deleted
---
title: "Install dependencies"
author: "Dominik Brilhaus"
date: "`r Sys.Date()`"
output: html_document
---
# Install dependencies for deseq2
```{r}
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
library("DESeq2")
......@@ -19,6 +12,3 @@ library("tximport")
BiocManager::install("rhdf5")
library("rhdf5")
```
......@@ -9,26 +9,24 @@ library("ggplot2")
## In-and-out
# arcPath <- "../../"
# inKallistoResults <- "runs/kallisto/kallisto_results"
# inMetadataFile <- "runs/merged_isa_metadata/out/merged_isa.tsv"
# inMetadataSample <- "Source.Name"
# inMetadataFactor <- "Factor..Photosynthesis.mode."
inKallistoResults <- "../../runs/kallisto/kallisto_results"
inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
inMetadataSample <- "Source.Name"
inMetadataFactor <- "Factor..Photosynthesis.mode."
### Read arguments from CLI
args <- commandArgs(trailingOnly = T)
arcPath <- args[1]
inKallistoResults <- args[2]
inMetadataFile <- args[3]
inMetadataSample <- args[4]
inMetadataFactor <- args[5]
inKallistoResults <- args[1]
inMetadataFile <- args[2]
inMetadataSample <- args[3]
inMetadataFactor <- args[4]
## Import kallisto count data
files <- dir(file.path(arcPath, inKallistoResults) , recursive = T, full.names = T ,"abundance.h5")
names(files) <- dir(file.path(arcPath, inKallistoResults))
files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5")
names(files) <- dir(inKallistoResults)
txi <- tximport(files, type = "kallisto", txOut = TRUE)
......@@ -36,7 +34,7 @@ head(txi$counts)
## Read sample metadata
samples_metadata <- read.table(file = file.path(arcPath, inMetadataFile), sep = "\t")
samples_metadata <- read.table(file = inMetadataFile, sep = "\t")
samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)]
colnames(samples)[1:2] <- c("sampleID", "condition")
......@@ -49,16 +47,16 @@ dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
dds <- DESeq(dds)
## Extract results
## Outputs
res <- results(dds)
res
### Extract results
## Outputs
res <- results(dds)
write.csv(res, file = "results_stats.csv", append = FALSE, quote = TRUE)
### Generate and save default plots
png("ma-plot.png")
png("results_ma-plot.png")
plotMA(res, ylim=c(-2,2))
dev.off()
......@@ -72,7 +70,7 @@ p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()
png("pca-plot.png")
png("results_pca-plot.png")
print(p2)
dev.off()
......
---
title: "deseq2"
author: "Dominik Brilhaus"
date: "`r Sys.Date()`"
output: html_document
---
## Libraries
```{r}
library("DESeq2")
library("tximport")
library("rhdf5")
library("ggplot2")
```
## In-and-out
```{r}
# Base directory that is prefixed to the two input paths below.
arc <- "../../"
# Directory that is searched recursively for kallisto `abundance.h5` files.
inKallistoResults <- file.path(arc, "runs/kallisto/kallisto_results")
# Tab-separated sample metadata table.
inMetadataFile <- file.path(arc, "runs/merged_isa_metadata/out/merged_isa.tsv")
# Metadata column holding the sample identifier (rows are later sorted by it).
inMetadataSample <- "Source.Name"
# Metadata column that becomes `condition` in the DESeq2 design formula.
inMetadataFactor <- "Factor..Photosynthesis.mode."
```
## Import kallisto count data
```{r}
# Find every kallisto abundance file below the results directory.
# The pattern is anchored so that only files named exactly "abundance.h5" match.
# Paths are listed relative to `inKallistoResults` so the sample name can be
# taken from the same listing.
relPaths <- list.files(inKallistoResults, pattern = "^abundance\\.h5$", recursive = TRUE)
files <- file.path(inKallistoResults, relPaths)
# Name each file after its top-level directory inside `inKallistoResults`.
# Deriving the names from the listing itself (instead of a second, independent
# `dir()` call) keeps names and paths aligned even if the directory contains
# extra entries or the two listings would sort differently.
names(files) <- vapply(strsplit(relPaths, "/", fixed = TRUE), `[`, character(1), 1)
# Import transcript-level estimates (txOut = TRUE: no gene-level summarisation).
txi <- tximport(files, type = "kallisto", txOut = TRUE)
# Preview the first rows of the imported count matrix.
head(txi$counts)
```
## Read sample metadata
```{r}
# Read the tab-separated sample metadata table.
# NOTE(review): `header` is not set, so read.table decides from the file layout
# whether the first row holds column names — confirm the columns named in
# `inMetadataSample` / `inMetadataFactor` are actually picked up.
samples_metadata <- read.table(file = inMetadataFile, sep = "\t")
# Keep only the sample-ID and factor columns, with rows sorted by sample ID.
samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)]
# Rename to the fixed names used downstream (`condition` appears in the design formula).
colnames(samples)[1:2] <- c("sampleID", "condition")
# Use the sample ID as row name of the sample table.
rownames(samples) <- samples$sampleID
```
## DESeq
```{r}
# Build the DESeq2 dataset from the tximport result, with `samples` as the
# sample table and `condition` as the only term in the design.
dds <- DESeqDataSetFromTximport(txi,
colData = samples,
design = ~ condition)
# Run DESeq on the dataset and extract the results table.
dds <- DESeq(dds)
res <- results(dds)
# Print the results table in the rendered document.
res
# MA plot of the results with the y-axis limited to [-2, 2].
plotMA(res, ylim=c(-2,2))
```
```{r}
# Transform the dataset with `vst` (blind = FALSE) before computing the PCA.
vsd <- vst(dds, blind=FALSE)
# returnData = TRUE: get the PCA coordinates as data instead of a finished plot.
pcaData <- plotPCA(vsd, intgroup=c("condition"), returnData=TRUE)
# The "percentVar" attribute is scaled by 100 and rounded for the axis labels.
percentVar <- round(100 * attr(pcaData, "percentVar"))
# Scatter plot of PC1 vs. PC2, coloured by condition, with fixed axis ratio.
ggplot(pcaData, aes(PC1, PC2, color=condition)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()
```
......@@ -13,30 +13,27 @@ requirements:
networkAccess: true
baseCommand: [RScript, deseq2.R]
inputs:
arcPath:
type: string
inputBinding:
position: 1
inKallistoResults:
type: string
type: Directory
inputBinding:
position: 2
position: 1
inMetadataFile:
type: string
type: File
inputBinding:
position: 3
position: 2
inMetadataSample:
type: string
inputBinding:
position: 4
position: 3
inMetadataFactor:
type: string[]
inputBinding:
position: 5
position: 4
outputs:
output:
type: File[]
outputBinding:
glob:
- "*"
- "*.png"
- "*.csv"
```bash
dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
```
......@@ -5,8 +5,6 @@
#r "nuget: ARCtrl.NET, 2.0.2"
#r "nuget: ARCtrl.QueryModel, 2.0.2"
// open FsSpreadsheet.CsvIO
open FsSpreadsheet.Net
open System.IO
open ARCtrl.NET
open ARCtrl
......@@ -17,19 +15,17 @@ open ARCtrl.QueryModel
let args : string array = fsi.CommandLineArgs |> Array.tail
let arcPath = args.[0]
let assayName = args.[1]
let outName = args.[2]
let startingNodeNum = args.[3] |> int
let startingNodeNum = args.[2] |> int
let outName = args.[3]
// test parameters
let source = __SOURCE_DIRECTORY__
let arcPath = Path.Combine(source, "../../")
let assayName = "Talinum_RNASeq_minimal"
let startingNodeNum = 1
let outName = "rnaseq-samples"
// let source = __SOURCE_DIRECTORY__
// let arcPath = Path.Combine(source, "../../")
// let assayName = "pick2012_illumina_rnaseq"
// let outName = "out.csv"
// Load ARC
......@@ -55,7 +51,7 @@ let headers = [
CompositeHeader.Component v.Category
else failwithf "what the f is %O" v
CompositeHeader.Output IOType.RawDataFile
CompositeHeader.Output IOType.Data
]
// Create rows
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment