Skip to content
Snippets Groups Projects
Commit 1a6c288a authored by Dominik Brilhaus's avatar Dominik Brilhaus
Browse files

Merge branch 'deseq2' into 'main'

Deseq2

See merge request !3
parents 9fd2c9f4 8a4c41b4
No related branches found
No related tags found
1 merge request!3Deseq2
Pipeline #4967 passed
```bash
cd runs/deseq2-run
```
```bash
cwltool ../../workflows/deseq2/deseq2.cwl job.yml
```
inKallistoResults:
class: Directory
path: ../../runs/kallisto/kallisto_results
inMetadataFile:
class: File
path: ../../runs/merged_isa_metadata/out/merged_isa.tsv
inMetadataSample: "Source.Name"
inMetadataFactor:
- "Factor..Photosynthesis.mode."
\ No newline at end of file
```bash
cd runs/isaSampleToRawDataSeq
```
```bash
cwltool ../../workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl job.yml
```
arcPath:
class: Directory
path: ../../
assayName: "Talinum_RNASeq_minimal"
outName: "rnaseq-samples"
startingNodeNum: 1
# DESeq2
Workflow used for **differential gene expression analysis**
## DESeq2 docs
- https://bioconductor.org/packages/release/bioc/html/DESeq2.html
## Importing kallisto output with tximport
- https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto
## Run pure script
```bash
RScript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode."
```
## Run CWL
# Install dependencies for deseq2
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("DESeq2")
library("DESeq2")
BiocManager::install("tximport")
library("tximport")
BiocManager::install("rhdf5")
library("rhdf5")
# DESeq2
## Libraries
library("DESeq2")
library("tximport")
library("rhdf5")
library("ggplot2")
## In-and-out
inKallistoResults <- "../../runs/kallisto/kallisto_results"
inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv"
inMetadataSample <- "Source.Name"
inMetadataFactor <- "Factor..Photosynthesis.mode."
### Read arguments from CLI
args <- commandArgs(trailingOnly = T)
inKallistoResults <- args[1]
inMetadataFile <- args[2]
inMetadataSample <- args[3]
inMetadataFactor <- args[4]
## Import kallisto count data
files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5")
names(files) <- dir(inKallistoResults)
txi <- tximport(files, type = "kallisto", txOut = TRUE)
head(txi$counts)
## Read sample metadata
samples_metadata <- read.table(file = inMetadataFile, sep = "\t")
samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)]
colnames(samples)[1:2] <- c("sampleID", "condition")
rownames(samples) <- samples$sampleID
## DESeq
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
dds <- DESeq(dds)
## Outputs
### Extract results
res <- results(dds)
write.csv(res, file = "results_stats.csv", append = FALSE, quote = TRUE)
### Generate and save default plots
png("results_ma-plot.png")
plotMA(res, ylim=c(-2,2))
dev.off()
vsd <- vst(dds, blind=FALSE)
pcaData <- plotPCA(vsd, intgroup=c("condition"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()
png("results_pca-plot.png")
print(p2)
dev.off()
cwlVersion: v1.2
class: CommandLineTool
# hints:
# DockerRequirement:
# dockerPull: r-base:4.4.2
requirements:
- class: InitialWorkDirRequirement
listing:
- entryname: deseq2.R
entry:
$include: deseq2.R
- class: NetworkAccess
networkAccess: true
baseCommand: [RScript, deseq2.R]
inputs:
inKallistoResults:
type: Directory
inputBinding:
position: 1
inMetadataFile:
type: File
inputBinding:
position: 2
inMetadataSample:
type: string
inputBinding:
position: 3
inMetadataFactor:
type: string[]
inputBinding:
position: 4
outputs:
output:
type: File[]
outputBinding:
glob:
- "*.png"
- "*.csv"
cwlVersion: v1.2
class: CommandLineTool
requirements:
- class: NetworkAccess
networkAccess: true
# - class: DockerRequirement
# dockerPull: r-base:4.4.2
baseCommand: [RScript, --help]
inputs: []
outputs: []
\ No newline at end of file
```bash
dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples
```
cwlVersion: v1.2
class: CommandLineTool
hints:
DockerRequirement:
dockerPull: mcr.microsoft.com/dotnet/sdk:6.0
requirements:
- class: InitialWorkDirRequirement
listing:
- entryname: isaSampleToRawDataSeq.fsx
entry:
$include: isaSampleToRawDataSeq.fsx
- class: EnvVarRequirement
envDef:
- envName: DOTNET_NOLOGO
envValue: "true"
- class: NetworkAccess
networkAccess: true
baseCommand: [dotnet, fsi, isaSampleToRawDataSeq.fsx]
inputs:
arcPath:
type: Directory
inputBinding:
position: 1
assayName:
type: string
inputBinding:
position: 2
outName:
type: string
inputBinding:
position: 3
startingNodeNum:
type: int
inputBinding:
position: 4
outputs:
output:
type: File[]
outputBinding:
glob:
- "*.tsv"
- "*.xlsx"
// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node
// Dependencies
#r "nuget: ARCtrl.NET, 2.0.2"
#r "nuget: ARCtrl.QueryModel, 2.0.2"
open System.IO
open ARCtrl.NET
open ARCtrl
open ARCtrl.QueryModel
// input parameters
let args : string array = fsi.CommandLineArgs |> Array.tail
let arcPath = args.[0]
let assayName = args.[1]
let startingNodeNum = args.[2] |> int
let outName = args.[3]
// test parameters
let source = __SOURCE_DIRECTORY__
let arcPath = Path.Combine(source, "../../")
let assayName = "Talinum_RNASeq_minimal"
let startingNodeNum = 1
let outName = "rnaseq-samples"
// Load ARC
let arc = ARC.load(arcPath)
let inv = arc.ISA.Value
// Load first data node
let firstData = inv.GetAssay(assayName).FirstData
// Create headers for output table
let headers = [
CompositeHeader.Input IOType.Sample
for v in inv.ArcTables.ValuesOf firstData.[0] do
if v.IsCharacteristicValue then
CompositeHeader.Characteristic v.Category
elif v.IsParameterValue then
CompositeHeader.Parameter v.Category
elif v.IsFactorValue then
CompositeHeader.Factor v.Category
elif v.IsComponent then
CompositeHeader.Component v.Category
else failwithf "what the f is %O" v
CompositeHeader.Output IOType.Data
]
// Create rows
let getRow (d: QNode) =
[|
CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name
for v in inv.ArcTables.ValuesOf d do
if v.HasUnit then
CompositeCell.Unitized(v.ValueText, v.Unit)
else
CompositeCell.Term(v.Value.AsOntology())
CompositeCell.FreeText d.Name
|]
// Combine into table
let t = ArcTable.init "FullTable"
t.Headers <- ResizeArray headers
for d in firstData do
t.AddRow (getRow d)
// Small detour via workbook
let ws = Spreadsheet.ArcTable.toFsWorksheet t
let wb = new FsSpreadsheet.FsWorkbook()
wb.AddWorksheet ws
// Write to csv
wb.ToCsvFile (outName + ".tsv", Separator = '\t')
wb.ToXlsxFile (outName + ".xlsx")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment