diff --git a/renv.Rmd b/renv.Rmd deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/runs/deseq2-run/README.md b/runs/deseq2-run/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f416fa74b81c27b40e1ede6cbaf308d5ba2e9287 --- /dev/null +++ b/runs/deseq2-run/README.md @@ -0,0 +1,10 @@ + + + +```bash +cd runs/deseq2-run +``` + +```bash +cwltool ../../workflows/deseq2/deseq2.cwl job.yml +``` diff --git a/runs/deseq2-run/job.yml b/runs/deseq2-run/job.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0386bd4bbf18d2b229848e339880d5f9477e99e --- /dev/null +++ b/runs/deseq2-run/job.yml @@ -0,0 +1,9 @@ +inKallistoResults: + class: Directory + path: ../../runs/kallisto/kallisto_results +inMetadataFile: + class: File + path: ../../runs/merged_isa_metadata/out/merged_isa.tsv +inMetadataSample: "Source.Name" +inMetadataFactor: + - "Factor..Photosynthesis.mode." \ No newline at end of file diff --git a/runs/isaSampleToRawDataSeq/README.md b/runs/isaSampleToRawDataSeq/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d8e1dcbbfe5360db3efbbe6c68c1233ef3ad7e1a --- /dev/null +++ b/runs/isaSampleToRawDataSeq/README.md @@ -0,0 +1,10 @@ + + + +```bash +cd runs/isaSampleToRawDataSeq +``` + +```bash +cwltool ../../workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl job.yml +``` diff --git a/runs/isaSampleToRawDataSeq/job.yml b/runs/isaSampleToRawDataSeq/job.yml new file mode 100644 index 0000000000000000000000000000000000000000..803a637a8ceb31f03237a2cda90c8752c638a140 --- /dev/null +++ b/runs/isaSampleToRawDataSeq/job.yml @@ -0,0 +1,6 @@ +arcPath: + class: Directory + path: ../../ +assayName: "Talinum_RNASeq_minimal" +outName: "rnaseq-samples" +startingNodeNum: 1 diff --git a/workflows/deseq2/README.md b/workflows/deseq2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..03944ea1446b8c7a58ed5a530e9a238d10e68c6a --- /dev/null +++ b/workflows/deseq2/README.md @@ -0,0 +1,21 @@ +# DESeq2 + +Workflow used for **differential gene expression analysis** + +## DESeq2 docs + +- https://bioconductor.org/packages/release/bioc/html/DESeq2.html + +## Importing kallisto output with tximport + +- https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto + + +## Run pure script + +```bash +RScript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." +``` + +## Run CWL + diff --git a/workflows/deseq2/dependencies.R b/workflows/deseq2/dependencies.R new file mode 100644 index 0000000000000000000000000000000000000000..d010bbb7caa76ff4fbb114655812cb7ccea20dfb --- /dev/null +++ b/workflows/deseq2/dependencies.R @@ -0,0 +1,14 @@ + +# Install dependencies for deseq2 + +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("DESeq2") +library("DESeq2") + +BiocManager::install("tximport") +library("tximport") + +BiocManager::install("rhdf5") +library("rhdf5") diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R new file mode 100644 index 0000000000000000000000000000000000000000..0384bc5d321b4d2ab18b59322be90aaf47d770d3 --- /dev/null +++ b/workflows/deseq2/deseq2.R @@ -0,0 +1,80 @@ +# DESeq2 + +## Libraries + +library("DESeq2") +library("tximport") +library("rhdf5") +library("ggplot2") + +## In-and-out + +inKallistoResults <- "../../runs/kallisto/kallisto_results" +inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +inMetadataSample <- "Source.Name" +inMetadataFactor <- "Factor..Photosynthesis.mode." + +### Read arguments from CLI + +args <- commandArgs(trailingOnly = T) + +inKallistoResults <- args[1] +inMetadataFile <- args[2] +inMetadataSample <- args[3] +inMetadataFactor <- args[4] + +## Import kallisto count data + +files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5") +names(files) <- dir(inKallistoResults) + +txi <- tximport(files, type = "kallisto", txOut = TRUE) + +head(txi$counts) + +## Read sample metadata + +samples_metadata <- read.table(file = inMetadataFile, sep = "\t") + +samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)] +colnames(samples)[1:2] <- c("sampleID", "condition") + +rownames(samples) <- samples$sampleID + +## DESeq + +dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition) + +dds <- DESeq(dds) + +## Outputs + +### Extract results + +res <- results(dds) +write.csv(res, file = "results_stats.csv", append = FALSE, quote = TRUE) + +### Generate and save default plots + +png("results_ma-plot.png") + plotMA(res, ylim=c(-2,2)) +dev.off() + +vsd <- vst(dds, blind=FALSE) +pcaData <- plotPCA(vsd, intgroup=c("condition"), returnData=TRUE) +percentVar <- round(100 * attr(pcaData, "percentVar")) + +p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) + + geom_point(size=3) + + xlab(paste0("PC1: ",percentVar[1],"% variance")) + + ylab(paste0("PC2: ",percentVar[2],"% variance")) + + coord_fixed() + +png("results_pca-plot.png") + print(p2) +dev.off() + + + + + diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl new file mode 100644 index 0000000000000000000000000000000000000000..2f926aaf9f58afb994efa1d20ff809c56a5f86a9 --- /dev/null +++ b/workflows/deseq2/deseq2.cwl @@ -0,0 +1,39 @@ +cwlVersion: v1.2 +class: CommandLineTool +# hints: +# DockerRequirement: +# dockerPull: r-base:4.4.2 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: deseq2.R + entry: + $include: deseq2.R + - class: NetworkAccess + networkAccess: true +baseCommand: [RScript, deseq2.R] +inputs: + inKallistoResults: + type: Directory + inputBinding: + position: 1 + inMetadataFile: + type: File + inputBinding: + position: 2 + inMetadataSample: + type: string + inputBinding: + position: 3 + inMetadataFactor: + type: string[] + inputBinding: + position: 4 + +outputs: + output: + type: File[] + outputBinding: + glob: + - "*.png" + - "*.csv" diff --git a/workflows/deseq2/r-docker-test.cwl b/workflows/deseq2/r-docker-test.cwl new file mode 100644 index 0000000000000000000000000000000000000000..3b2627952f299875eaf7e742f572317226f0ed1d --- /dev/null +++ b/workflows/deseq2/r-docker-test.cwl @@ -0,0 +1,15 @@ +cwlVersion: v1.2 +class: CommandLineTool + +requirements: + - class: NetworkAccess + networkAccess: true + # - class: DockerRequirement + # dockerPull: r-base:4.4.2 + +baseCommand: [RScript, --help] + +inputs: [] + +outputs: [] + \ No newline at end of file diff --git a/workflows/isaSampleToRawDataSeq/README.md b/workflows/isaSampleToRawDataSeq/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8ecbd0b4085a745e8261cee82c4a8116ed624667 --- /dev/null +++ b/workflows/isaSampleToRawDataSeq/README.md @@ -0,0 +1,7 @@ + + +```bash + +dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples + +``` diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl new file mode 100644 index 0000000000000000000000000000000000000000..a00b3e8187346d19521718fed02c0fd50c10d0ce --- /dev/null +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl @@ -0,0 +1,43 @@ +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: mcr.microsoft.com/dotnet/sdk:6.0 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: isaSampleToRawDataSeq.fsx + entry: + $include: isaSampleToRawDataSeq.fsx + - class: EnvVarRequirement + envDef: + - envName: DOTNET_NOLOGO + envValue: "true" + - class: NetworkAccess + networkAccess: true +baseCommand: [dotnet, fsi, isaSampleToRawDataSeq.fsx] +inputs: + arcPath: + type: Directory + inputBinding: + position: 1 + assayName: + type: string + inputBinding: + position: 2 + outName: + type: string + inputBinding: + position: 3 + startingNodeNum: + type: int + inputBinding: + position: 4 + +outputs: + output: + type: File[] + outputBinding: + glob: + - "*.tsv" + - "*.xlsx" diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx new file mode 100644 index 0000000000000000000000000000000000000000..4a9483c8630a1699465acb19c6dd027097e7193a --- /dev/null +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx @@ -0,0 +1,92 @@ +// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node + +// Dependencies + +#r "nuget: ARCtrl.NET, 2.0.2" +#r "nuget: ARCtrl.QueryModel, 2.0.2" + +open System.IO +open ARCtrl.NET +open ARCtrl +open ARCtrl.QueryModel + +// input parameters + +let args : string array = fsi.CommandLineArgs |> Array.tail +let arcPath = args.[0] +let assayName = args.[1] +let startingNodeNum = args.[2] |> int +let outName = args.[3] + + +// test parameters +let source = __SOURCE_DIRECTORY__ +let arcPath = Path.Combine(source, "../../") +let assayName = "Talinum_RNASeq_minimal" +let startingNodeNum = 1 +let outName = "rnaseq-samples" + + +// Load ARC + +let arc = ARC.load(arcPath) + +let inv = arc.ISA.Value + +// Load first data node + +let firstData = inv.GetAssay(assayName).FirstData + +// Create headers for output table +let headers = [ + CompositeHeader.Input IOType.Sample + for v in inv.ArcTables.ValuesOf firstData.[0] do + if v.IsCharacteristicValue then + CompositeHeader.Characteristic v.Category + elif v.IsParameterValue then + CompositeHeader.Parameter v.Category + elif v.IsFactorValue then + CompositeHeader.Factor v.Category + elif v.IsComponent then + CompositeHeader.Component v.Category + else failwithf "what the f is %O" v + + CompositeHeader.Output IOType.Data +] + +// Create rows + +let getRow (d: QNode) = + [| + + CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name + + for v in inv.ArcTables.ValuesOf d do + if v.HasUnit then + CompositeCell.Unitized(v.ValueText, v.Unit) + else + CompositeCell.Term(v.Value.AsOntology()) + + CompositeCell.FreeText d.Name + + |] + +// Combine into table + +let t = ArcTable.init "FullTable" +t.Headers <- ResizeArray headers + +for d in firstData do + t.AddRow (getRow d) + +// Small detour via workbook +let ws = Spreadsheet.ArcTable.toFsWorksheet t + +let wb = new FsSpreadsheet.FsWorkbook() + +wb.AddWorksheet ws + +// Write to csv + +wb.ToCsvFile (outName + ".tsv", Separator = '\t') +wb.ToXlsxFile (outName + ".xlsx")