diff --git a/runs/deseq2/README.md b/runs/deseq2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..40fc457539dcfaea4a97a7a09da915326772d03d --- /dev/null +++ b/runs/deseq2/README.md @@ -0,0 +1,10 @@ + + + +```bash +cd runs/deseq2 +``` + +```bash +cwltool ../../workflows/deseq2/deseq2.cwl job.yml +``` diff --git a/runs/deseq2/job.yml b/runs/deseq2/job.yml new file mode 100644 index 0000000000000000000000000000000000000000..9e1620b7daad31a9db2823e275d5161360f1ee47 --- /dev/null +++ b/runs/deseq2/job.yml @@ -0,0 +1,6 @@ +arcPath: "../../" +inKallistoResults: "runs/kallisto/kallisto_results" +inMetadataFile: "runs/merged_isa_metadata/out/merged_isa.tsv" +inMetadataSample: "Source.Name" +inMetadataFactor: + - "Factor..Photosynthesis.mode." \ No newline at end of file diff --git a/workflows/deseq2/README.md b/workflows/deseq2/README.md index 5c1de5c392472eea78490002ea2f55e3116e2176..77af7fd2cf7a2792632216d0e71622f09ffaae57 100644 --- a/workflows/deseq2/README.md +++ b/workflows/deseq2/README.md @@ -9,3 +9,14 @@ Workflow used for **differential gene expression analysis** ## Importing kallisto output with tximport - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto + + + +## Run pure script + +```bash +Rscript deseq2.R "../../" "runs/kallisto/kallisto_results" "runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." 
+``` + + ## Run CWL + + diff --git a/workflows/deseq2/Rplots.pdf b/workflows/deseq2/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2e31bb14782fb773db7184266402900554696778 Binary files /dev/null and b/workflows/deseq2/Rplots.pdf differ diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R new file mode 100644 index 0000000000000000000000000000000000000000..3de7cfca7d0878e3179a187684258d8e8cf7fce1 --- /dev/null +++ b/workflows/deseq2/deseq2.R @@ -0,0 +1,131 @@
+# DESeq2
+#
+# Differential gene expression analysis of kallisto quantifications: imports
+# the abundance.h5 files with tximport, fits DESeq2 with the design
+# ~ condition, and writes ma-plot.png and pca-plot.png to the working directory.
+
+## Libraries
+
+library("DESeq2")
+library("tximport")
+library("rhdf5")
+library("ggplot2")
+
+## In-and-out
+
+# Example values (both paths are resolved relative to arcPath):
+# arcPath <- "../../"
+# inKallistoResults <- "runs/kallisto/kallisto_results"
+# inMetadataFile <- "runs/merged_isa_metadata/out/merged_isa.tsv"
+# inMetadataSample <- "Source.Name"
+# inMetadataFactor <- "Factor..Photosynthesis.mode."
+
+### Read arguments from CLI
+
+args <- commandArgs(trailingOnly = TRUE)
+
+# Fail early with a usage hint rather than passing NA on to file.path() and
+# the column lookups below.
+if (length(args) < 5) {
+  stop(
+    "Usage: Rscript deseq2.R <arcPath> <inKallistoResults> <inMetadataFile> ",
+    "<inMetadataSample> <inMetadataFactor>",
+    call. = FALSE
+  )
+}
+
+arcPath <- args[1]
+inKallistoResults <- args[2]
+inMetadataFile <- args[3]
+inMetadataSample <- args[4]
+# Only one factor is read; any further arguments are ignored.
+inMetadataFactor <- args[5]
+
+## Import kallisto count data
+
+kallistoDir <- file.path(arcPath, inKallistoResults)
+
+# Recursive listing without full.names, so paths are relative to kallistoDir.
+relFiles <- dir(kallistoDir, pattern = "^abundance\\.h5$", recursive = TRUE)
+
+if (length(relFiles) == 0) {
+  stop("No abundance.h5 files found under ", kallistoDir, call. = FALSE)
+}
+
+files <- file.path(kallistoDir, relFiles)
+
+# Name every file after the top-level directory it was found in. Taking the
+# name from the file's own path keeps names and files paired even if the
+# results directory holds other entries or the two listings sort differently.
+names(files) <- sub("/.*$", "", relFiles)
+
+txi <- tximport(files, type = "kallisto", txOut = TRUE)
+
+head(txi$counts)
+
+## Read sample metadata
+
+# The columns are selected by name below, so the first row must be read as a
+# header. Without header = TRUE, read.table() only does that when the first
+# row has one field fewer than the following rows.
+samples_metadata <- read.table(
+  file = file.path(arcPath, inMetadataFile),
+  sep = "\t",
+  header = TRUE
+)
+
+missingColumns <- setdiff(
+  c(inMetadataSample, inMetadataFactor),
+  colnames(samples_metadata)
+)
+if (length(missingColumns) > 0) {
+  stop(
+    "Column(s) not found in metadata file: ",
+    paste(missingColumns, collapse = ", "),
+    call. = FALSE
+  )
+}
+
+# Sort by sample ID and keep only the sample and factor columns.
+# NOTE(review): this relies on the sorted IDs lining up with the columns of
+# txi (named after the kallisto result directories) - confirm for new data.
+samples <- samples_metadata[
+  order(samples_metadata[[inMetadataSample]]),
+  c(inMetadataSample, inMetadataFactor)
+]
+colnames(samples)[1:2] <- c("sampleID", "condition")
+
+rownames(samples) <- samples$sampleID
+
+## DESeq
+
+dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition)
+
+dds <- DESeq(dds)
+
+## Extract results
+
+res <- results(dds)
+res
+
+## Outputs
+
+### Generate and save default plots
+
+png("ma-plot.png")
+plotMA(res, ylim = c(-2, 2))
+dev.off()
+
+vsd <- vst(dds, blind = FALSE)
+pcaData <- plotPCA(vsd, intgroup = c("condition"), returnData = TRUE)
+percentVar <- round(100 * attr(pcaData, "percentVar"))
+
+p2 <- ggplot(pcaData, aes(PC1, PC2, color = condition)) +
+  geom_point(size = 3) +
+  xlab(paste0("PC1: ", percentVar[1], "% variance")) +
+  ylab(paste0("PC2: ", percentVar[2], "% variance")) +
+  coord_fixed()
+
+png("pca-plot.png")
+print(p2)
+dev.off()
diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl new file mode 100644 index 0000000000000000000000000000000000000000..1349e7ce2d9283004cde1384a87518880e7ef1f5 --- /dev/null +++ b/workflows/deseq2/deseq2.cwl @@ -0,0 +1,42 @@ +cwlVersion: v1.2 +class: CommandLineTool +# hints: +# DockerRequirement: +# dockerPull: r-base:4.4.2 +requirements: + - class: InitialWorkDirRequirement + listing: + - entryname: deseq2.R + entry: + $include: deseq2.R + - class: NetworkAccess + networkAccess: true +baseCommand: [Rscript, deseq2.R] +inputs: + arcPath: + type: string + inputBinding: + position: 1 + inKallistoResults: + type: string + inputBinding: + position: 2 + inMetadataFile: + type: string + inputBinding: + position: 3 + inMetadataSample: + type: string + inputBinding: + position: 4 + inMetadataFactor: + type: string[] + inputBinding: + position: 5 + +outputs: + output: + type: File[] + outputBinding: + glob: + - "*" diff --git a/workflows/deseq2/r-docker-test.cwl b/workflows/deseq2/r-docker-test.cwl new file mode 100644 index 0000000000000000000000000000000000000000..3b2627952f299875eaf7e742f572317226f0ed1d --- /dev/null +++ b/workflows/deseq2/r-docker-test.cwl @@ -0,0 +1,15 @@ +cwlVersion: v1.2 +class: CommandLineTool + +requirements: + - class: NetworkAccess + networkAccess: true + # - class: DockerRequirement + # dockerPull: r-base:4.4.2 + +baseCommand: [Rscript, --help] + +inputs: [] + +outputs: [] + \ No newline at end of file diff --git a/workflows/isaSampleToRawDataSeq/README.md b/workflows/isaSampleToRawDataSeq/README.md index 
e831cf15ddc19dc8fcb8ba5e5424bc38c0b17e72..70526a1c518c0beb2556e6fdba1b2a0cb7b5225a 100644 --- a/workflows/isaSampleToRawDataSeq/README.md +++ b/workflows/isaSampleToRawDataSeq/README.md @@ -1,7 +1,8 @@ ```bash -dotnet fsi isa-sampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples +dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples + ``` diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl index 9916ed4327b05557fa59b6ceee3995a94dc3d837..a00b3e8187346d19521718fed02c0fd50c10d0ce 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl @@ -6,16 +6,16 @@ hints: requirements: - class: InitialWorkDirRequirement listing: - - entryname: isa-sampleToRawDataSeq.fsx + - entryname: isaSampleToRawDataSeq.fsx entry: - $include: isa-sampleToRawDataSeq.fsx + $include: isaSampleToRawDataSeq.fsx - class: EnvVarRequirement envDef: - envName: DOTNET_NOLOGO envValue: "true" - class: NetworkAccess networkAccess: true -baseCommand: [dotnet, fsi, isa-sampleToRawDataSeq.fsx] +baseCommand: [dotnet, fsi, isaSampleToRawDataSeq.fsx] inputs: arcPath: type: Directory diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx index 530a186ff2583e10303f923a76fd6d12d5098f6d..912b9779b9a7c5fac81613f1167a72a1afe13b6d 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx @@ -3,15 +3,13 @@ // Dependencies #r "nuget: ARCtrl.NET, 2.0.2" -#r "nuget: ARCtrl.QueryModel, 1.0.5" -#r "nuget: FsSpreadsheet.CsvIO, 6.2.0" +#r "nuget: ARCtrl.QueryModel, 2.0.2" -open FsSpreadsheet.CsvIO +// open FsSpreadsheet.CsvIO open FsSpreadsheet.Net open System.IO open ARCtrl.NET open ARCtrl -open ARCtrl.ISA open ARCtrl.QueryModel // input parameters