diff --git a/runs/deseq2/README.md b/runs/deseq2-run/README.md similarity index 80% rename from runs/deseq2/README.md rename to runs/deseq2-run/README.md index 40fc457539dcfaea4a97a7a09da915326772d03d..f416fa74b81c27b40e1ede6cbaf308d5ba2e9287 100644 --- a/runs/deseq2/README.md +++ b/runs/deseq2-run/README.md @@ -2,7 +2,7 @@ ```bash -cd runs/deseq2 +cd runs/deseq2-run ``` ```bash diff --git a/runs/deseq2-run/job.yml b/runs/deseq2-run/job.yml new file mode 100644 index 0000000000000000000000000000000000000000..b0386bd4bbf18d2b229848e339880d5f9477e99e --- /dev/null +++ b/runs/deseq2-run/job.yml @@ -0,0 +1,9 @@ +inKallistoResults: + class: Directory + path: ../../runs/kallisto/kallisto_results +inMetadataFile: + class: File + path: ../../runs/merged_isa_metadata/out/merged_isa.tsv +inMetadataSample: "Source.Name" +inMetadataFactor: + - "Factor..Photosynthesis.mode." \ No newline at end of file diff --git a/runs/deseq2/job.yml b/runs/deseq2/job.yml deleted file mode 100644 index 9e1620b7daad31a9db2823e275d5161360f1ee47..0000000000000000000000000000000000000000 --- a/runs/deseq2/job.yml +++ /dev/null @@ -1,6 +0,0 @@ -arcPath: "../../" -inKallistoResults: "runs/kallisto/kallisto_results" -inMetadataFile: "runs/merged_isa_metadata/out/merged_isa.tsv" -inMetadataSample: "Source.Name" -inMetadataFactor: - - "Factor..Photosynthesis.mode." \ No newline at end of file diff --git a/workflows/deseq2/README.md b/workflows/deseq2/README.md index 77af7fd2cf7a2792632216d0e71622f09ffaae57..03944ea1446b8c7a58ed5a530e9a238d10e68c6a 100644 --- a/workflows/deseq2/README.md +++ b/workflows/deseq2/README.md @@ -11,11 +11,10 @@ Workflow used for **differential gene expression analysis** - https://bioconductor.org/packages/release/bioc/vignettes/tximport/inst/doc/tximport.html#kallisto - ## Run pure script ```bash -RScript deseq2.R "../../" "runs/kallisto/kallisto_results" "runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." 
+Rscript deseq2.R "../../runs/kallisto/kallisto_results" "../../runs/merged_isa_metadata/out/merged_isa.tsv" "Source.Name" "Factor..Photosynthesis.mode." ``` ## Run CWL diff --git a/workflows/deseq2/Rplots.pdf b/workflows/deseq2/Rplots.pdf deleted file mode 100644 index 2e31bb14782fb773db7184266402900554696778..0000000000000000000000000000000000000000 Binary files a/workflows/deseq2/Rplots.pdf and /dev/null differ diff --git a/workflows/deseq2/dependencies.Rmd b/workflows/deseq2/dependencies.R similarity index 65% rename from workflows/deseq2/dependencies.Rmd rename to workflows/deseq2/dependencies.R index 77b80914474c31b2713ed085c34d2ef160cb7660..d010bbb7caa76ff4fbb114655812cb7ccea20dfb 100644 --- a/workflows/deseq2/dependencies.Rmd +++ b/workflows/deseq2/dependencies.R @@ -1,16 +1,9 @@ ---- -title: "Install dependencies" -author: "Dominik Brilhaus" -date: "`r Sys.Date()`" -output: html_document ---- +# Install dependencies for deseq2 -```{r} if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") - BiocManager::install("DESeq2") library("DESeq2") @@ -19,6 +12,3 @@ library("tximport") BiocManager::install("rhdf5") library("rhdf5") - -``` - diff --git a/workflows/deseq2/deseq2.R b/workflows/deseq2/deseq2.R index 3de7cfca7d0878e3179a187684258d8e8cf7fce1..0384bc5d321b4d2ab18b59322be90aaf47d770d3 100644 --- a/workflows/deseq2/deseq2.R +++ b/workflows/deseq2/deseq2.R @@ -9,26 +9,24 @@ library("ggplot2") ## In-and-out -# arcPath <- "../../" -# inKallistoResults <- "runs/kallisto/kallisto_results" -# inMetadataFile <- "runs/merged_isa_metadata/out/merged_isa.tsv" -# inMetadataSample <- "Source.Name" -# inMetadataFactor <- "Factor..Photosynthesis.mode." +inKallistoResults <- "../../runs/kallisto/kallisto_results" +inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" +inMetadataSample <- "Source.Name" +inMetadataFactor <- "Factor..Photosynthesis.mode." 
### Read arguments from CLI args <- commandArgs(trailingOnly = T) -arcPath <- args[1] -inKallistoResults <- args[2] -inMetadataFile <- args[3] -inMetadataSample <- args[4] -inMetadataFactor <- args[5] +inKallistoResults <- args[1] +inMetadataFile <- args[2] +inMetadataSample <- args[3] +inMetadataFactor <- args[4] ## Import kallisto count data -files <- dir(file.path(arcPath, inKallistoResults) , recursive = T, full.names = T ,"abundance.h5") -names(files) <- dir(file.path(arcPath, inKallistoResults)) +files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5") +names(files) <- dir(inKallistoResults) txi <- tximport(files, type = "kallisto", txOut = TRUE) @@ -36,7 +34,7 @@ head(txi$counts) ## Read sample metadata -samples_metadata <- read.table(file = file.path(arcPath, inMetadataFile), sep = "\t") +samples_metadata <- read.table(file = inMetadataFile, sep = "\t") samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)] colnames(samples)[1:2] <- c("sampleID", "condition") @@ -49,16 +47,16 @@ dds <- DESeqDataSetFromTximport(txi, colData = samples, design = ~ condition) dds <- DESeq(dds) -## Extract results +## Outputs -res <- results(dds) -res +### Extract results -## Outputs +res <- results(dds) +write.csv(res, file = "results_stats.csv", quote = TRUE) ### Generate and save default plots -png("ma-plot.png") +png("results_ma-plot.png") plotMA(res, ylim=c(-2,2)) dev.off() @@ -72,7 +70,7 @@ p2 <- ggplot(pcaData, aes(PC1, PC2, color=condition)) + ylab(paste0("PC2: ",percentVar[2],"% variance")) + coord_fixed() -png("pca-plot.png") +png("results_pca-plot.png") print(p2) dev.off() diff --git a/workflows/deseq2/deseq2.Rmd b/workflows/deseq2/deseq2.Rmd deleted file mode 100644 index c2adacadd543bc78e8ad34c3287f189e496bed15..0000000000000000000000000000000000000000 --- a/workflows/deseq2/deseq2.Rmd +++ /dev/null @@ -1,98 +0,0 @@ ---- -title: "deseq2" -author: "Dominik Brilhaus" -date: 
"`r Sys.Date()`" -output: html_document ---- - - -## Libraries - -```{r} - -library("DESeq2") -library("tximport") -library("rhdf5") -library("ggplot2") - -``` - - - - -## In-and-out - -```{r} - -arc <- "../../" - -inKallistoResults <- file.path(arc, "runs/kallisto/kallisto_results") -inMetadataFile <- file.path(arc, "runs/merged_isa_metadata/out/merged_isa.tsv") -inMetadataSample <- "Source.Name" -inMetadataFactor <- "Factor..Photosynthesis.mode." - - -``` - -## Import kallisto count data - -```{r} - -files <- dir(inKallistoResults, recursive = T, full.names = T ,"abundance.h5") -names(files) <- dir(inKallistoResults) - -txi <- tximport(files, type = "kallisto", txOut = TRUE) - -head(txi$counts) - -``` - - -## Read sample metadata - -```{r} - - -samples_metadata <- read.table(file = inMetadataFile, sep = "\t") - -samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, inMetadataFactor)] -colnames(samples)[1:2] <- c("sampleID", "condition") - -rownames(samples) <- samples$sampleID - - -``` - -## DESeq - -```{r} -dds <- DESeqDataSetFromTximport(txi, - colData = samples, - design = ~ condition) - -dds <- DESeq(dds) - -res <- results(dds) -res - -plotMA(res, ylim=c(-2,2)) - - -``` - - -```{r} -vsd <- vst(dds, blind=FALSE) - -pcaData <- plotPCA(vsd, intgroup=c("condition"), returnData=TRUE) -percentVar <- round(100 * attr(pcaData, "percentVar")) -ggplot(pcaData, aes(PC1, PC2, color=condition)) + - geom_point(size=3) + - xlab(paste0("PC1: ",percentVar[1],"% variance")) + - ylab(paste0("PC2: ",percentVar[2],"% variance")) + - coord_fixed() - -``` - - - diff --git a/workflows/deseq2/deseq2.cwl b/workflows/deseq2/deseq2.cwl index 1349e7ce2d9283004cde1384a87518880e7ef1f5..2f926aaf9f58afb994efa1d20ff809c56a5f86a9 100644 --- a/workflows/deseq2/deseq2.cwl +++ b/workflows/deseq2/deseq2.cwl @@ -13,30 +13,27 @@ requirements: networkAccess: true baseCommand: [RScript, deseq2.R] inputs: - arcPath: - type: string - inputBinding: - position: 1 
inKallistoResults: - type: string + type: Directory inputBinding: - position: 2 + position: 1 inMetadataFile: - type: string + type: File inputBinding: - position: 3 + position: 2 inMetadataSample: type: string inputBinding: - position: 4 + position: 3 inMetadataFactor: type: string[] inputBinding: - position: 5 + position: 4 outputs: output: type: File[] outputBinding: glob: - - "*" + - "*.png" + - "*.csv" diff --git a/workflows/isaSampleToRawDataSeq/README.md b/workflows/isaSampleToRawDataSeq/README.md index 70526a1c518c0beb2556e6fdba1b2a0cb7b5225a..8ecbd0b4085a745e8261cee82c4a8116ed624667 100644 --- a/workflows/isaSampleToRawDataSeq/README.md +++ b/workflows/isaSampleToRawDataSeq/README.md @@ -1,8 +1,7 @@ ```bash -dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples - +dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal 1 rnaseq-samples ``` diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx index 912b9779b9a7c5fac81613f1167a72a1afe13b6d..4a9483c8630a1699465acb19c6dd027097e7193a 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx @@ -5,8 +5,6 @@ #r "nuget: ARCtrl.NET, 2.0.2" #r "nuget: ARCtrl.QueryModel, 2.0.2" -// open FsSpreadsheet.CsvIO -open FsSpreadsheet.Net open System.IO open ARCtrl.NET open ARCtrl @@ -17,19 +15,17 @@ open ARCtrl.QueryModel let args : string array = fsi.CommandLineArgs |> Array.tail let arcPath = args.[0] let assayName = args.[1] -let outName = args.[2] - -let startingNodeNum = args.[3] |> int +let startingNodeNum = args.[2] |> int +let outName = args.[3] // test parameters +// let source = __SOURCE_DIRECTORY__ +// let arcPath = Path.Combine(source, 
"../../") +// let assayName = "Talinum_RNASeq_minimal" +// let startingNodeNum = 1 +// let outName = "rnaseq-samples" -// let source = __SOURCE_DIRECTORY__ -// let arcPath = Path.Combine(source, "../../") - -// let assayName = "pick2012_illumina_rnaseq" - -// let outName = "out.csv" // Load ARC @@ -55,7 +51,7 @@ let headers = [ CompositeHeader.Component v.Category else failwithf "what the f is %O" v - CompositeHeader.Output IOType.RawDataFile + CompositeHeader.Output IOType.Data ] // Create rows