Skip to content
Snippets Groups Projects
Commit 4b039258 authored by Dominik Brilhaus
Browse files

Merge branch 'fsharp-cwl' into 'main'

Fsharp cwl

See merge request !9
parents 0d104868 46773aa6
No related branches found
No related tags found
1 merge request: !9 Fsharp cwl
Pipeline #7606 passed
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
```bash ```bash
cd runs/isaSampleToRawDataSeq cd runs/isaSampleToRawDataSeq-run
``` ```
```bash ```bash
......
...@@ -2,5 +2,5 @@ arcPath: ...@@ -2,5 +2,5 @@ arcPath:
class: Directory class: Directory
path: ../../ path: ../../
assayName: "Talinum_RNASeq_minimal" assayName: "Talinum_RNASeq_minimal"
outName: "rnaseq-samples" outName: rnaseq-samples
startingNodeNum: 1 startingNodeNum: 0
Input [Sample Name],Characteristic [organism],Term Source REF (OBI:0100026),Term Accession Number (OBI:0100026),Characteristic [organism part],Term Source REF (EFO:0000635),Term Accession Number (EFO:0000635),Characteristic [plant age],Term Source REF (DPBO:0000033),Term Accession Number (DPBO:0000033),Parameter [growth day length],Term Source REF (DPBO:0000041),Term Accession Number (DPBO:0000041),Parameter [light intensity exposure],Unit,Term Source REF (PECO:0007224),Term Accession Number (PECO:0007224),Parameter [humidity day],Unit ,Term Source REF (DPBO:0000005),Term Accession Number (DPBO:0000005),Parameter [temperature day],Unit ,Term Source REF (DPBO:0000007),Term Accession Number (DPBO:0000007),Parameter [temperature night],Unit ,Term Source REF (DPBO:0000008),Term Accession Number (DPBO:0000008),Factor [watering exposure],Term Source REF (PECO:0007383),Term Accession Number (PECO:0007383),Factor [Timepoint],Term Source REF (NCIT:C68568),Term Accession Number (NCIT:C68568),Factor [timepoint-ZT],Term Source REF (),Term Accession Number (),Factor [Photosynthesis mode],Term Source REF () ,Term Accession Number () ,Parameter [biosource amount],Unit ,Term Source REF (DPBO:0000013),Term Accession Number (DPBO:0000013),Parameter [extraction method],Term Source REF (DPBO:0000054),Term Accession Number (DPBO:0000054),Parameter [extraction buffer],Term Source REF (DPBO:0000050),Term Accession Number (DPBO:0000050),Parameter [extraction buffer volume],Unit ,Term Source REF (DPBO:0000051),Term Accession Number (DPBO:0000051),Parameter [RNA quality check],Term Source REF (DPBO:0000062),Term Accession Number (DPBO:0000062),Parameter [library strategy],Term Source REF (DPBO:0000035),Term Accession Number (DPBO:0000035),Parameter [library selection],Term Source REF (DPBO:0000036),Term Accession Number (DPBO:0000036),Parameter [library layout],Term Source REF (DPBO:0000015),Term Accession Number (DPBO:0000015),Parameter [library preparation kit],Term Source REF 
(GENEPIO:0000085),Term Accession Number (GENEPIO:0000085),Parameter [library preparation kit version],Term Source REF (GENEPIO:0000149),Term Accession Number (GENEPIO:0000149),Parameter [adapter sequence],Term Source REF (GENEPIO:0000083),Term Accession Number (GENEPIO:0000083),Parameter [next generation sequencing instrument model],Term Source REF (DPBO:0000040),Term Accession Number (DPBO:0000040),Parameter [base-calling software],Term Source REF (DPBO:0000017),Term Accession Number (DPBO:0000017),Parameter [base-calling software version],Term Source REF (DPBO:0000018),Term Accession Number (DPBO:0000018),Parameter [Raw data file format],Term Source REF (DPBO:0000021),Term Accession Number (DPBO:0000021),Output [Data]
CAM_01,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,80,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CAGATC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz
CAM_02,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.7 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CTTGTA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz
CAM_03,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,93,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 6.5 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,AGTCAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz
reC3_01,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,82,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.8 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTCCGC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz
reC3_02,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,96,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz
reC3_03,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz
\ No newline at end of file
File added
...@@ -6,13 +6,14 @@ library("DESeq2") ...@@ -6,13 +6,14 @@ library("DESeq2")
library("tximport") library("tximport")
library("rhdf5") library("rhdf5")
library("ggplot2") library("ggplot2")
library("readxl")
## In-and-out ## In-and-out
# inKallistoResults <- "../../runs/kallisto/kallisto_results" inKallistoResults <- "../../runs/kallisto/kallisto_results"
# inMetadataFile <- "../../runs/merged_isa_metadata/out/merged_isa.tsv" inMetadataFile <- "../../runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx"
# inMetadataSample <- "Source.Name" inMetadataSample <- "Input [Source Name]"
# inMetadataFactorList <- list("Factor..Photosynthesis.mode.", "Factor..Biosource.amount.") inMetadataFactorList <- list("Factor [Photosynthesis mode]")
### Read arguments from CLI ### Read arguments from CLI
...@@ -34,13 +35,18 @@ head(txi$counts) ...@@ -34,13 +35,18 @@ head(txi$counts)
## Read sample metadata ## Read sample metadata
samples_metadata <- read.table(file = inMetadataFile, sep = "\t") samples_metadata <- as.data.frame(read_xlsx(path = inMetadataFile))
samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))] samples <- samples_metadata[order(samples_metadata[[inMetadataSample]]), c(inMetadataSample, unlist(inMetadataFactorList))]
rownames(samples) <- samples[,inMetadataSample] rownames(samples) <- samples[,inMetadataSample]
factors <- sapply(inMetadataFactorList, function(x) x[[1]]) factors <- sapply(inMetadataFactorList, function(x) x[[1]])
design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))
## Annoying workaround to prevent formula error with special chars in column headers
colnames(samples) <- make.names(colnames(samples))
factors <- make.names(factors)
design_formula <- as.formula(paste("~", paste(rev(factors), collapse = " + ")))
## DESeq ## DESeq
......
...@@ -2,7 +2,7 @@ cwlVersion: v1.2 ...@@ -2,7 +2,7 @@ cwlVersion: v1.2
class: CommandLineTool class: CommandLineTool
hints: hints:
DockerRequirement: DockerRequirement:
dockerPull: mcr.microsoft.com/dotnet/sdk:6.0 dockerPull: mcr.microsoft.com/dotnet/sdk:8.0
requirements: requirements:
- class: InitialWorkDirRequirement - class: InitialWorkDirRequirement
listing: listing:
...@@ -25,12 +25,12 @@ inputs: ...@@ -25,12 +25,12 @@ inputs:
type: string type: string
inputBinding: inputBinding:
position: 2 position: 2
outName:
type: string
inputBinding:
position: 3
startingNodeNum: startingNodeNum:
type: int type: int
inputBinding:
position: 3
outName:
type: string
inputBinding: inputBinding:
position: 4 position: 4
...@@ -39,5 +39,5 @@ outputs: ...@@ -39,5 +39,5 @@ outputs:
type: File[] type: File[]
outputBinding: outputBinding:
glob: glob:
- "*.tsv" - "*.csv"
- "*.xlsx" - "*.xlsx"
...@@ -4,22 +4,22 @@ ...@@ -4,22 +4,22 @@
#r "nuget: ARCtrl.NET" #r "nuget: ARCtrl.NET"
#r "nuget: ARCtrl.QueryModel" #r "nuget: ARCtrl.QueryModel"
#r "nuget: FsSpreadsheet.CsvIO"
open System.IO
open ARCtrl.NET open ARCtrl.NET
open ARCtrl open ARCtrl
open ARCtrl.QueryModel open ARCtrl.QueryModel
open ARCtrl.Helper open ARCtrl.Helper
open FsSpreadsheet
open FsSpreadsheet.Net open FsSpreadsheet.Net
open FsSpreadsheet.CsvIO
// input parameters // input parameters
// let args : string array = fsi.CommandLineArgs |> Array.tail let args : string array = fsi.CommandLineArgs |> Array.tail
// let arcPath = args.[0] let arcPath = args.[0]
// let assayName = args.[1] let assayName = args.[1]
// let startingNodeNum = args.[2] |> int let startingNodeNum = args.[2] |> int
// let outName = args.[3] let outName = args.[3]
type ArcTables with type ArcTables with
...@@ -31,16 +31,16 @@ type ArcTables with ...@@ -31,16 +31,16 @@ type ArcTables with
) )
|> ArcTables |> ArcTables
// test parameters // // test parameters
let source = __SOURCE_DIRECTORY__ // let source = __SOURCE_DIRECTORY__
let arcPath = Path.Combine(source, "../../") // let arcPath = Path.Combine(source, "../../")
let assayName = "Talinum_RNASeq_minimal" // let assayName = "Talinum_RNASeq_minimal"
let startingNodeNum = 0 // let startingNodeNum = 0
let outName = "rnaseq-samples" // let outName = "rnaseq-samples"
// Load ARC // Load ARC
// Remove all tables with either an input or output column missing 🤣😀 // Remove all tables with either an input or output column missing
let clean (a : ARC) = let clean (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a -> a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables a.Tables
...@@ -64,7 +64,7 @@ let clean (a : ARC) = ...@@ -64,7 +64,7 @@ let clean (a : ARC) =
) )
a a
// transform all data cells to freetext cells 😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀 // transform all data cells to freetext cells
let shittify (a : ARC) = let shittify (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a -> a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables a.Tables
...@@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook() ...@@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook()
wb.AddWorksheet ws wb.AddWorksheet ws
// Write to csv // Write to xlsx
wb.ToXlsxFile ($"{outName}.xlsx")
wb.ToXlsxFile (outName + ".xlsx") // Write to csv
\ No newline at end of file wb.ToCsvFile ($"{outName}.csv")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment