Skip to content
Snippets Groups Projects
Commit a3a1fdb8 authored by Dominik Brilhaus
Browse files

fix isa sample process reader

parent 181db967
No related branches found
No related tags found
1 merge request: !9 Fsharp cwl
Pipeline #5037 passed
......@@ -2,7 +2,7 @@
```bash
cd runs/isaSampleToRawDataSeq
cd runs/isaSampleToRawDataSeq-run
```
```bash
......
......@@ -2,5 +2,5 @@ arcPath:
class: Directory
path: ../../
assayName: "Talinum_RNASeq_minimal"
outName: "rnaseq-samples"
outName: rnaseq-samples
startingNodeNum: 1
Input [Sample Name],Characteristic [organism],Term Source REF (OBI:0100026),Term Accession Number (OBI:0100026),Characteristic [organism part],Term Source REF (EFO:0000635),Term Accession Number (EFO:0000635),Characteristic [plant age],Term Source REF (DPBO:0000033),Term Accession Number (DPBO:0000033),Parameter [growth day length],Term Source REF (DPBO:0000041),Term Accession Number (DPBO:0000041),Parameter [light intensity exposure],Unit,Term Source REF (PECO:0007224),Term Accession Number (PECO:0007224),Parameter [humidity day],Unit ,Term Source REF (DPBO:0000005),Term Accession Number (DPBO:0000005),Parameter [temperature day],Unit ,Term Source REF (DPBO:0000007),Term Accession Number (DPBO:0000007),Parameter [temperature night],Unit ,Term Source REF (DPBO:0000008),Term Accession Number (DPBO:0000008),Factor [watering exposure],Term Source REF (PECO:0007383),Term Accession Number (PECO:0007383),Factor [Timepoint],Term Source REF (NCIT:C68568),Term Accession Number (NCIT:C68568),Factor [timepoint-ZT],Term Source REF (),Term Accession Number (),Factor [Photosynthesis mode],Term Source REF () ,Term Accession Number () ,Parameter [biosource amount],Unit ,Term Source REF (DPBO:0000013),Term Accession Number (DPBO:0000013),Parameter [extraction method],Term Source REF (DPBO:0000054),Term Accession Number (DPBO:0000054),Parameter [extraction buffer],Term Source REF (DPBO:0000050),Term Accession Number (DPBO:0000050),Parameter [extraction buffer volume],Unit ,Term Source REF (DPBO:0000051),Term Accession Number (DPBO:0000051),Parameter [RNA quality check],Term Source REF (DPBO:0000062),Term Accession Number (DPBO:0000062),Parameter [library strategy],Term Source REF (DPBO:0000035),Term Accession Number (DPBO:0000035),Parameter [library selection],Term Source REF (DPBO:0000036),Term Accession Number (DPBO:0000036),Parameter [library layout],Term Source REF (DPBO:0000015),Term Accession Number (DPBO:0000015),Parameter [library preparation kit],Term Source REF 
(GENEPIO:0000085),Term Accession Number (GENEPIO:0000085),Parameter [library preparation kit version],Term Source REF (GENEPIO:0000149),Term Accession Number (GENEPIO:0000149),Parameter [adapter sequence],Term Source REF (GENEPIO:0000083),Term Accession Number (GENEPIO:0000083),Parameter [next generation sequencing instrument model],Term Source REF (DPBO:0000040),Term Accession Number (DPBO:0000040),Parameter [base-calling software],Term Source REF (DPBO:0000017),Term Accession Number (DPBO:0000017),Parameter [base-calling software version],Term Source REF (DPBO:0000018),Term Accession Number (DPBO:0000018),Parameter [Raw data file format],Term Source REF (DPBO:0000021),Term Accession Number (DPBO:0000021),Output [Data]
CAM_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,80,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CAGATC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz
CAM_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.7 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CTTGTA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz
CAM_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,93,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 6.5 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,AGTCAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz
reC3_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,82,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.8 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTCCGC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz
reC3_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,96,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz
reC3_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz
\ No newline at end of file
File added
......@@ -2,7 +2,7 @@ cwlVersion: v1.2
class: CommandLineTool
hints:
DockerRequirement:
dockerPull: mcr.microsoft.com/dotnet/sdk:6.0
dockerPull: mcr.microsoft.com/dotnet/sdk:8.0
requirements:
- class: InitialWorkDirRequirement
listing:
......@@ -25,12 +25,12 @@ inputs:
type: string
inputBinding:
position: 2
outName:
type: string
inputBinding:
position: 3
startingNodeNum:
type: int
inputBinding:
position: 3
outName:
type: string
inputBinding:
position: 4
......@@ -39,5 +39,5 @@ outputs:
type: File[]
outputBinding:
glob:
- "*.tsv"
- "*.csv"
- "*.xlsx"
......@@ -4,22 +4,22 @@
#r "nuget: ARCtrl.NET"
#r "nuget: ARCtrl.QueryModel"
#r "nuget: FsSpreadsheet.CsvIO"
open System.IO
open ARCtrl.NET
open ARCtrl
open ARCtrl.QueryModel
open ARCtrl.Helper
open FsSpreadsheet
open FsSpreadsheet.Net
open FsSpreadsheet.CsvIO
// input parameters
// let args : string array = fsi.CommandLineArgs |> Array.tail
// let arcPath = args.[0]
// let assayName = args.[1]
// let startingNodeNum = args.[2] |> int
// let outName = args.[3]
let args : string array = fsi.CommandLineArgs |> Array.tail
let arcPath = args.[0]
let assayName = args.[1]
let startingNodeNum = args.[2] |> int
let outName = args.[3]
type ArcTables with
......@@ -31,16 +31,16 @@ type ArcTables with
)
|> ArcTables
// test parameters
let source = __SOURCE_DIRECTORY__
let arcPath = Path.Combine(source, "../../")
let assayName = "Talinum_RNASeq_minimal"
let startingNodeNum = 0
let outName = "rnaseq-samples"
// // test parameters
// let source = __SOURCE_DIRECTORY__
// let arcPath = Path.Combine(source, "../../")
// let assayName = "Talinum_RNASeq_minimal"
// let startingNodeNum = 0
// let outName = "rnaseq-samples"
// Load ARC
// Remove all tables with either an input or output column missing 🤣😀
// Remove all tables with either an input or output column missing
let clean (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables
......@@ -64,7 +64,7 @@ let clean (a : ARC) =
)
a
// transform all data cells to freetext cells 😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀
// transform all data cells to freetext cells
let shittify (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables
......@@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook()
wb.AddWorksheet ws
// Write to csv
// Write to xlsx
wb.ToXlsxFile ($"{outName}.xlsx")
wb.ToXlsxFile (outName + ".xlsx")
\ No newline at end of file
// Write to csv
wb.ToCsvFile ($"{outName}.csv")
\ No newline at end of file
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment