Skip to content
Snippets Groups Projects
Commit a3a1fdb8 authored by Dominik Brilhaus
Browse files

fix isa sample process reader

parent 181db967
No related branches found
No related tags found
1 merge request: !9 "Fsharp cwl"
Pipeline #5037 passed
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
```bash ```bash
cd runs/isaSampleToRawDataSeq cd runs/isaSampleToRawDataSeq-run
``` ```
```bash ```bash
......
...@@ -2,5 +2,5 @@ arcPath: ...@@ -2,5 +2,5 @@ arcPath:
class: Directory class: Directory
path: ../../ path: ../../
assayName: "Talinum_RNASeq_minimal" assayName: "Talinum_RNASeq_minimal"
outName: "rnaseq-samples" outName: rnaseq-samples
startingNodeNum: 1 startingNodeNum: 1
Input [Sample Name],Characteristic [organism],Term Source REF (OBI:0100026),Term Accession Number (OBI:0100026),Characteristic [organism part],Term Source REF (EFO:0000635),Term Accession Number (EFO:0000635),Characteristic [plant age],Term Source REF (DPBO:0000033),Term Accession Number (DPBO:0000033),Parameter [growth day length],Term Source REF (DPBO:0000041),Term Accession Number (DPBO:0000041),Parameter [light intensity exposure],Unit,Term Source REF (PECO:0007224),Term Accession Number (PECO:0007224),Parameter [humidity day],Unit ,Term Source REF (DPBO:0000005),Term Accession Number (DPBO:0000005),Parameter [temperature day],Unit ,Term Source REF (DPBO:0000007),Term Accession Number (DPBO:0000007),Parameter [temperature night],Unit ,Term Source REF (DPBO:0000008),Term Accession Number (DPBO:0000008),Factor [watering exposure],Term Source REF (PECO:0007383),Term Accession Number (PECO:0007383),Factor [Timepoint],Term Source REF (NCIT:C68568),Term Accession Number (NCIT:C68568),Factor [timepoint-ZT],Term Source REF (),Term Accession Number (),Factor [Photosynthesis mode],Term Source REF () ,Term Accession Number () ,Parameter [biosource amount],Unit ,Term Source REF (DPBO:0000013),Term Accession Number (DPBO:0000013),Parameter [extraction method],Term Source REF (DPBO:0000054),Term Accession Number (DPBO:0000054),Parameter [extraction buffer],Term Source REF (DPBO:0000050),Term Accession Number (DPBO:0000050),Parameter [extraction buffer volume],Unit ,Term Source REF (DPBO:0000051),Term Accession Number (DPBO:0000051),Parameter [RNA quality check],Term Source REF (DPBO:0000062),Term Accession Number (DPBO:0000062),Parameter [library strategy],Term Source REF (DPBO:0000035),Term Accession Number (DPBO:0000035),Parameter [library selection],Term Source REF (DPBO:0000036),Term Accession Number (DPBO:0000036),Parameter [library layout],Term Source REF (DPBO:0000015),Term Accession Number (DPBO:0000015),Parameter [library preparation kit],Term Source REF 
(GENEPIO:0000085),Term Accession Number (GENEPIO:0000085),Parameter [library preparation kit version],Term Source REF (GENEPIO:0000149),Term Accession Number (GENEPIO:0000149),Parameter [adapter sequence],Term Source REF (GENEPIO:0000083),Term Accession Number (GENEPIO:0000083),Parameter [next generation sequencing instrument model],Term Source REF (DPBO:0000040),Term Accession Number (DPBO:0000040),Parameter [base-calling software],Term Source REF (DPBO:0000017),Term Accession Number (DPBO:0000017),Parameter [base-calling software version],Term Source REF (DPBO:0000018),Term Accession Number (DPBO:0000018),Parameter [Raw data file format],Term Source REF (DPBO:0000021),Term Accession Number (DPBO:0000021),Output [Data]
CAM_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,80,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CAGATC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz
CAM_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.7 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CTTGTA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz
CAM_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,93,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 6.5 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,AGTCAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz
reC3_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,82,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.8 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTCCGC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz
reC3_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,96,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz
reC3_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz
\ No newline at end of file
File added
...@@ -2,7 +2,7 @@ cwlVersion: v1.2 ...@@ -2,7 +2,7 @@ cwlVersion: v1.2
class: CommandLineTool class: CommandLineTool
hints: hints:
DockerRequirement: DockerRequirement:
dockerPull: mcr.microsoft.com/dotnet/sdk:6.0 dockerPull: mcr.microsoft.com/dotnet/sdk:8.0
requirements: requirements:
- class: InitialWorkDirRequirement - class: InitialWorkDirRequirement
listing: listing:
...@@ -25,12 +25,12 @@ inputs: ...@@ -25,12 +25,12 @@ inputs:
type: string type: string
inputBinding: inputBinding:
position: 2 position: 2
outName:
type: string
inputBinding:
position: 3
startingNodeNum: startingNodeNum:
type: int type: int
inputBinding:
position: 3
outName:
type: string
inputBinding: inputBinding:
position: 4 position: 4
...@@ -39,5 +39,5 @@ outputs: ...@@ -39,5 +39,5 @@ outputs:
type: File[] type: File[]
outputBinding: outputBinding:
glob: glob:
- "*.tsv" - "*.csv"
- "*.xlsx" - "*.xlsx"
...@@ -4,22 +4,22 @@ ...@@ -4,22 +4,22 @@
#r "nuget: ARCtrl.NET" #r "nuget: ARCtrl.NET"
#r "nuget: ARCtrl.QueryModel" #r "nuget: ARCtrl.QueryModel"
#r "nuget: FsSpreadsheet.CsvIO"
open System.IO
open ARCtrl.NET open ARCtrl.NET
open ARCtrl open ARCtrl
open ARCtrl.QueryModel open ARCtrl.QueryModel
open ARCtrl.Helper open ARCtrl.Helper
open FsSpreadsheet
open FsSpreadsheet.Net open FsSpreadsheet.Net
open FsSpreadsheet.CsvIO
// input parameters // input parameters
// let args : string array = fsi.CommandLineArgs |> Array.tail let args : string array = fsi.CommandLineArgs |> Array.tail
// let arcPath = args.[0] let arcPath = args.[0]
// let assayName = args.[1] let assayName = args.[1]
// let startingNodeNum = args.[2] |> int let startingNodeNum = args.[2] |> int
// let outName = args.[3] let outName = args.[3]
type ArcTables with type ArcTables with
...@@ -31,16 +31,16 @@ type ArcTables with ...@@ -31,16 +31,16 @@ type ArcTables with
) )
|> ArcTables |> ArcTables
// test parameters // // test parameters
let source = __SOURCE_DIRECTORY__ // let source = __SOURCE_DIRECTORY__
let arcPath = Path.Combine(source, "../../") // let arcPath = Path.Combine(source, "../../")
let assayName = "Talinum_RNASeq_minimal" // let assayName = "Talinum_RNASeq_minimal"
let startingNodeNum = 0 // let startingNodeNum = 0
let outName = "rnaseq-samples" // let outName = "rnaseq-samples"
// Load ARC // Load ARC
// Remove all tables with either an input or output column missing 🤣😀 // Remove all tables with either an input or output column missing
let clean (a : ARC) = let clean (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a -> a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables a.Tables
...@@ -64,7 +64,7 @@ let clean (a : ARC) = ...@@ -64,7 +64,7 @@ let clean (a : ARC) =
) )
a a
// transform all data cells to freetext cells 😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀 // transform all data cells to freetext cells
let shittify (a : ARC) = let shittify (a : ARC) =
a.ISA.Value.Assays |> Seq.iter (fun a -> a.ISA.Value.Assays |> Seq.iter (fun a ->
a.Tables a.Tables
...@@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook() ...@@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook()
wb.AddWorksheet ws wb.AddWorksheet ws
// Write to csv // Write to xlsx
wb.ToXlsxFile ($"{outName}.xlsx")
wb.ToXlsxFile (outName + ".xlsx") // Write to csv
\ No newline at end of file wb.ToCsvFile ($"{outName}.csv")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment