diff --git a/runs/isaSampleToRawDataSeq-run/README.md b/runs/isaSampleToRawDataSeq-run/README.md index d8e1dcbbfe5360db3efbbe6c68c1233ef3ad7e1a..6c83a17dc1ab439d90ae08c2749a9047ac74ba3f 100644 --- a/runs/isaSampleToRawDataSeq-run/README.md +++ b/runs/isaSampleToRawDataSeq-run/README.md @@ -2,7 +2,7 @@ ```bash -cd runs/isaSampleToRawDataSeq +cd runs/isaSampleToRawDataSeq-run ``` ```bash diff --git a/runs/isaSampleToRawDataSeq-run/job.yml b/runs/isaSampleToRawDataSeq-run/job.yml index 803a637a8ceb31f03237a2cda90c8752c638a140..2c640ff48d7469b1cd005dd198be55c4eb8f7df8 100644 --- a/runs/isaSampleToRawDataSeq-run/job.yml +++ b/runs/isaSampleToRawDataSeq-run/job.yml @@ -2,5 +2,5 @@ arcPath: class: Directory path: ../../ assayName: "Talinum_RNASeq_minimal" -outName: "rnaseq-samples" +outName: rnaseq-samples startingNodeNum: 1 diff --git a/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv new file mode 100644 index 0000000000000000000000000000000000000000..ca63a9dea6774a0a2198c9b568a58dc6cccc38cd --- /dev/null +++ b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.csv @@ -0,0 +1,7 @@ +Input [Sample Name],Characteristic [organism],Term Source REF (OBI:0100026),Term Accession Number (OBI:0100026),Characteristic [organism part],Term Source REF (EFO:0000635),Term Accession Number (EFO:0000635),Characteristic [plant age],Term Source REF (DPBO:0000033),Term Accession Number (DPBO:0000033),Parameter [growth day length],Term Source REF (DPBO:0000041),Term Accession Number (DPBO:0000041),Parameter [light intensity exposure],Unit,Term Source REF (PECO:0007224),Term Accession Number (PECO:0007224),Parameter [humidity day],Unit ,Term Source REF (DPBO:0000005),Term Accession Number (DPBO:0000005),Parameter [temperature day],Unit ,Term Source REF (DPBO:0000007),Term Accession Number (DPBO:0000007),Parameter [temperature night],Unit ,Term Source REF (DPBO:0000008),Term Accession Number (DPBO:0000008),Factor [watering exposure],Term Source REF (PECO:0007383),Term Accession Number (PECO:0007383),Factor [Timepoint],Term Source REF (NCIT:C68568),Term Accession Number (NCIT:C68568),Factor [timepoint-ZT],Term Source REF (),Term Accession Number (),Factor [Photosynthesis mode],Term Source REF () ,Term Accession Number () ,Parameter [biosource amount],Unit ,Term Source REF (DPBO:0000013),Term Accession Number (DPBO:0000013),Parameter [extraction method],Term Source REF (DPBO:0000054),Term Accession Number (DPBO:0000054),Parameter [extraction buffer],Term Source REF (DPBO:0000050),Term Accession Number (DPBO:0000050),Parameter [extraction buffer volume],Unit ,Term Source REF (DPBO:0000051),Term Accession Number (DPBO:0000051),Parameter [RNA quality check],Term Source REF (DPBO:0000062),Term Accession Number (DPBO:0000062),Parameter [library strategy],Term Source REF (DPBO:0000035),Term Accession Number (DPBO:0000035),Parameter [library selection],Term Source REF (DPBO:0000036),Term Accession Number (DPBO:0000036),Parameter [library layout],Term Source REF (DPBO:0000015),Term Accession Number (DPBO:0000015),Parameter [library preparation kit],Term Source REF (GENEPIO:0000085),Term Accession Number (GENEPIO:0000085),Parameter [library preparation kit version],Term Source REF (GENEPIO:0000149),Term Accession Number (GENEPIO:0000149),Parameter [adapter sequence],Term Source REF (GENEPIO:0000083),Term Accession Number (GENEPIO:0000083),Parameter [next generation sequencing instrument model],Term Source REF (DPBO:0000040),Term Accession Number (DPBO:0000040),Parameter [base-calling software],Term Source REF (DPBO:0000017),Term Accession Number (DPBO:0000017),Parameter [base-calling software version],Term Source REF (DPBO:0000018),Term Accession Number (DPBO:0000018),Parameter [Raw data file format],Term Source REF (DPBO:0000021),Term Accession Number (DPBO:0000021),Output [Data] +CAM_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,80,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CAGATC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_097_CAMMD_CAGATC_L001_R1_001.fastq.gz +CAM_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.7 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,CTTGTA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_099_CAMMD_CTTGTA_L001_R1_001.fastq.gz +CAM_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought,user-specific,,MD,user-specific,,6,user-specific,,CAM,user-specific,,93,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 6.5 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,AGTCAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_103_CAMMD_AGTCAA_L001_R1_001.fastq.gz +reC3_01_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,82,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.8 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTCCGC,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_161_reC3MD_GTCCGC_L001_R1_001.fastq.gz +reC3_02_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,96,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_163_reC3MD_GTGAAA_L001_R1_001.fastq.gz +reC3_03_ext,Talinum fruticosum,NCBITaxon,http://purl.obolibrary.org/obo/NCBITaxon_110664,leaf,PO,https://www.ebi.ac.uk/ols4/ontologies/po/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FPO_0025034,28 days after germination,user-specific,,12 hr light / 12 hr dark,user-specific,,425,microeinstein per square meter per second,UO,https://bioregistry.io/UO:0000160,37,percent,UO,https://bioregistry.io/UO:0000187,30,degree celsius,UO,https://bioregistry.io/UO:0000027,22,degree celsius,UO,https://bioregistry.io/UO:0000027,12 days drought + 2 days rewatered,user-specific,,MD,user-specific,,6,user-specific,,reC3,user-specific,,78,milligram,UO,https://bioregistry.io/UO:0000022,Roboklon EURx GeneMATRIX Universal RNA Purification version 2.3 September 2011,user-specific,,Roboklon commercial buffers,user-specific,,300,microliter,UO,https://bioregistry.io/UO:0000101,RIN 7.6 (Agilent Bioanalyzer 2100 expert_Plant RNA Nano),user-specific,,RNA-seq,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000003,cDNA method,user-specific,,single-end,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_0000086,Illumina TruSeq RNA Sample Prep Kit,,,version 2,,,GTGAAA,,,Illumina HiSeq 2000,DPBO,http://purl.org/nfdi4plants/ontology/dpbo/DPBO_1000041,Illumina Cassava,user-specific,,v1.8.2,,,*.fastq.gz,,,DB_165_re-C3MD_GTGAAA_L002_R1_001.fastq.gz \ No newline at end of file diff --git a/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..819fa73035565c9b359e6c3c18a9cf80ae2d9b84 Binary files /dev/null and b/runs/isaSampleToRawDataSeq-run/rnaseq-samples.xlsx differ diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl index a00b3e8187346d19521718fed02c0fd50c10d0ce..e743f327893969785482e36b2ba868dde93de35a 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl @@ -2,7 +2,7 @@ cwlVersion: v1.2 class: CommandLineTool hints: DockerRequirement: - dockerPull: mcr.microsoft.com/dotnet/sdk:6.0 + dockerPull: mcr.microsoft.com/dotnet/sdk:8.0 requirements: - class: InitialWorkDirRequirement listing: @@ -25,12 +25,12 @@ inputs: type: string inputBinding: position: 2 - outName: - type: string - inputBinding: - position: 3 startingNodeNum: type: int + inputBinding: + position: 3 + outName: + type: string inputBinding: position: 4 @@ -39,5 +39,5 @@ outputs: type: File[] outputBinding: glob: - - "*.tsv" + - "*.csv" - "*.xlsx" diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx index 20ba577698d0671ca2a8ea97b67b6c4faa968915..5ac29ad4f621be4d6914973b674c266ef4037851 100644 --- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx +++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx @@ -4,22 +4,22 @@ #r "nuget: ARCtrl.NET" #r "nuget: ARCtrl.QueryModel" +#r "nuget: FsSpreadsheet.CsvIO" -open System.IO open ARCtrl.NET open ARCtrl open ARCtrl.QueryModel open ARCtrl.Helper -open FsSpreadsheet open FsSpreadsheet.Net +open FsSpreadsheet.CsvIO // input parameters -// let args : string array = fsi.CommandLineArgs |> Array.tail -// let arcPath = args.[0] -// let assayName = args.[1] -// let startingNodeNum = args.[2] |> int -// let outName = args.[3] +let args : string array = fsi.CommandLineArgs |> Array.tail +let arcPath = args.[0] +let assayName = args.[1] +let startingNodeNum = args.[2] |> int +let outName = args.[3] type ArcTables with @@ -31,16 +31,16 @@ type ArcTables with ) |> ArcTables -// test parameters -let source = __SOURCE_DIRECTORY__ -let arcPath = Path.Combine(source, "../../") -let assayName = "Talinum_RNASeq_minimal" -let startingNodeNum = 0 -let outName = "rnaseq-samples" +// // test parameters +// let source = __SOURCE_DIRECTORY__ +// let arcPath = Path.Combine(source, "../../") +// let assayName = "Talinum_RNASeq_minimal" +// let startingNodeNum = 0 +// let outName = "rnaseq-samples" // Load ARC -// Remove all tables with either an input or output column missing 🤣😀 +// Remove all tables with either an input or output column missing let clean (a : ARC) = a.ISA.Value.Assays |> Seq.iter (fun a -> a.Tables @@ -64,7 +64,7 @@ let clean (a : ARC) = ) a -// transform all data cells to freetext cells 😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀😀🤣😀 +// transform all data cells to freetext cells let shittify (a : ARC) = a.ISA.Value.Assays |> Seq.iter (fun a -> a.Tables @@ -161,6 +161,8 @@ let wb = new FsSpreadsheet.FsWorkbook() wb.AddWorksheet ws -// Write to csv +// Write to xlsx +wb.ToXlsxFile ($"{outName}.xlsx") -wb.ToXlsxFile (outName + ".xlsx") \ No newline at end of file +// Write to csv +wb.ToCsvFile ($"{outName}.csv") \ No newline at end of file