From 28e57415662c2ebdaa85063425ff340362064f6e Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Mon, 4 Nov 2024 18:00:38 +0100
Subject: [PATCH] [needs f# fix] add isaSampleToRawDataSeq

---
 runs/isaSampleToRawDataSeq/README.md      | 10 ++
 runs/isaSampleToRawDataSeq/job.yml        |  6 ++
 workflows/isaSampleToRawDataSeq/README.md |  7 ++
 .../isaSampleToRawDataSeq.cwl             | 43 ++++++++
 .../isaSampleToRawDataSeq.fsx             | 98 +++++++++++++++++++
 5 files changed, 164 insertions(+)
 create mode 100644 runs/isaSampleToRawDataSeq/README.md
 create mode 100644 runs/isaSampleToRawDataSeq/job.yml
 create mode 100644 workflows/isaSampleToRawDataSeq/README.md
 create mode 100644 workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl
 create mode 100644 workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx

diff --git a/runs/isaSampleToRawDataSeq/README.md b/runs/isaSampleToRawDataSeq/README.md
new file mode 100644
index 0000000..d8e1dcb
--- /dev/null
+++ b/runs/isaSampleToRawDataSeq/README.md
@@ -0,0 +1,10 @@
+
+
+
+```bash
+cd runs/isaSampleToRawDataSeq
+```
+
+```bash
+cwltool ../../workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl job.yml
+```
diff --git a/runs/isaSampleToRawDataSeq/job.yml b/runs/isaSampleToRawDataSeq/job.yml
new file mode 100644
index 0000000..803a637
--- /dev/null
+++ b/runs/isaSampleToRawDataSeq/job.yml
@@ -0,0 +1,6 @@
+arcPath:
+  class: Directory
+  path: ../../
+assayName: "Talinum_RNASeq_minimal"
+outName: "rnaseq-samples"
+startingNodeNum: 1
diff --git a/workflows/isaSampleToRawDataSeq/README.md b/workflows/isaSampleToRawDataSeq/README.md
new file mode 100644
index 0000000..e831cf1
--- /dev/null
+++ b/workflows/isaSampleToRawDataSeq/README.md
@@ -0,0 +1,7 @@
+
+
+```bash
+dotnet fsi isaSampleToRawDataSeq.fsx ../../ Talinum_RNASeq_minimal rnaseq-samples 1
+
+
+```
diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl
new file mode 100644
index 0000000..9916ed4
--- /dev/null
+++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.cwl
@@ -0,0 +1,43 @@
+cwlVersion: v1.2
+class: CommandLineTool
+hints:
+  DockerRequirement:
+    dockerPull: mcr.microsoft.com/dotnet/sdk:6.0
+requirements:
+  - class: InitialWorkDirRequirement
+    listing:
+      - entryname: isaSampleToRawDataSeq.fsx
+        entry:
+          $include: isaSampleToRawDataSeq.fsx
+  - class: EnvVarRequirement
+    envDef:
+      - envName: DOTNET_NOLOGO
+        envValue: "true"
+  - class: NetworkAccess
+    networkAccess: true  # the script pulls its NuGet references when it runs
+baseCommand: [dotnet, fsi, isaSampleToRawDataSeq.fsx]
+inputs:
+  arcPath:
+    type: Directory
+    inputBinding:
+      position: 1
+  assayName:
+    type: string
+    inputBinding:
+      position: 2
+  outName:
+    type: string
+    inputBinding:
+      position: 3
+  startingNodeNum:
+    type: int
+    inputBinding:
+      position: 4
+
+outputs:
+  output:
+    type: File[]
+    outputBinding:
+      glob:
+        - "*.tsv"
+        - "*.xlsx"
diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
new file mode 100644
index 0000000..530a186
--- /dev/null
+++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
@@ -0,0 +1,98 @@
+// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node
+//
+// Usage: dotnet fsi isaSampleToRawDataSeq.fsx <arcPath> <assayName> <outName> <startingNodeNum>
+// Output: <outName>.tsv and <outName>.xlsx
+
+// Dependencies
+#r "nuget: ARCtrl.NET, 2.0.2"
+#r "nuget: ARCtrl.QueryModel, 1.0.5"
+#r "nuget: FsSpreadsheet.CsvIO, 6.2.0"
+
+open FsSpreadsheet.CsvIO
+open FsSpreadsheet.Net
+open System.IO
+open ARCtrl.NET
+open ARCtrl
+open ARCtrl.ISA
+open ARCtrl.QueryModel
+
+// Input parameters (fsi.CommandLineArgs starts with the script name, hence Array.tail)
+let args : string array = fsi.CommandLineArgs |> Array.tail
+
+if args.Length < 4 then
+    failwithf "Expected 4 arguments (arcPath assayName outName startingNodeNum), got %d" args.Length
+
+let arcPath = args.[0]
+let assayName = args.[1]
+let outName = args.[2]
+// Index of the sample (among the samples of each data node) whose name starts the row
+let startingNodeNum = args.[3] |> int
+
+// test parameters
+// let source = __SOURCE_DIRECTORY__
+// let arcPath = Path.Combine(source, "../../")
+// let assayName = "pick2012_illumina_rnaseq"
+// let outName = "out.csv"
+
+// Load ARC
+let arc = ARC.load(arcPath)
+
+let inv =
+    match arc.ISA with
+    | Some isa -> isa
+    | None -> failwithf "No ISA investigation could be loaded from ARC at '%s'" arcPath
+
+// Load first data node
+let firstData = inv.GetAssay(assayName).FirstData
+
+// Headers below are derived from firstData.[0], so an empty result must be rejected up front
+if Seq.isEmpty firstData then
+    failwithf "Assay '%s' has no data nodes" assayName
+
+// Create headers for output table: sample input, one column per value of the first data
+// node, raw data file output. Rows of other nodes are assumed to carry the same values.
+let headers = [
+    CompositeHeader.Input IOType.Sample
+    for v in inv.ArcTables.ValuesOf firstData.[0] do
+        if v.IsCharacteristicValue then
+            CompositeHeader.Characteristic v.Category
+        elif v.IsParameterValue then
+            CompositeHeader.Parameter v.Category
+        elif v.IsFactorValue then
+            CompositeHeader.Factor v.Category
+        elif v.IsComponent then
+            CompositeHeader.Component v.Category
+        else failwithf "what the f is %O" v
+    CompositeHeader.Output IOType.RawDataFile
+]
+
+// Create rows: sample name at index startingNodeNum, one cell per value, data node name
+let getRow (d: QNode) =
+    let samples = inv.ArcTables.SamplesOf d
+    if startingNodeNum < 0 || startingNodeNum >= Seq.length samples then
+        failwithf "startingNodeNum %d is out of range for data node '%s'" startingNodeNum d.Name
+    [|
+        CompositeCell.createFreeText samples.[startingNodeNum].Name
+        for v in inv.ArcTables.ValuesOf d do
+            if v.HasUnit then
+                CompositeCell.Unitized(v.ValueText, v.Unit)
+            else
+                CompositeCell.Term(v.Value.AsOntology())
+        CompositeCell.FreeText d.Name
+    |]
+
+// Combine into table
+let t = ArcTable.init "FullTable"
+t.Headers <- ResizeArray headers
+
+for d in firstData do
+    t.AddRow (getRow d)
+
+// Small detour via workbook
+let ws = Spreadsheet.ArcTable.toFsWorksheet t
+let wb = new FsSpreadsheet.FsWorkbook()
+wb.AddWorksheet ws
+
+// Write the workbook as tab-separated text and as xlsx
+wb.ToCsvFile (outName + ".tsv", Separator = '\t')
+wb.ToXlsxFile (outName + ".xlsx")
-- 
GitLab