From 8aaa86654d51bdd875d9fe7bf876bef11769c02a Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Tue, 5 Nov 2024 12:20:51 +0100
Subject: [PATCH] try fix fsharp isa read

---
Reviewer notes (this area, between the "---" marker and the diffstat, is
not part of the commit):

 * Formatting: line breaks and indentation of this patch were restored from
   a copy in which every newline had been collapsed into a space. The hunk
   contains 92 removed and 129 added lines, matching the hunk header and the
   diffstat. Leading whitespace was reconstructed with 4-space F#
   indentation and may not be byte-identical to the original blobs
   (index 4a9483c..a67f56a) - verify before applying.
 * What the change does:
   - drops the ", 2.0.2" version pin from both `#r "nuget: ..."` references;
   - opens ARCtrl.Helper, FsSpreadsheet and FsSpreadsheet.Net;
   - comments out the command-line argument parsing, so only the hard-coded
     "test parameters" remain in effect;
   - adds an `ArcTables` extension member and a `clean` function; both use
     the same predicate (a table must have an input AND an output column),
     the former by filtering, the latter by calling `RemoveTable` on every
     assay and study of the loaded ARC;
   - passes an extra `None` argument to `Spreadsheet.ArcTable.toFsWorksheet`;
   - comments out the TSV export (left as an incomplete `wb.To (...)` call),
     so only the .xlsx file is written.
 * NOTE(review): headers are built from the filtered tables and
   `firstData.[0].Name`, while `getRow` still queries `inv.ArcTables`
   directly with the node `d`. Presumably equivalent after `clean` has run,
   but confirm that header and row column counts cannot diverge.

 .../isaSampleToRawDataSeq.fsx                 | 221 ++++++++++--------
 1 file changed, 129 insertions(+), 92 deletions(-)

diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
index 4a9483c..a67f56a 100644
--- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
+++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
@@ -1,92 +1,129 @@
-// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node
-
-// Dependencies
-
-#r "nuget: ARCtrl.NET, 2.0.2"
-#r "nuget: ARCtrl.QueryModel, 2.0.2"
-
-open System.IO
-open ARCtrl.NET
-open ARCtrl
-open ARCtrl.QueryModel
-
-// input parameters
-
-let args : string array = fsi.CommandLineArgs |> Array.tail
-let arcPath = args.[0]
-let assayName = args.[1]
-let startingNodeNum = args.[2] |> int
-let outName = args.[3]
-
-
-// test parameters
-let source = __SOURCE_DIRECTORY__
-let arcPath = Path.Combine(source, "../../")
-let assayName = "Talinum_RNASeq_minimal"
-let startingNodeNum = 1
-let outName = "rnaseq-samples"
-
-
-// Load ARC
-
-let arc = ARC.load(arcPath)
-
-let inv = arc.ISA.Value
-
-// Load first data node
-
-let firstData = inv.GetAssay(assayName).FirstData
-
-// Create headers for output table
-let headers = [
-    CompositeHeader.Input IOType.Sample
-    for v in inv.ArcTables.ValuesOf firstData.[0] do
-        if v.IsCharacteristicValue then
-            CompositeHeader.Characteristic v.Category
-        elif v.IsParameterValue then
-            CompositeHeader.Parameter v.Category
-        elif v.IsFactorValue then
-            CompositeHeader.Factor v.Category
-        elif v.IsComponent then
-            CompositeHeader.Component v.Category
-        else failwithf "what the f is %O" v
-
-    CompositeHeader.Output IOType.Data
-]
-
-// Create rows
-
-let getRow (d: QNode) =
-    [|
-
-        CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name
-
-        for v in inv.ArcTables.ValuesOf d do
-            if v.HasUnit then
-                CompositeCell.Unitized(v.ValueText, v.Unit)
-            else
-                CompositeCell.Term(v.Value.AsOntology())
-
-        CompositeCell.FreeText d.Name
-
-    |]
-
-// Combine into table
-
-let t = ArcTable.init "FullTable"
-t.Headers <- ResizeArray headers
-
-for d in firstData do
-    t.AddRow (getRow d)
-
-// Small detour via workbook
-let ws = Spreadsheet.ArcTable.toFsWorksheet t
-
-let wb = new FsSpreadsheet.FsWorkbook()
-
-wb.AddWorksheet ws
-
-// Write to csv
-
-wb.ToCsvFile (outName + ".tsv", Separator = '\t')
-wb.ToXlsxFile (outName + ".xlsx")
+// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node
+
+// Dependencies
+
+#r "nuget: ARCtrl.NET"
+#r "nuget: ARCtrl.QueryModel"
+
+open System.IO
+open ARCtrl.NET
+open ARCtrl
+open ARCtrl.QueryModel
+open ARCtrl.Helper
+open FsSpreadsheet
+open FsSpreadsheet.Net
+
+// input parameters
+
+// let args : string array = fsi.CommandLineArgs |> Array.tail
+// let arcPath = args.[0]
+// let assayName = args.[1]
+// let startingNodeNum = args.[2] |> int
+// let outName = args.[3]
+
+
+type ArcTables with
+
+    member this.IgnoreShitty() : ArcTables =
+        this.Tables
+        |> ResizeArray.filter (fun t ->
+            t.TryGetInputColumn().IsSome && t.TryGetOutputColumn().IsSome
+        )
+        |> ArcTables
+
+
+// test parameters
+let source = __SOURCE_DIRECTORY__
+let arcPath = Path.Combine(source, "../../")
+let assayName = "Talinum_RNASeq_minimal"
+let startingNodeNum = 1
+let outName = "rnaseq-samples"
+
+// Load ARC
+
+let clean (a : ARC) =
+    a.ISA.Value.Assays |> Seq.iter (fun a ->
+        a.Tables
+        |> Seq.toArray
+        |> Seq.iter (fun t ->
+            if not (t.TryGetInputColumn().IsSome && t.TryGetOutputColumn().IsSome) then
+                a.RemoveTable t.Name
+
+        )
+
+    )
+    a.ISA.Value.Studies |> Seq.iter (fun s ->
+        s.Tables
+        |> Seq.toArray
+        |> Seq.iter (fun t ->
+            if not (t.TryGetInputColumn().IsSome && t.TryGetOutputColumn().IsSome) then
+                s.RemoveTable t.Name
+
+        )
+
+    )
+    a
+
+let arc = ARC.load(arcPath) |> clean
+
+
+let inv = arc.ISA.Value
+
+// Load first data node
+
+let firstData = inv.GetAssay(assayName).FirstData
+
+// Create headers for output table
+let headers = [
+    CompositeHeader.Input IOType.Sample
+    for v in inv.ArcTables.IgnoreShitty().ValuesOf firstData.[0].Name do
+        if v.IsCharacteristicValue then
+            CompositeHeader.Characteristic v.Category
+        elif v.IsParameterValue then
+            CompositeHeader.Parameter v.Category
+        elif v.IsFactorValue then
+            CompositeHeader.Factor v.Category
+        elif v.IsComponent then
+            CompositeHeader.Component v.Category
+        else failwithf "what the f is %O" v
+
+    CompositeHeader.Output IOType.Data
+]
+
+
+// Create rows
+
+let getRow (d: QNode) =
+    [|
+
+        CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name
+
+        for v in inv.ArcTables.ValuesOf d do
+            if v.HasUnit then
+                CompositeCell.Unitized(v.ValueText, v.Unit)
+            else
+                CompositeCell.Term(v.Value.AsOntology())
+
+        CompositeCell.FreeText d.Name
+
+    |]
+
+// Combine into table
+
+let t = ArcTable.init "FullTable"
+t.Headers <- ResizeArray headers
+
+for d in firstData do
+    t.AddRow (getRow d)
+
+// Small detour via workbook
+let ws = Spreadsheet.ArcTable.toFsWorksheet None t
+
+let wb = new FsSpreadsheet.FsWorkbook()
+
+wb.AddWorksheet ws
+
+// Write to csv
+
+// wb.To (outName + ".tsv", Separator = '\t')
+wb.ToXlsxFile (outName + ".xlsx")
-- 
GitLab