From 8aaa86654d51bdd875d9fe7bf876bef11769c02a Mon Sep 17 00:00:00 2001
From: Dominik Brilhaus <brilhaus@nfdi4plants.org>
Date: Tue, 5 Nov 2024 12:20:51 +0100
Subject: [PATCH] try fix fsharp isa read

---
 .../isaSampleToRawDataSeq.fsx                 | 221 ++++++++++--------
 1 file changed, 129 insertions(+), 92 deletions(-)

diff --git a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
index 4a9483c..a67f56a 100644
--- a/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
+++ b/workflows/isaSampleToRawDataSeq/isaSampleToRawDataSeq.fsx
@@ -1,92 +1,129 @@
-// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node 
-
-// Dependencies
-
-#r "nuget: ARCtrl.NET, 2.0.2"
-#r "nuget: ARCtrl.QueryModel, 2.0.2"
-
-open System.IO
-open ARCtrl.NET
-open ARCtrl
-open ARCtrl.QueryModel
-
-// input parameters
-
-let args : string array = fsi.CommandLineArgs |> Array.tail
-let arcPath = args.[0]
-let assayName = args.[1]
-let startingNodeNum = args.[2] |> int
-let outName = args.[3]
-
-
-// test parameters
-let source = __SOURCE_DIRECTORY__
-let arcPath = Path.Combine(source, "../../")
-let assayName = "Talinum_RNASeq_minimal"
-let startingNodeNum = 1
-let outName = "rnaseq-samples"
-
-
-// Load ARC
-
-let arc = ARC.load(arcPath)
-
-let inv = arc.ISA.Value 
-
-// Load first data node
-
-let firstData = inv.GetAssay(assayName).FirstData
-
-// Create headers for output table
-let headers = [
-    CompositeHeader.Input IOType.Sample
-    for v in inv.ArcTables.ValuesOf firstData.[0] do
-        if v.IsCharacteristicValue then
-            CompositeHeader.Characteristic v.Category
-        elif v.IsParameterValue then
-            CompositeHeader.Parameter v.Category
-        elif v.IsFactorValue then
-            CompositeHeader.Factor v.Category
-        elif v.IsComponent then
-            CompositeHeader.Component v.Category
-        else failwithf "what the f is %O" v
-
-    CompositeHeader.Output IOType.Data
-]
-
-// Create rows
-
-let getRow (d: QNode) = 
-    [|
-
-    CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name
-
-    for v in inv.ArcTables.ValuesOf d do
-        if v.HasUnit then
-            CompositeCell.Unitized(v.ValueText, v.Unit)
-        else
-            CompositeCell.Term(v.Value.AsOntology())
-
-    CompositeCell.FreeText d.Name
-    
-    |]
-
-// Combine into table
-
-let t = ArcTable.init "FullTable"
-t.Headers <- ResizeArray headers
-
-for d in firstData do
-    t.AddRow (getRow d)
-
-// Small detour via workbook
-let ws = Spreadsheet.ArcTable.toFsWorksheet t
-
-let wb = new FsSpreadsheet.FsWorkbook()
-
-wb.AddWorksheet ws
-
-// Write to csv
-
-wb.ToCsvFile (outName + ".tsv", Separator = '\t')
-wb.ToXlsxFile (outName + ".xlsx")
+// Pull out the full ISA process sequence (incl. all metadata) leading to the first Raw Data Node 
+
+// Dependencies
+
+#r "nuget: ARCtrl.NET"
+#r "nuget: ARCtrl.QueryModel"
+
+open System.IO
+open ARCtrl.NET
+open ARCtrl
+open ARCtrl.QueryModel
+open ARCtrl.Helper
+open FsSpreadsheet
+open FsSpreadsheet.Net
+
+// input parameters
+
+// let args : string array = fsi.CommandLineArgs |> Array.tail
+// let arcPath = args.[0]
+// let assayName = args.[1]
+// let startingNodeNum = args.[2] |> int
+// let outName = args.[3]
+
+
+type ArcTables with
+    /// Returns a new ArcTables built from only those tables that have both an input and an output column.
+    member this.IgnoreShitty() : ArcTables = 
+        // Keep a table only when both column lookups return a value.
+        let hasInputAndOutput (table : ArcTable) =
+            table.TryGetInputColumn().IsSome && table.TryGetOutputColumn().IsSome
+        let kept = this.Tables |> ResizeArray.filter hasInputAndOutput
+        ArcTables(kept)
+
+
+// Hard-coded test parameters; they stand in for the command-line arguments whose parsing is commented out above
+let source = __SOURCE_DIRECTORY__
+let arcPath = Path.Combine(source, "../../")
+let assayName = "Talinum_RNASeq_minimal"
+let startingNodeNum = 1
+let outName = "rnaseq-samples"
+
+// Load ARC
+
+/// Removes, in place, every assay table and every study table that lacks an input
+/// column or an output column.
+/// Returns the ARC that was passed in, so the call can end a pipeline.
+let clean (a : ARC)  = 
+    // Assays: walk an array snapshot of the tables because RemoveTable is called inside the loop.
+    for assay in a.ISA.Value.Assays do
+        for table in Seq.toArray assay.Tables do
+            let incomplete =
+                table.TryGetInputColumn().IsNone || table.TryGetOutputColumn().IsNone
+            if incomplete then
+                assay.RemoveTable table.Name
+
+    // Studies: same rule, same snapshot-then-remove pattern.
+    for study in a.ISA.Value.Studies do
+        for table in Seq.toArray study.Tables do
+            let incomplete =
+                table.TryGetInputColumn().IsNone || table.TryGetOutputColumn().IsNone
+            if incomplete then
+                study.RemoveTable table.Name
+
+    // Hand the same ARC back to the caller.
+    a
+
+let arc = ARC.load(arcPath) |> clean
+
+// ISA content of the cleaned ARC; the header and row builders below query it
+let inv = arc.ISA.Value 
+
+// Load first data node(s) of the assay named by assayName (indexed and iterated further down)
+
+let firstData = inv.GetAssay(assayName).FirstData
+
+// Create headers for output table: Input [Sample], one header per value found for the first data node, Output [Data]
+let headers = [
+    CompositeHeader.Input IOType.Sample
+    for v in inv.ArcTables.IgnoreShitty().ValuesOf firstData.[0].Name do
+        if v.IsCharacteristicValue then
+            CompositeHeader.Characteristic v.Category
+        elif v.IsParameterValue then
+            CompositeHeader.Parameter v.Category
+        elif v.IsFactorValue then
+            CompositeHeader.Factor v.Category
+        elif v.IsComponent then
+            CompositeHeader.Component v.Category
+        else failwithf "what the f is %O" v // any other kind of value aborts the script
+    // NOTE(review): headers come from firstData.[0] only; confirm every other node yields the same values in the same order
+    CompositeHeader.Output IOType.Data
+]
+
+
+// Create rows
+/// Builds one row for data node `d`: a sample-name cell, one cell per value of `d`, then a cell with the node's own name.
+let getRow (d: QNode) = 
+    [|
+    // First cell: name of the sample at index startingNodeNum among the samples of `d`
+    CompositeCell.createFreeText (inv.ArcTables.SamplesOf d).[startingNodeNum].Name
+    // Middle cells: a value carrying a unit becomes a Unitized cell, any other a Term cell
+    for v in inv.ArcTables.ValuesOf d do
+        if v.HasUnit then
+            CompositeCell.Unitized(v.ValueText, v.Unit)
+        else
+            CompositeCell.Term(v.Value.AsOntology())
+    // Last cell: name of the data node itself
+    CompositeCell.FreeText d.Name
+    
+    |]
+
+// Combine headers and rows into a single table named "FullTable"
+
+let t = ArcTable.init "FullTable"
+t.Headers <- ResizeArray headers
+// One row per data node in firstData
+for d in firstData do
+    t.AddRow (getRow d)
+
+// Small detour via workbook: turn the table into a worksheet and add it to a new workbook
+let ws = Spreadsheet.ArcTable.toFsWorksheet None t
+
+let wb = new FsSpreadsheet.FsWorkbook()
+
+wb.AddWorksheet ws
+
+// Write to xlsx (the tsv export on the next line is commented out)
+
+// wb.To (outName + ".tsv", Separator = '\t')
+wb.ToXlsxFile (outName + ".xlsx")
-- 
GitLab