From e6a83e28ef3e611cdba53098dcc17034607aa78f Mon Sep 17 00:00:00 2001
From: Jonathan Ott <jonathan.ott1994@gmail.com>
Date: Thu, 11 Jan 2024 16:21:01 +0100
Subject: [PATCH] add cwl compatible version and workflow

---
 runs/TurboIDSampleArc.yml                  |  13 +
 workflows/EvalTurboID/EvalTurboID.fsx      | 696 ++++++++++++++++++++-
 workflows/EvalTurboID/TurboIDSampleArc.cwl |  48 ++
 3 files changed, 731 insertions(+), 26 deletions(-)
 create mode 100644 runs/TurboIDSampleArc.yml
 create mode 100644 workflows/EvalTurboID/TurboIDSampleArc.cwl

diff --git a/runs/TurboIDSampleArc.yml b/runs/TurboIDSampleArc.yml
new file mode 100644
index 0000000..87e4dae
--- /dev/null
+++ b/runs/TurboIDSampleArc.yml
@@ -0,0 +1,13 @@
+arcDirectory:
+  class: Directory
+  path: ../
+mappingFile:
+  class: File
+  path: ../workflows/EvalTurboID/scripts/chlamy_jgi55.txt
+scriptFile:
+  class: File
+  path: ../workflows/EvalTurboID/EvalTurboID.fsx
+input:
+  class: File
+  path: ../assays/MSEval/dataset/TEF_PSBQ_LON_MS264DDAfs_combined_protein.tsv
+runID: test
diff --git a/workflows/EvalTurboID/EvalTurboID.fsx b/workflows/EvalTurboID/EvalTurboID.fsx
index 452d083..09344df 100644
--- a/workflows/EvalTurboID/EvalTurboID.fsx
+++ b/workflows/EvalTurboID/EvalTurboID.fsx
@@ -19,18 +19,659 @@ open ISADotNet
 open ISADotNet
 open ISADotNet.QueryModel
 
-#load "scripts/annotate.fsx"
+module Annotate =
+
+    open System
+    open Argu
+    open Deedle 
+
+    printfn "=====\nCheck if dotnet sdk reference can be rechanged to latest: https://stackoverflow.com/a/65934809 \n====="
+
+    /// Maps the keywords "tab" and "tabulator" to a tab character, because \t or "\t" does not work as an argument.
+    /// Every other string is handed back unchanged.
+    let getSeparator str = 
+        match str with
+        | "tab" | "tabulator" -> "\t"
+        | other -> other
+
+    /// Snapshots of MapMan and Gene Ontology were generated with FATool and genome releases 5.5 (Chlamy) and Araport11 (Arabidopsis)
+    module Mapping =
+    
+        /// Loads an annotation table (FA tool snapshot for araport11 or chlamyJGI_v5.5, or another annotation file) from a delimited text file with a header row.
+        /// The rows of the returned frame are keyed by the values of the column named `identifierColHeader`.
+        let readMapping (mappingFilePath:string) (columnSeparator:string) (identifierColHeader:string) :Frame<string,string> = 
+            Frame.indexRows identifierColHeader (Frame.ReadCsv(mappingFilePath,hasHeaders=true,separators = columnSeparator))
+ 
+        /// Collects the annotations of every identifier contained in `identifier`; truncateID processes each identifier before the lookup (e.g. Cre10.g123456.t2.1 -> Cre10.g123456)
+        let getAnnotationsFromIdentifier (frame:Frame<string,string>) (annotationHeader:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) (truncateID:string->string) (identifier:string) =
+        
+            /// identifiers that should be mapped (the input split at multipleIdentifierSeparator)
+            let identifier = identifier.Split([|multipleIdentifierSeparator|],StringSplitOptions.None)
+            
+            /// Annotation rows of all identifiers that are present in the annotation frame; missing identifiers are reported and skipped
+            let mappings :Series<string,string> []= 
+                identifier 
+                |> Array.choose (fun ident -> 
+                    let truncId = truncateID ident
+                    let k = frame.TryGetRow truncId
+                    if k.HasValue then 
+                        Some k.Value
+                    else
+                        printfn "Warning: The following id could not be found within mapping file: %s" truncId
+                        None
+                    )
+
+            /// for every requested annotation column: one entry per mapped identifier, with ';' replaced by multipleAnnotationSeparator
+            annotationHeader //["GO","Synonym"]
+            |> Array.map (fun annotationType -> 
+                mappings //[[GO => GO:006; Synonym => Q0WV96];[GO => GO:001; Synonym => Q01337]]
+                |> Array.map (fun mapping -> 
+                    let annotation = mapping.[annotationType]
+                    annotation.Split ';' 
+                    |> String.concat multipleAnnotationSeparator
+                    )
+                )
+
+        /// One cell per annotation header: the annotations of all mapped identifiers, joined with `multipleIdentifierSeparator`.
+        let getAnnotationRow (frame:Frame<string,string>) (annotationHeader:string[]) multipleIdentifierSeparator multipleAnnotationSeparator truncateID (identifier:string) = 
+            getAnnotationsFromIdentifier frame annotationHeader multipleIdentifierSeparator multipleAnnotationSeparator truncateID identifier |> Array.map (fun perIdentifier -> String.concat multipleIdentifierSeparator perIdentifier)
+
+
+    /// User data is read, annotated and written to a new file
+    module Data = 
+    
+        /// Reads all lines of `inputPath` and splits each line at `columnSeparator`; returns one string array per line.
+        let getDataFrame (columnSeparator:string) inputPath = 
+            let splitLine (line:string) = line.Split([|columnSeparator|],System.StringSplitOptions.None)
+            System.IO.File.ReadAllLines(inputPath)
+            |> Array.map splitLine
+
+        /// Zero-based position of `columnHeader` within the first row of `dataFrame`; fails if the header is not present.
+        let getColIndex (dataFrame:string[][]) columnHeader= 
+            let headerRow = dataFrame.[0]
+            let position = headerRow |> Array.tryFindIndex (fun cell -> cell = columnHeader)
+            match position with
+            | Some idx -> idx
+            | None -> failwithf "ColumnHeader %s not found." columnHeader
+    
+        /// Header line of the annotated file: the cells of the first row followed by the annotation headers, joined with `columnSeparator`.
+        let getHeader (dataFrame:string[][]) (columnSeparator:string) (annotationHeader:string[]) = 
+            let headerCells = Array.append dataFrame.[0] annotationHeader
+            String.concat columnSeparator headerCells
+   
+        /// Annotates every data row of the file at `inputPath`: the identifier in column `columnHeader` is looked up in `annotationFrame` and the annotation cells are appended to the row. Returns the header line followed by the annotated lines.
+        let getAnnotatedLines annotationFrame inputPath (columnSeparator:string) columnHeader (annotations:string[]) (multipleIdentifierSeparator:string) truncateID (multipleAnnotationSeparator:string) =       
+            let table = getDataFrame columnSeparator inputPath
+            let idColumn = getColIndex table columnHeader
+            let headerLine = getHeader table columnSeparator annotations
+            // rows without an identifier get one empty cell per annotation column
+            let emptyAnnotations () = Array.init annotations.Length (fun _ -> "")
+            // appends the annotation cells to a single row and joins the row back into one line
+            let annotateRow (row:string[]) = 
+                // double quotes are stripped from the identifier before the lookup
+                let identifier = row.[idColumn].Replace("\"","")
+                let annotationCells = 
+                    match identifier with
+                    | "" -> emptyAnnotations ()
+                    | ident -> Mapping.getAnnotationRow annotationFrame annotations multipleIdentifierSeparator multipleAnnotationSeparator truncateID ident
+                Seq.append row annotationCells
+                |> String.concat columnSeparator
+            let annotatedLines = 
+                table
+                |> Array.tail
+                |> Array.map annotateRow
+            Array.append [|headerLine|] annotatedLines
+
+        let annotateAndWriteData annotationFrame inputPath (columnSeparator:string) columnHeader truncateID (annotations:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) outputPath =
+            /// aborts with an exception (despite the "WARNING" wording) if the column separator equals the identifier or the annotation separator
+            let checkSeparators = 
+                if columnSeparator = multipleIdentifierSeparator then failwith "WARNING: Column separator is equal to identifier separator"
+                if columnSeparator = multipleAnnotationSeparator then failwith "WARNING: Column separator is equal to annotation separator"
+            let annotatedRows = 
+                getAnnotatedLines annotationFrame inputPath columnSeparator columnHeader annotations multipleIdentifierSeparator truncateID multipleAnnotationSeparator
+            System.IO.File.WriteAllLines(outputPath,annotatedRows)
+
+module SamPlotting =
+    open Deedle
+    open FSharp.Stats
+    open Plotly.NET
+    open FSharp.Stats.Testing
+    open SAM
+    open Plotly.NET.StyleParam
+
+    /// Builds the SAM result plot: observed statistics of all bioitems against the expected statistics
+    /// (`res.AveragePermutations`), plus the bisecting line, both delta lines and the upper/lower cut lines.
+    let createSAMChart res = 
+
+        let observed = [| res.NegSigBioitem; res.NonSigBioitem; res.PosSigBioitem|] |> Array.concat 
+        let obs = observed |> Array.map (fun x -> x.Statistics) 
+        let expected = res.AveragePermutations |> Array.map (fun x -> x.Statistics)
+        // range of the observed statistics; the helper lines below are drawn across this range
+        let minDi = Seq.min obs
+        let maxDi = Seq.max obs
+
+        // positive significant changes 
+        let posExpected = expected.[res.NegSigBioitem.Length + res.NonSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length + res.PosSigBioitem.Length-1]
+        let posChart = 
+            Chart.Point(posExpected,res.PosSigBioitem |> Array.map (fun x -> x.Statistics))
+            |> Chart.withLineStyle(Color=Color.fromKeyword Green)
+            |> Chart.withTraceInfo("positive change",Visible = Visible.True )
+
+        // no significant changes
+        let nonex = expected.[res.NegSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length-1]
+        let nonchart = 
+            Chart.Point(nonex,res.NonSigBioitem |> Array.map (fun x -> x.Statistics))
+            |> Chart.withLineStyle(Color=Color.fromKeyword Gray)
+            |> Chart.withTraceInfo("no change",Visible = Visible.True)
+
+        // negative significant changes 
+        let negex = expected.[0 .. res.NegSigBioitem.Length-1]
+        let negchart = 
+            Chart.Point(negex,res.NegSigBioitem |> Array.map (fun x -> x.Statistics))
+            |> Chart.withLineStyle(Color=Color.fromKeyword Red)
+            |> Chart.withTraceInfo("negative change",Visible = Visible.True)
+
+        let samValues = 
+            [
+                negchart
+                nonchart
+                posChart
+            ]
+            |> Chart.combine
+        
+        let chartConfig =
+            let svdConfig =
+                ConfigObjects.ToImageButtonOptions.init(
+                    Format = StyleParam.ImageFormat.SVG)
+            Config.init (
+                ToImageButtonOptions = svdConfig,
+                ModeBarButtonsToAdd=[ModeBarButton.HoverCompareCartesian]
+            
+            )
+
+        let cutLineUp = [(minDi + res.Delta) ; (maxDi + res.Delta)]
+        let cutsUp = 
+            Chart.Line(cutLineUp,[minDi;maxDi])
+            |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
+            |> Chart.withTraceInfo("delta",Visible = Visible.True)
+        let cutLineLow = [(minDi - res.Delta) ; (maxDi - res.Delta)]
+        let cutsLow = 
+            Chart.Line(cutLineLow,[minDi;maxDi])
+            |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
+            |> Chart.withTraceInfo("delta",Visible = Visible.True)
+        let linechart = 
+            Chart.Line([minDi;maxDi], [minDi;maxDi])
+            |> Chart.withTraceInfo("bisecting angle",Visible = Visible.True)
+            |> Chart.withLineStyle(Color=Color.fromKeyword Black, Width = 1)
+
+        // horizontal line at the upper cut; it spans the full observed range exactly like `lowercut`
+        // (anchors generated with a fixed step of 5 stopped short of maxDi, or left a single anchor for ranges below 5)
+        let uppercut = 
+            Chart.Line([minDi;maxDi],[res.UpperCut;res.UpperCut])
+            |> Chart.withTraceInfo("upper cut",Visible = Visible.True)
+            |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash, Width = 0.3)
+
+        let lowercut = 
+            Chart.Line([minDi;maxDi],[res.LowerCut;res.LowerCut])
+            |> Chart.withTraceInfo("lower cut",Visible = Visible.True)
+            |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash,Width = 0.3)
+            //|> Chart.withXAxisStyle(MinMax = (-15,20))
+
+        let plot = 
+            [linechart;
+            samValues;
+            cutsUp;
+            cutsLow;
+            uppercut;
+            lowercut]
+            |> Chart.combine
+            |> Chart.withTitle(title = "SAM results")
+            |> Chart.withXAxisStyle("expected Score")
+            |> Chart.withYAxisStyle ("observed Score")
+            |> Chart.withConfig(chartConfig)
+            |> Chart.withTemplate(ChartTemplates.lightMirrored)
+        plot 
+
+module Plotting =
+
+    open FSharp.Stats
+    open Plotly.NET
+    open Deedle
+    open BioFSharp.IO
+    open DeedleExtensions
+
+    let plotMA highlightList (f:Frame<string,string>) = 
+        let dataAll = 
+            f
+            |> Frame.mapRows (fun k s -> 
+                let a = s.GetAs<float>("MeanAbundance",nan)
+                let r = s.GetAs<float>("MeanR",nan)
+                let stDev = s.GetAs<float>("StDevR",nan)
+                let l = 
+                    let x = s.GetAs<string>("ProtName","")
+                    if x = "" then 
+                        "" 
+                    else 
+                        x
+                let c = s.GetAs<bool>("IsCandidate")
+                {|
+                        ProtName         = l
+                        MeanAbundance    = a
+                        MeanR            = r
+                        StDevR           = stDev
+                        IsCandidate      = c
+                |}
+                )
+            |> Series.values
+            |> Array.ofSeq
+        // trace with every row of the frame: x = MeanAbundance, y = MeanR, labelled with ProtName
+        let all = 
+            let x = dataAll |> Seq.map (fun x -> x.MeanR) 
+            let y = dataAll |> Seq.map (fun x -> x.MeanAbundance) 
+            let label = dataAll |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(y,x, MultiText = label)
+        // NOTE(review): `candidates` is built here but is not part of the combined chart returned below
+        let candidates =
+            let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate)  
+            let x = dataFiltered |> Seq.map (fun x -> x.MeanR) 
+            let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) 
+            let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(
+            y,x,
+            Name="points",
+            MultiText=labels,
+            TextPosition=StyleParam.TextPosition.TopRight
+            )
+            |> Chart.withTraceInfo "candidates"
+        let candidatesNoStabw =
+            let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate)  
+            let x = dataFiltered |> Seq.map (fun x -> x.MeanR) 
+            let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) 
+            let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
+        
+            Chart.Point(
+            y,x,
+            Name="points", MultiText = labels
+            )
+            |> Chart.withTraceInfo "candidatesNoStabw"
+        let candidates' =
+            let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName )  
+            let x = dataFiltered |> Seq.map (fun x -> x.MeanR) 
+            let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) 
+            let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(
+            y,x,
+            Name="points",
+            MultiText=labels,
+            TextPosition=StyleParam.TextPosition.TopRight
+            )
+            |> Chart.withTraceInfo "candidates Reci"
+        [
+            all
+            candidatesNoStabw
+            candidates'
+
+        ]
+        |> Chart.combine
+        |> Chart.withYAxisStyle "log2(bait)-log2(control)"
+        |> Chart.withXAxisStyle "log2(Mean intensity)"
+
+    /// Volcano plot of a result frame with one labelled point per row.
+    /// x = "MeanR" (titled log2(bait)-log2(control)), y = -log10 of "qValue".
+    /// Rows flagged "IsCandidate" and rows whose "ProtName" is listed in `highlightList`
+    /// are added as separate traces ("candidatesNoStabw" and "candidates Reci").
+    let plotVulcano highlightList f = 
+        let dataAll = 
+            f
+            |> Frame.mapRows (fun k s -> 
+                let a = s.GetAs<float>("MeanAbundance",nan)
+                let r = s.GetAs<float>("MeanR",nan)
+                let stDev = s.GetAs<float>("StDevR",nan)
+                let qValue = s.GetAs<float>("qValue",nan)
+                // missing protein names fall back to the empty string
+                let l = s.GetAs<string>("ProtName","")
+                let c = s.GetAs<bool>("IsCandidate")
+                // MeanAbundance and StDevR are carried along but not used by the traces below
+                {|
+                        ProtName         = l
+                        MeanAbundance    = a
+                        MeanR            = r
+                        StDevR           = stDev
+                        IsCandidate      = c
+                        QVal = qValue
+                |}
+                )
+            |> Series.values
+            |> Array.ofSeq
+    
+        /// -log10 of a q value. A q value of 0 yields +infinity,
+        /// which is replaced by 4 so that the point can still be drawn.
+        let negLog10 (q:float) = 
+            let res = -log10 q
+            if infinity.Equals res then 4. else res
+
+        // trace with every row of the frame
+        let all = 
+            let y = dataAll |> Seq.map (fun x -> x.MeanR)
+            let x = 
+                dataAll 
+                |> Seq.map (fun x -> negLog10 x.QVal)
+            let label = dataAll |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(y,x, MultiText = label)
+
+        // rows flagged as candidate
+        let candidatesNoStabw =
+            let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate)  
+            let y = dataFiltered |> Seq.map (fun x -> x.MeanR) 
+            let x = 
+                dataFiltered 
+                |> Seq.map (fun x -> negLog10 x.QVal)
+            let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(
+            y,x,
+            Name="points",
+            MultiText=labels
+            )
+            |> Chart.withTraceInfo "candidatesNoStabw"
+
+        // rows explicitly requested via highlightList
+        let candidates' =
+            let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName )  
+            let y = dataFiltered |> Seq.map (fun x -> x.MeanR) 
+            let x = 
+                dataFiltered 
+                |> Seq.map (fun x -> negLog10 x.QVal)
+            let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
+            Chart.Point(
+            y,x,
+            Name="points",
+            MultiText=labels,
+            TextPosition=StyleParam.TextPosition.TopRight
+            )
+            |> Chart.withTraceInfo "candidates Reci"
+
+        // combine the three traces into one chart
+        [
+            all
+            candidatesNoStabw
+            candidates'
+
+        ]
+        |> Chart.combine
+        // Chart.Point receives the x values first: x = MeanR, y = -log10(qValue)
+        |> Chart.withXAxisStyle "log2(bait)-log2(control)"
+        |> Chart.withYAxisStyle "-log10(qValue)"
+
+    let plotHisto (f:Frame<string,string>) =
+        let dataAll = 
+            f
+            |> Frame.mapRows (fun k s -> 
+                let a = s.GetAs<float>("MeanAbundance",nan)
+                let r = s.GetAs<float>("MeanR",nan)
+                let stDev = s.GetAs<float>("StDevR",nan)
+                let l = ""// the histogram below only uses MeanR, so no label is read (was: s.GetAs<string>("ProtName",""))
+                let c = s.GetAs<bool>("IsCandidate")
+                {|
+                        ProtName         = l
+                        MeanAbundance    = a
+                        MeanR            = r
+                        StDevR           = stDev
+                        IsCandidate      = c
+                |}
+                )
+            |> Series.values
+            |> Array.ofSeq
+        let histo =     
+            let data = dataAll |> Seq.map (fun x -> x.MeanR)   |> Seq.filter (fun x -> nan.Equals x |> not)|> Array.ofSeq
+            let bw = FSharp.Stats.Distributions.Bandwidth.nrd0 data 
+            let binned = FSharp.Stats.Distributions.Frequency.create  (bw) data |> Map.toSeq
+            Chart.Column binned
+            |> Chart.withXAxisStyle "log2(bait)-log2(control)"
+        histo
+    
+module MetaxaTemp =
+    open MetaXa
+    open MetaFrame
+    open Stats
+    open FSharp.Stats
+    open Plotly.NET
+    open Deedle
+    open BioFSharp.IO
+    open DeedleExtensions
+    open ISADotNet
+
+
+    module Global = 
+
+        /// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md 
+        module MedianOfRatios = 
+
+            /// expects data frame with shape frame<sampleIds,GeneIds>; returns the corrected frame plus three column charts (initial medians, correction factors, corrected medians)
+            let medianOfRatiosWide (data:Frame<string,string>) =
+                // NOTE(review): the per-gene reference is the median across samples, whereas DESeq2's median-of-ratios uses the geometric mean - confirm this is intended
+                let ratios = 
+                    data
+                    |> Frame.dropSparseCols
+                    |> Frame.getNumericCols
+                    |> Series.map (fun geneID s -> 
+                        let medianAcrossAllSamples = 
+                            s.Values 
+                            |> FSharp.Stats.Seq.median
+                        let ratioPerSample = 
+                            s
+                            |> Series.map (fun x sample ->  sample / medianAcrossAllSamples)
+                        ratioPerSample
+                        )
+                    |> Frame.ofColumns
+                // Series of shape: Series<sampleId,correctionFactor:float>
+                let correctionFactorPerSample = 
+                    ratios
+                    |> Frame.transpose
+                    |> Stats.median
+                // data frame with frame<sampleIds,GeneIds>
+                // contains gene data divided by the sample wise estimated correction factor
+                let correctedData = 
+                    data
+                    |> Frame.getNumericCols 
+                    |> Series.map (fun geneID s -> 
+                        s / correctionFactorPerSample
+                        )
+                    |> Frame.ofColumns
+                let abundanceInit = 
+                    Chart.Column(data |> Frame.transpose |> Frame.dropSparseRows |> Stats.median |> Series.observations)
+                    |> Chart.withXAxisStyle "SampleId"
+                    |> Chart.withYAxisStyle "Median of Initial Abundances" 
+                let c = 
+                    Chart.Column(correctionFactorPerSample |> Series.observations)
+                    |> Chart.withXAxisStyle "SampleId"
+                    |> Chart.withYAxisStyle "Correction Factor."
+                let abundanceCorrected = 
+                    Chart.Column(correctedData |> Frame.transpose |> Frame.dropSparseRows  |> Stats.median |> Series.observations)
+                    |> Chart.withXAxisStyle "SampleId"
+                    |> Chart.withYAxisStyle "Median of corrected abundances" 
+                correctedData,abundanceInit,c,abundanceCorrected
+
+
+    module Imputation = 
+
+
+        module SimpleSampling = 
+            /// Expects normalized data. Imputes missing (nan) values column wise, grouped by the first level of the row key, using `sampleFunction mean standardDev`.
+            /// learnVarianceMeanDependence receives the empirical (log2 mean, cv) pairs and returns a function that maps a log2 mean to a cv estimate.
+            let computeColumnWiseUsing (sampleFunction: float -> float -> float) minCountsForVarEst (quantileForLowMean:float option) (learnVarianceMeanDependence: seq<float*float> -> (float -> float)) (grouped:Frame<string*string,string>) =
+                /// mean of the values of x, or nan if Series.values yields fewer than k values
+                let meanWithAtLeast k (x:Series<_,float>) =
+                    let tmp = Series.values x
+                    if tmp |> Seq.length < k  then 
+                        nan
+                    else 
+                        x |> Series.values |> Seq.mean 
+                /// cv of the values of x, or nan if Series.values yields fewer than k values
+                let cvWithAtLeast k (x:Series<_,float>) =
+                    let tmp = Series.values x
+                    if tmp |> Seq.length < k  then 
+                        nan
+                    else 
+                        x |> Series.values |> cv //Seq.disp
+
+                /// (log2 mean, cv) pairs of all numeric columns, sorted by mean; pairs containing nan and cv outliers (Tukey, k = 1.5) are removed.
+                let log2MeanToCVEmpirical = 
+                    grouped
+                    |> Frame.getNumericCols
+                    |> Series.map (fun geneID s -> 
+                        let means = s |> (meanWithAtLeast minCountsForVarEst)
+                        let cols = s |> (cvWithAtLeast minCountsForVarEst)
+                        means, cols
+                        )
+                    |> Series.values
+                    |> Array.ofSeq
+                    |> Array.filter (fun x -> nan.Equals(fst x) |> not && nan.Equals(snd x) |> not)
+                    |> Array.sortBy fst
+                    |> Array.map (fun (mean,dis) -> log2 mean, dis)
+                    |> fun data -> 
+                        let borders = FSharp.Stats.Testing.Outliers.tukey 1.5 (data |> Array.map snd)
+                        data 
+                        |> Array.filter (fun x -> snd x > borders.Lower && snd x < borders.Upper )
+                /// function mapping from a log2 mean expression to an estimator for dispersion (cv).
+                let log2MeanToCv = 
+                    learnVarianceMeanDependence log2MeanToCVEmpirical
+
+                let log2MeanToCVByFit = 
+                    log2MeanToCVEmpirical
+                    |> Array.map (fun x -> fst x, fst x |> log2MeanToCv)
+                let meanToCvEstChart = 
+                    [
+                        [
+                        
+                            Chart.PointDensity(log2MeanToCVEmpirical |> Array.map fst,log2MeanToCVEmpirical |> Array.map snd)
+                            // |> Chart.withMarkerStyle (3)
+                        ]
+                        |> Chart.combine
+                        |> Chart.withTraceInfo "mean to CV empirical"
+        
+                        Chart.Spline(log2MeanToCVByFit)
+                        |> Chart.withTraceInfo "mean Vs. CV fitted"
+                    ]
+                    |> Chart.combine
+                    |> Chart.withXAxisStyle "log2(mean Abundance)"
+                    |> Chart.withYAxisStyle "CV"
+
+                let quantileForLowMean, quantilesToMeansChart = 
+                    let data = 
+                        Array.map fst log2MeanToCVEmpirical
+                        |> Array.map (fun x -> 2.**x) 
+                    match quantileForLowMean with 
+                    | None -> 
+                        None,
+                        [
+                            [|0.05 .. 0.05 .. 0.95|]
+                            |> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data)
+                            |> Chart.Point
+                            |> Chart.withTraceInfo "quantiles VS Abundance"
+                        ]
+                        |> Chart.combine
+                        |> Chart.withXAxisStyle "Quantiles"
+                        |> Chart.withYAxisStyle "Abundance"
+                    | Some quantileForLowMean -> 
+                        let q = data |> FSharp.Stats.Quantile.compute quantileForLowMean
+                        Some q,
+                        [
+                            [|0.00 .. 0.05 .. 0.95|]
+                            |> Array.mapi (fun i q -> if i = 0 then 0.01 else q)
+                            |> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data)
+                            |> Chart.Point
+                            |> Chart.withTraceInfo "quantiles VS Abundance"
+                            Chart.Point([quantileForLowMean],[q])
+                            |> Chart.withTraceInfo "selected quantile"
+                        ]
+                        |> Chart.combine
+                        |> Chart.withXAxisStyle "Quantiles"
+                        |> Chart.withYAxisStyle "Abundance"
+
+                /// Replaces nan values in d by draws of sampleF: mean = mean of the present values (or lowMean if none is present), standard deviation = estimated cv * mean.
+                /// Draws below zero are rejected and repeated, so no negative values are returned.
+                let initImputeUsing sampleF (estimateCv:float -> float) (lowMean:float option) (d:  seq<float>) =          
+                        let clean = d |> Seq.filter (fun x -> nan.Equals x |> not)
+                        let sampleSafe mean standardDev = 
+                            let rec sample () = 
+                                let tmp = sampleF mean standardDev 
+                                if tmp < 0. then sample () else tmp
+                            sample()
+                        if clean |> Seq.isEmpty |> not then 
+                            let mean = Seq.mean clean
+                            let cvEst = estimateCv mean 
+                            let standardDev = cvEst * mean 
+                            //printfn "mean %f" mean
+                            //printfn "stndev %f" standardDev
+                            //printfn "before %A" d 
+                            let res = 
+                                d 
+                                |> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x)
+                            //printfn "%A" res
+                            res
+                        else 
+                            match lowMean with 
+                            | Some lowMean -> 
+                                let mean = lowMean
+                                let cvEst = estimateCv mean 
+                                let standardDev = cvEst * mean 
+                                d 
+                                |> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x)
+                            | None -> d
+
+                let imputeUsing (x:seq<float>) = 
+                    initImputeUsing sampleFunction (log2 >> log2MeanToCv) (quantileForLowMean) x
+
+                /// Imputed frame: step 1 blanks genes that are not comparable, step 2 imputes the remaining gaps per factor level.
+                let imputed = 
+                    // Step 1: Set genes missing when they are not comparable
+                    // because all sample groups have 1 or zero observations. 
+                    grouped
+                    |> Frame.getNumericCols
+                    |> Series.map (fun geneID s -> 
+                        let geneDataByFactorLevels = 
+                            s 
+                            |> Series.applyLevel fst (Series.values >> Array.ofSeq)
+                        let geneData = 
+                            geneDataByFactorLevels
+                            |> Series.observations
+                        let present = 
+                            geneData 
+                            |> Seq.map (fun (sampleID,v) -> 
+                                    v.Length > 1
+                                )
+                            |> Seq.contains true 
+                        if present then s else s |> Series.mapValues (fun s -> nan)
+                        )
+                    |> Frame.ofColumns
+                    // Step 2: 
+                    // x present 0 not present *nan*
+                    // [x; x; x] -> [x; x; x]
+                    // [x; x; 0] -> [x; x; x] by sampling, mean from sample, var by est
+                    // Will be biased towards the remaining point.
+                    // [x; 0; 0] -> [x; x; x] by sampling, mean from sample, var by est
+                    //
+                    // [0; 0; 0] -> 
+                        //depending on the value of quantileForLowMean :
+                        //Some x -> [x; x; x] 
+                        //None   -> [0; 0; 0] 
+                    |> Map.mapColWiseGroupedByFactorLevels imputeUsing
+
+                // meanToCvEstChart |> Chart.show
+                // quantilesToMeansChart |> Chart.show
+                imputed
+
 open Annotate 
 open Argu
-#load "scripts/SamPlotting.fsx"
 open SamPlotting
-#load "scripts/Plotting.fsx"
 open Plotting
-#load "scripts/MetaxaTemp.fsx"
 open MetaxaTemp
 module Arguments = 
     type WorkflowArgs =
         | [<Unique>] [<Mandatory>] [<AltCommandLine("-i")>] InputPath of path:string
+        | [<Unique>] [<Mandatory>] [<AltCommandLine("-a")>] ArcPath of path:string
+        | [<Unique>] [<Mandatory>] [<AltCommandLine("-m")>] MappingFilePath of path:string
+        | [<Unique>] [<Mandatory>] [<AltCommandLine("-id")>] RunID of path:string
         // | [<Unique>] [<Mandatory>] [<AltCommandLine("-o")>] OutputPath of path:string
         | [<Unique>]  [<AltCommandLine("-c")>] IdentifierColumnHeader of string
         | [<Unique>]  [<AltCommandLine("-fdr")>] FDR of string
@@ -43,6 +684,9 @@ module Arguments =
             member this.Usage =
                 match this with
                 | InputPath x                   -> "relative input file path in /arc/"
+                | ArcPath x                     -> "relative path to the arc"
+                | RunID x                       -> "ID for the run" // the ID is also used as the name of the output directory
+                | MappingFilePath x             -> "relative path to the mapping file in /arc/"
                 // | OutputPath x                  -> "relative output file path in /arc/runs/"
                 | IdentifierColumnHeader x      -> "column header of identifier column file to annotate"
                 | FDR x                         -> "FDR"
@@ -61,6 +705,9 @@ module Arguments =
    
     // arguments are converted
     let inputPath                    = annotationR.GetResult(InputPath)//@"/arc/assays/" + annotationR.GetResult(InputPath)
+    let arcPath                      = annotationR.GetResult(ArcPath) // ARC folder later loaded via Investigation.fromArcFolder
+    let runID                        = annotationR.GetResult(RunID) // names the output directory of this run
+    let mappingFilePath              = annotationR.GetResult(MappingFilePath) // mapping file later passed to Mapping.readMapping
     // let outputFilePath               = annotationR.GetResult(OutputPath)
     let columnHeader                 = 
         match annotationR.TryGetResult(IdentifierColumnHeader) with 
@@ -90,10 +737,8 @@ module Arguments =
 ////////////////////////////////////////
 ////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-let runID = System.Guid.NewGuid().ToString()
-let source = __SOURCE_DIRECTORY__
-let runP = System.IO.Path.Combine([|source + @"\..\..\runs";runID|])
-System.IO.Directory.CreateDirectory runP
+let runID = Arguments.runID // run ID from the command line; doubles as the output directory name
+System.IO.Directory.CreateDirectory runID // a relative ID resolves against the current working directory
 |> ignore
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////
@@ -102,10 +747,9 @@ System.IO.Directory.CreateDirectory runP
 ////////////////////////////////////////
 ////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-let arcPath = __SOURCE_DIRECTORY__ + @"\..\..\"
 
 let qi = 
-    arcIO.NET.Investigation.fromArcFolder arcPath
+    arcIO.NET.Investigation.fromArcFolder Arguments.arcPath // ARC location is supplied via the -a argument
     |> QueryModel.QInvestigation.fromInvestigation
 
 // qi.ProtocolNames
@@ -151,10 +795,10 @@ let qSampleNameToIntensityColumn fN =
 ////////////////////////////////////////
 ////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-let mappingFrame = Mapping.readMapping (__SOURCE_DIRECTORY__ + @"\scripts\chlamy_jgi55.txt") "\t" "Identifier"
+let mappingFrame = Mapping.readMapping Arguments.mappingFilePath "\t" "Identifier" // mapping file path is supplied via -m rather than hard-coded next to the script
 mappingFrame.Print()
 let truncID (id : string) = (id.Split([|".p";".t"|],System.StringSplitOptions.None)).[0]
-let outPathA = runP + "/AnnotatedResult.tsv"
+let outPathA = runID + "/AnnotatedResult.tsv" // annotated table is written into the run directory
 
 Data.annotateAndWriteData 
     mappingFrame 
@@ -525,45 +1169,45 @@ let doAnalysis (allDataForAnalysis:Frame<string,string>) dataForAnalysis (allCon
     /////////////////////////////////////////////////////
     // Save Results
     /////////////////////////////////////////////////////
-    let outP = System.IO.Path.Combine([|runP;baitGroup|])
+    let outP = System.IO.Path.Combine([|runID;baitGroup|])
     System.IO.Directory.CreateDirectory outP
     |> ignore
 
     abundanceChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\1_Norm_PreNormGlobalAbundanceChart.html")
+    |> Chart.saveHtml (outP + @"/1_Norm_PreNormGlobalAbundanceChart.html")
     corrFChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\2_Norm_CorrectionfactorChart.html")
+    |> Chart.saveHtml (outP + @"/2_Norm_CorrectionfactorChart.html")
     corrAbundanceChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\3_Norm_AfterNormGlobalAbundanceChart.html")
+    |> Chart.saveHtml (outP + @"/3_Norm_AfterNormGlobalAbundanceChart.html")
     preImpChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\4_Imputation_preImpChart.html")
+    |> Chart.saveHtml (outP + @"/4_Imputation_preImpChart.html")
     afterImpChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\5_Imputation_afterImpChart.html")
+    |> Chart.saveHtml (outP + @"/5_Imputation_afterImpChart.html")
     preNormChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\6_QuantilNorm_PreNormChart.html")
+    |> Chart.saveHtml (outP + @"/6_QuantilNorm_PreNormChart.html")
     afterNormChart
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\7_QuantilNorm_AfterNormChart.html")
+    |> Chart.saveHtml (outP + @"/7_QuantilNorm_AfterNormChart.html")
     createSAMChart res
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\8_Testing_SAM.html")
+    |> Chart.saveHtml (outP + @"/8_Testing_SAM.html")
     plotHisto finPlot
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\9_Testing_DifferencesHistogram.html")
+    |> Chart.saveHtml (outP + @"/9_Testing_DifferencesHistogram.html")
     plotMA [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"]
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\10_Testing_MAPlot.html")
+    |> Chart.saveHtml (outP + @"/10_Testing_MAPlot.html")
     plotVulcano [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"]
     |> Chart.withTemplate ChartTemplates.lightMirrored
-    |> Chart.saveHtml (outP + @"\11_Testing_VulcanoPlot.html")
-    toSave.SaveCsv(outP + @"\Analysis_complete.tsv",includeRowKeys=true,keyNames=["Protein"],separator='\t')
-    System.IO.File.WriteAllLines(outP + @"\params.txt",fsi.CommandLineArgs)
+    |> Chart.saveHtml (outP + @"/11_Testing_VulcanoPlot.html")
+    toSave.SaveCsv(outP + @"/Analysis_complete.tsv",includeRowKeys=true,keyNames=["Protein"],separator='\t')
+    System.IO.File.WriteAllLines(outP + @"/params.txt",fsi.CommandLineArgs)
     printfn "Finished Analysis! results can be found at:%s" outP 
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/workflows/EvalTurboID/TurboIDSampleArc.cwl b/workflows/EvalTurboID/TurboIDSampleArc.cwl
new file mode 100644
index 0000000..e3fbda7
--- /dev/null
+++ b/workflows/EvalTurboID/TurboIDSampleArc.cwl
@@ -0,0 +1,48 @@
+cwlVersion: v1.2
+class: CommandLineTool  # runs `dotnet fsi <scriptFile> -i <input> -m <mappingFile> -a <arcDirectory> -id <runID>`
+hints:
+  DockerRequirement:
+    dockerPull: mcr.microsoft.com/dotnet/sdk:7.0  # SDK image that provides `dotnet fsi`
+requirements:
+  # - class: InlineJavascriptRequirement
+  # - class: InitialWorkDirRequirement
+  #   listing:
+  #     - entry: $(inputs.scriptDirectory)
+  #       writable: true
+  - class: EnvVarRequirement
+    envDef:
+      - envName: DOTNET_NOLOGO  # keeps the .NET first-run banner out of the tool output
+        envValue: "true"
+  - class: NetworkAccess
+    networkAccess: true  # NOTE(review): presumably needed to restore NuGet packages referenced by the script - confirm
+baseCommand: [dotnet, fsi]
+inputs:
+  scriptFile:  # F# script to execute; first positional argument of `dotnet fsi`
+    type: File
+    inputBinding:
+      position: 1
+  input:  # file passed to the script as -i (InputPath)
+    type: File
+    inputBinding:
+      position: 2
+      prefix: -i
+  mappingFile:  # mapping file passed to the script as -m (MappingFilePath)
+    type: File
+    inputBinding:
+      position: 3
+      prefix: -m
+  arcDirectory:  # ARC root passed to the script as -a (ArcPath)
+    type: Directory
+    inputBinding:
+      position: 4
+      prefix: -a
+  runID:  # passed as -id; the script creates a directory with this name for its results
+    type: string
+    inputBinding:
+      position: 5
+      prefix: -id
+outputs:
+  output:  # the run directory created by the script in its working directory
+    type: Directory
+    outputBinding:
+      glob: $(inputs.runID)  # relative globs are resolved against the output directory
-- 
GitLab