diff --git a/runs/TurboIDSampleArc.yml b/runs/TurboIDSampleArc.yml new file mode 100644 index 0000000000000000000000000000000000000000..87e4dae55a4d7520d93d5e722ec3cb962085ca10 --- /dev/null +++ b/runs/TurboIDSampleArc.yml @@ -0,0 +1,13 @@ +arcDirectory: + class: Directory + path: ../ +mappingFile: + class: File + path: ../workflows/EvalTurboID/scripts/chlamy_jgi55.txt +scriptFile: + class: File + path: ../workflows/EvalTurboID/EvalTurboID.fsx +input: + class: File + path: ../assays/MSEval/dataset/TEF_PSBQ_LON_MS264DDAfs_combined_protein.tsv +runID: test diff --git a/workflows/EvalTurboID/EvalTurboID.fsx b/workflows/EvalTurboID/EvalTurboID.fsx index 452d083ab52b22f5513b795f9d63227e8f8f7d10..09344df0b84189cc9d6eff35a6ac36abb2894329 100644 --- a/workflows/EvalTurboID/EvalTurboID.fsx +++ b/workflows/EvalTurboID/EvalTurboID.fsx @@ -19,18 +19,659 @@ open ISADotNet open ISADotNet open ISADotNet.QueryModel -#load "scripts/annotate.fsx" +module Annotate = + + open System + open Argu + open Deedle + + printfn "=====\nCheck if dotnet sdk reference can be rechanged to latest: https://stackoverflow.com/a/65934809 \n=====" + + /// \t or "\t" doesn't work as argument, so "tab"/"tabulator" are translated to the tab character; any other string is returned unchanged + let getSeparator str = + match str with + | "tab" -> "\t" + | "tabulator" -> "\t" + | _ -> str + + /// Snapshots of MapMan and Gene Ontology were generated with FATool and genome releases 5.5 (Chlamy) and Araport11 (Arabidopsis) + module Mapping = + + /// reads FA tool snapshot for araport11 or chlamyJGI_v5.5 or other annotation files + let readMapping (mappingFilePath:string) (columnSeparator:string) (identifierColHeader:string) :Frame<string,string> = + Frame.ReadCsv(mappingFilePath,hasHeaders=true,separators = columnSeparator) + |> Frame.indexRows identifierColHeader + + /// truncateID processes identifier (e.g. 
Cre10.g123456.t2.1 -> Cre10.g123456) + let getAnnotationsFromIdentifier (frame:Frame<string,string>) (annotationHeader:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) (truncateID:string->string) (identifier:string) = + + /// identifier that should be mapped + let identifier = identifier.Split([|multipleIdentifierSeparator|],StringSplitOptions.None) + + /// Mappings from all identifiers to annotations that are present in the annotation frame (ids that are not found are reported and skipped) + let mappings :Series<string,string> []= + identifier + |> Array.choose (fun ident -> + let truncId = truncateID ident + let k = frame.TryGetRow truncId + if k.HasValue then + Some k.Value + else + printfn "Warning: The following id could not be found within mapping file: %s" truncId + None + ) + + /// all annotations that should be used + annotationHeader //["GO","Synonym"] + |> Array.map (fun annotationType -> + mappings //[[GO => GO:006; Synonym => Q0WV96];[GO => GO:001; Synonym => Q01337]] + |> Array.map (fun mapping -> + let annotation = mapping.[annotationType] + annotation.Split ';' + |> String.concat multipleAnnotationSeparator + ) + ) + + let getAnnotationRow (frame:Frame<string,string>) (annotationHeader:string[]) multipleIdentifierSeparator multipleAnnotationSeparator truncateID (identifier:string) = + getAnnotationsFromIdentifier frame annotationHeader multipleIdentifierSeparator multipleAnnotationSeparator truncateID identifier + |> Array.map (String.concat multipleIdentifierSeparator) + + + /// User data is read, annotated and written to a new file + module Data = + + let getDataFrame (columnSeparator:string) inputPath = + System.IO.File.ReadAllLines(inputPath) + |> Array.map (fun x -> + x.Split([|columnSeparator|],System.StringSplitOptions.None) + ) + + /// index of column that contains the identifier to annotate + let getColIndex (dataFrame:string[][]) columnHeader= + Array.tryFindIndex (fun x -> x = columnHeader) dataFrame.[0] + |> fun o -> + match o with + | Some i -> i + | _ -> 
failwithf "ColumnHeader %s not found." columnHeader + + /// based on given mapping arguments the file is extended with given mapping columns + let getHeader (dataFrame:string[][]) (columnSeparator:string) (annotationHeader:string[]) = + Array.append dataFrame.[0] annotationHeader + |> String.concat columnSeparator + + /// every row of the file is processed and converted to a new string with additional information attached at the end of the line + let getAnnotatedLines annotationFrame inputPath (columnSeparator:string) columnHeader (annotations:string[]) (multipleIdentifierSeparator:string) truncateID (multipleAnnotationSeparator:string) = + let dataFrame = getDataFrame columnSeparator inputPath + let colIndex = getColIndex dataFrame columnHeader + let header = getHeader dataFrame columnSeparator annotations + //let rowCount = dataFrame.Length + dataFrame + |> Array.tail + |> Array.mapi (fun i x -> + let identifier = x.[colIndex].Replace("\"","") + + //if i%50=0 then printfn "%04i/%i: %s" i rowCount identifier + //printfn "%04i/%i: %s" i rowCount identifier + let annotations = + if identifier = "" then + Array.init annotations.Length (fun _ -> "") + else + Mapping.getAnnotationRow annotationFrame annotations multipleIdentifierSeparator multipleAnnotationSeparator truncateID identifier + Seq.append x annotations + |> String.concat columnSeparator + ) + |> Array.append [|header|] + + let annotateAndWriteData annotationFrame inputPath (columnSeparator:string) columnHeader truncateID (annotations:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) outputPath = + /// aborts with an exception (it does not merely warn) if the column separator equals the identifier or annotation separator + let checkSeparators = + if columnSeparator = multipleIdentifierSeparator then failwith "WARNING: Column separator is equal to identifier separator" + if columnSeparator = multipleAnnotationSeparator then failwith "WARNING: Column separator is equal to annotation separator" + let annotatedRows = + getAnnotatedLines annotationFrame 
inputPath columnSeparator columnHeader annotations multipleIdentifierSeparator truncateID multipleAnnotationSeparator + System.IO.File.WriteAllLines(outputPath,annotatedRows) + +module SamPlotting = + open Deedle + open FSharp.Stats + open Plotly.NET + open FSharp.Stats.Testing + open SAM + open Plotly.NET.StyleParam + + let createSAMChart res = + + let observed = [| res.NegSigBioitem; res.NonSigBioitem; res.PosSigBioitem|] |> Array.concat + let obs = observed |> Array.map (fun x -> x.Statistics) + let expected = res.AveragePermutations |> Array.map (fun x -> x.Statistics) + let minDi = Seq.min obs + let maxDi = Seq.max obs + + + // positive significant changes + let posExpected = expected.[res.NegSigBioitem.Length + res.NonSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length + res.PosSigBioitem.Length-1] + let posChart = + Chart.Point(posExpected,res.PosSigBioitem |> Array.map (fun x -> x.Statistics)) + |> Chart.withLineStyle(Color=Color.fromKeyword Green) + |> Chart.withTraceInfo("positive change",Visible = Visible.True ) + + + // no significant changes + let nonex = expected.[res.NegSigBioitem.Length .. res.NegSigBioitem.Length + res.NonSigBioitem.Length-1] + let nonchart = + Chart.Point(nonex,res.NonSigBioitem |> Array.map (fun x -> x.Statistics)) + |> Chart.withLineStyle(Color=Color.fromKeyword Gray) + |> Chart.withTraceInfo("no change",Visible = Visible.True) + + // negative significant changes + let negex = expected.[0 .. 
res.NegSigBioitem.Length-1] + let negchart = + Chart.Point(negex,res.NegSigBioitem |> Array.map (fun x -> x.Statistics)) + |> Chart.withLineStyle(Color=Color.fromKeyword Red) + |> Chart.withTraceInfo("negative change",Visible = Visible.True) + + let samValues = + [ + negchart + nonchart + posChart + ] + |> Chart.combine + + let chartConfig = + let svdConfig = + ConfigObjects.ToImageButtonOptions.init( + Format = StyleParam.ImageFormat.SVG) + Config.init ( + ToImageButtonOptions = svdConfig, + ModeBarButtonsToAdd=[ModeBarButton.HoverCompareCartesian] + + ) + + let cutLineUp = [(minDi + res.Delta) ; (maxDi + res.Delta)] + let cutsUp = + Chart.Line(cutLineUp,[minDi;maxDi]) + |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5) + |> Chart.withTraceInfo("delta",Visible = Visible.True) + let cutLineLow = [(minDi - res.Delta) ; (maxDi - res.Delta)] + let cutsLow = + Chart.Line(cutLineLow,[minDi;maxDi]) + |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5) + |> Chart.withTraceInfo("delta",Visible = Visible.True) + let linechart = + Chart.Line([minDi;maxDi], [minDi;maxDi]) + |> Chart.withTraceInfo("bisecting angle",Visible = Visible.True) + |> Chart.withLineStyle(Color=Color.fromKeyword Black, Width = 1) + + let uppercut = + let xAnchorUppercut = [minDi; 
maxDi] + Chart.Line (xAnchorUppercut, List.init xAnchorUppercut.Length (fun x -> res.UpperCut)) + + |> Chart.withTraceInfo("upper cut",Visible = Visible.True) + |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash, Width = 0.3) + + let lowercut = + Chart.Line([minDi;maxDi],[res.LowerCut;res.LowerCut]) + |> Chart.withTraceInfo("lower cut",Visible = Visible.True) + |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash,Width = 0.3) + //|> Chart.withXAxisStyle(MinMax = (-15,20)) + + |> Chart.withTraceInfo("lower cut",Visible = Visible.True) + let plot = + [linechart; + samValues; + cutsUp; + cutsLow; + uppercut; + lowercut] + |> Chart.combine + |> Chart.withTitle(title = "SAM results") + |> Chart.withXAxisStyle("expected Score") + |> Chart.withYAxisStyle ("observed Score") + |> Chart.withConfig(chartConfig) + |> Chart.withTemplate(ChartTemplates.lightMirrored) + plot + +module Plotting = + + open FSharp.Stats + open Plotly.NET + open Deedle + open BioFSharp.IO + open DeedleExtensions + + let plotMA highlightList (f:Frame<string,string>) = + let dataAll = + f + |> Frame.mapRows (fun k s -> + let a = s.GetAs<float>("MeanAbundance",nan) + let r = s.GetAs<float>("MeanR",nan) + let stDev = s.GetAs<float>("StDevR",nan) + let l = + let x = s.GetAs<string>("ProtName","") + if x = "" then + "" + else + x + let c = s.GetAs<bool>("IsCandidate") + {| + ProtName = l + MeanAbundance = a + MeanR = r + StDevR = stDev + IsCandidate = c + |} + ) + |> Series.values + |> Array.ofSeq + + let all = + let x = dataAll |> Seq.map (fun x -> x.MeanR) + let y = dataAll |> Seq.map (fun x -> x.MeanAbundance) + let label = dataAll |> Seq.map (fun x -> x.ProtName) + Chart.Point(y,x, MultiText = label) + + let candidates = + let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate) + let x = dataFiltered |> Seq.map (fun x -> x.MeanR) + let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) + let labels = dataFiltered 
|> Seq.map (fun x -> x.ProtName) + Chart.Point( + y,x, + Name="points", + MultiText=labels, + TextPosition=StyleParam.TextPosition.TopRight + ) + |> Chart.withTraceInfo "candidates" + let candidatesNoStabw = + let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate) + let x = dataFiltered |> Seq.map (fun x -> x.MeanR) + let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) + let labels = dataFiltered |> Seq.map (fun x -> x.ProtName) + + Chart.Point( + y,x, + Name="points", MultiText = labels + ) + |> Chart.withTraceInfo "candidatesNoStabw" + let candidates' = + let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName ) + let x = dataFiltered |> Seq.map (fun x -> x.MeanR) + let y = dataFiltered |> Seq.map (fun x -> x.MeanAbundance) + let labels = dataFiltered |> Seq.map (fun x -> x.ProtName) + Chart.Point( + y,x, + Name="points", + MultiText=labels, + TextPosition=StyleParam.TextPosition.TopRight + ) + |> Chart.withTraceInfo "candidates Reci" + [ + all + candidatesNoStabw + candidates' + + ] + |> Chart.combine + |> Chart.withYAxisStyle "log2(bait)-log2(control)" + |> Chart.withXAxisStyle "log2(Mean intensity)" + + let plotVulcano highlightList f = + let dataAll = + f + |> Frame.mapRows (fun k s -> + let a = s.GetAs<float>("MeanAbundance",nan) + let r = s.GetAs<float>("MeanR",nan) + let stDev = s.GetAs<float>("StDevR",nan) + let qValue = s.GetAs<float>("qValue",nan) + let l = + let x = s.GetAs<string>("ProtName","") + if x = "" then + "" + else + x + let c = s.GetAs<bool>("IsCandidate") + {| + ProtName = l + MeanAbundance = a + MeanR = r + StDevR = stDev + IsCandidate = c + QVal = qValue + |} + ) + |> Series.values + |> Array.ofSeq + + let all = + let y = dataAll |> Seq.map (fun x -> x.MeanR) + let x = + dataAll + |> Seq.map (fun x -> x.QVal) + |> Seq.map (fun x -> + let res = -log10 x + if infinity.Equals res then 4. 
else res + ) + let label = dataAll |> Seq.map (fun x -> x.ProtName) + let xError = + dataAll |> Seq.map (fun x -> x.StDevR) + Chart.Point(y,x, MultiText = label) + let candidatesNoStabw = + let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate) + let y = dataFiltered |> Seq.map (fun x -> x.MeanR) + let x = + dataFiltered + |> Seq.map (fun x -> x.QVal) + |> Seq.map (fun x -> + let res = -log10 x + if infinity.Equals res then 4. else res + ) + let labels = dataFiltered |> Seq.map (fun x -> x.ProtName) + let xError = dataFiltered |> Seq.map (fun x -> x.StDevR) + Chart.Point( + y,x, + Name="points", + MultiText=labels + ) + |> Chart.withTraceInfo "candidatesNoStabw" + let candidates' = + let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName ) + let y = dataFiltered |> Seq.map (fun x -> x.MeanR) + let x = + dataFiltered + |> Seq.map (fun x -> x.QVal) + |> Seq.map (fun x -> + let res = -log10 x + if infinity.Equals res then 4. else res + ) + let labels = dataFiltered |> Seq.map (fun x -> x.ProtName) + Chart.Point( + y,x, + Name="points", + MultiText=labels, + TextPosition=StyleParam.TextPosition.TopRight + ) + |> Chart.withTraceInfo "candidates Reci" + [ + all + candidatesNoStabw + candidates' + + ] + |> Chart.combine + |> Chart.withYAxisStyle "-log10(qValue)" + |> Chart.withXAxisStyle "log2(bait)-log2(control)" + + let plotHisto (f:Frame<string,string>) = + let dataAll = + f + |> Frame.mapRows (fun k s -> + let a = s.GetAs<float>("MeanAbundance",nan) + let r = s.GetAs<float>("MeanR",nan) + let stDev = s.GetAs<float>("StDevR",nan) + let l = ""//s.GetAs<string>("ProtName","") + let c = s.GetAs<bool>("IsCandidate") + {| + ProtName = l + MeanAbundance = a + MeanR = r + StDevR = stDev + IsCandidate = c + |} + ) + |> Series.values + |> Array.ofSeq + let histo = + let data = dataAll |> Seq.map (fun x -> x.MeanR) |> Seq.filter (fun x -> nan.Equals x |> not)|> Array.ofSeq + let bw = 
FSharp.Stats.Distributions.Bandwidth.nrd0 data + let binned = FSharp.Stats.Distributions.Frequency.create (bw) data |> Map.toSeq + Chart.Column binned + |> Chart.withXAxisStyle "log2(bait)-log2(control)" + histo + +module MetaxaTemp = + open MetaXa + open MetaFrame + open Stats + open FSharp.Stats + open Plotly.NET + open Deedle + open BioFSharp.IO + open DeedleExtensions + open ISADotNet + + + module Global = + + /// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md + module MedianOfRatios = + + /// expects data frame with shape frame<sampleIds,GeneIds> + let medianOfRatiosWide (data:Frame<string,string>) = + //data frame with shape frame<sampleIds,GeneIds> + let ratios = + data + |> Frame.dropSparseCols + |> Frame.getNumericCols + |> Series.map (fun geneID s -> + let geometricMeanAcrossAllSamples = + s.Values + |> FSharp.Stats.Seq.median + let ratioPerSample = + s + |> Series.map (fun x sample -> sample / geometricMeanAcrossAllSamples) + ratioPerSample + ) + |> Frame.ofColumns + // Series of shape: Series<sampleId,correctionFactor:float> + let correctionFactorPerSample = + ratios + |> Frame.transpose + |> Stats.median + // data frame with frame<sampleIds,GeneIds> + // contains gene data divided by the sample wise estimated correction factor + let correctedData = + data + |> Frame.getNumericCols + |> Series.map (fun geneID s -> + s / correctionFactorPerSample + ) + |> Frame.ofColumns + let abundanceInit = + Chart.Column(data |> Frame.transpose |> Frame.dropSparseRows |> Stats.median |> Series.observations) + |> Chart.withXAxisStyle "SampleId" + |> Chart.withYAxisStyle "Median of Initial Abundances" + let c = + Chart.Column(correctionFactorPerSample |> Series.observations) + |> Chart.withXAxisStyle "SampleId" + |> Chart.withYAxisStyle "Correction Factor." 
+ let abundanceCorrected = + Chart.Column(correctedData |> Frame.transpose |> Frame.dropSparseRows |> Stats.median |> Series.observations) + |> Chart.withXAxisStyle "SampleId" + |> Chart.withYAxisStyle "Median of corrected abundances" + correctedData,abundanceInit,c,abundanceCorrected + + + module Imputation = + + + module SimpleSampling = + /// Expects normalized data. + /// (learnVarianceMeanDependence: seq<seq<float>> -> (float -> float)) function, which learns how to map from a mean expression to an estimator for variance. + let computeColumnWiseUsing (sampleFunction: float -> float -> float) minCountsForVarEst (quantileForLowMean:float option) (learnVarianceMeanDependence: seq<float*float> -> (float -> float)) (grouped:Frame<string*string,string>) = + /// + let meanWithAtLeast k (x:Series<_,float>) = + let tmp = Series.values x + if tmp |> Seq.length < k then + nan + else + x |> Series.values |> Seq.mean + /// + let cvWithAtLeast k (x:Series<_,float>) = + let tmp = Series.values x + if tmp |> Seq.length < k then + nan + else + x |> Series.values |> cv //Seq.disp + + /// Computes means and variances of complete replicates. + let log2MeanToCVEmpirical = + grouped + |> Frame.getNumericCols + |> Series.map (fun geneID s -> + let means = s |> (meanWithAtLeast minCountsForVarEst) + let cols = s |> (cvWithAtLeast minCountsForVarEst) + means, cols + ) + |> Series.values + |> Array.ofSeq + |> Array.filter (fun x -> nan.Equals(fst x) |> not && nan.Equals(snd x) |> not) + |> Array.sortBy fst + |> Array.map (fun (mean,dis) -> log2 mean, dis) + |> fun data -> + let borders = FSharp.Stats.Testing.Outliers.tukey 1.5 (data |> Array.map snd) + data + |> Array.filter (fun x -> snd x > borders.Lower && snd x < borders.Upper ) + /// function mapping from a mean expression to an estimator for dispersion (cv). 
+ let log2MeanToCv = + learnVarianceMeanDependence log2MeanToCVEmpirical + + let log2MeanToCVByFit = + log2MeanToCVEmpirical + |> Array.map (fun x -> fst x, fst x |> log2MeanToCv) + let meanToCvEstChart = + [ + [ + + Chart.PointDensity(log2MeanToCVEmpirical |> Array.map fst,log2MeanToCVEmpirical |> Array.map snd) + // |> Chart.withMarkerStyle (3) + ] + |> Chart.combine + |> Chart.withTraceInfo "mean to CV empirical" + + Chart.Spline(log2MeanToCVByFit) + |> Chart.withTraceInfo "mean Vs. CV fitted" + ] + |> Chart.combine + |> Chart.withXAxisStyle "log2(mean Abundance)" + |> Chart.withYAxisStyle "CV" + + let quantileForLowMean, quantilesToMeansChart = + let data = + Array.map fst log2MeanToCVEmpirical + |> Array.map (fun x -> 2.**x) + match quantileForLowMean with + | None -> + None, + [ + [|0.05 .. 0.05 .. 0.95|] + |> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data) + |> Chart.Point + |> Chart.withTraceInfo "quantiles VS Abundance" + ] + |> Chart.combine + |> Chart.withXAxisStyle "Quantiles" + |> Chart.withYAxisStyle "Abundance" + | Some quantileForLowMean -> + let q = data |> FSharp.Stats.Quantile.compute quantileForLowMean + Some q, + [ + [|0.00 .. 0.05 .. 0.95|] + |> Array.mapi (fun i q -> if i = 0 then 0.01 else q) + |> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data) + |> Chart.Point + |> Chart.withTraceInfo "quantiles VS Abundance" + Chart.Point([quantileForLowMean],[q]) + |> Chart.withTraceInfo "selected quantile" + ] + |> Chart.combine + |> Chart.withXAxisStyle "Quantiles" + |> Chart.withYAxisStyle "Abundance" + + /// Imputation by sampling (via sampleF) around the mean of the observed values of the input vector; sampleF is presumably a Gaussian sampler - confirm with caller + /// Sampling is truncated at zero: negative draws are rejected and redrawn, so no negative value is returned. + let initImputeUsing sampleF (estimateCv:float -> float) (lowMean:float option) (d: seq<float>) = + let clean = d |> Seq.filter (fun x -> nan.Equals x |> not) + let sampleSafe mean standardDev = + let rec sample () = + let tmp = sampleF mean standardDev + if tmp < 0. 
then sample () else tmp + sample() + if clean |> Seq.isEmpty |> not then + let mean = Seq.mean clean + let cvEst = estimateCv mean + let standardDev = cvEst * mean + //printfn "mean %f" mean + //printfn "stndev %f" standardDev + //printfn "before %A" d + let res = + d + |> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x) + //printfn "%A" res + res + else + match lowMean with + | Some lowMean -> + let mean = lowMean + let cvEst = estimateCv mean + let standardDev = cvEst * mean + d + |> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x) + | None -> d + + let imputeUsing (x:seq<float>) = + initImputeUsing sampleFunction (log2 >> log2MeanToCv) (quantileForLowMean) x + + /// Imputed frame: genes without any factor level holding more than one observation are set to nan, then every column is imputed per factor level with imputeUsing. + let imputed = + // Step 1: Set genes missing when they are not comparable + // because all sample groups have 1 or zero observations. + grouped + |> Frame.getNumericCols + |> Series.map (fun geneID s -> + let geneDataByFactorLevels = + s + |> Series.applyLevel fst (Series.values >> Array.ofSeq) + let geneData = + geneDataByFactorLevels + |> Series.observations + let present = + geneData + |> Seq.map (fun (sampleID,v) -> + v.Length > 1 + ) + |> Seq.contains true + if present then s else s |> Series.mapValues (fun s -> nan) + ) + |> Frame.ofColumns + // Step 2: + // x present 0 not present *nan* + // [x; x; x] -> [x; x; x] + // [x; x; 0] -> [x; x; x] by sampling, mean from sample, var by est + // Will be biased towards the remaining point. 
+ // [x; 0; 0] -> [x; x; x] by sampling, mean from sample, var by est + // + // [0; 0; 0] -> + //depending on the value of quantileForLowMean : + //Some x -> [x; x; x] + //None -> [0; 0; 0] + |> Map.mapColWiseGroupedByFactorLevels imputeUsing + + // meanToCvEstChart |> Chart.show + // quantilesToMeansChart |> Chart.show + imputed + open Annotate open Argu -#load "scripts/SamPlotting.fsx" open SamPlotting -#load "scripts/Plotting.fsx" open Plotting -#load "scripts/MetaxaTemp.fsx" open MetaxaTemp module Arguments = type WorkflowArgs = | [<Unique>] [<Mandatory>] [<AltCommandLine("-i")>] InputPath of path:string + | [<Unique>] [<Mandatory>] [<AltCommandLine("-a")>] ArcPath of path:string + | [<Unique>] [<Mandatory>] [<AltCommandLine("-m")>] MappingFilePath of path:string + | [<Unique>] [<Mandatory>] [<AltCommandLine("-id")>] RunID of path:string // | [<Unique>] [<Mandatory>] [<AltCommandLine("-o")>] OutputPath of path:string | [<Unique>] [<AltCommandLine("-c")>] IdentifierColumnHeader of string | [<Unique>] [<AltCommandLine("-fdr")>] FDR of string @@ -43,6 +684,9 @@ module Arguments = member this.Usage = match this with | InputPath x -> "relative input file path in /arc/" + | ArcPath x -> "relative path to the arc" + | RunID x -> "ID for the run" + | MappingFilePath x -> "relative path to the mapping file path in /arc/" // | OutputPath x -> "relative output file path in /arc/runs/" | IdentifierColumnHeader x -> "column header of identifier column file to annotate" | FDR x -> "FDR" @@ -61,6 +705,9 @@ module Arguments = // arguments are converted let inputPath = annotationR.GetResult(InputPath)//@"/arc/assays/" + annotationR.GetResult(InputPath) + let arcPath = annotationR.GetResult(ArcPath) + let runID = annotationR.GetResult(RunID) + let mappingFilePath = annotationR.GetResult(MappingFilePath) // let outputFilePath = annotationR.GetResult(OutputPath) let columnHeader = match annotationR.TryGetResult(IdentifierColumnHeader) with @@ -90,10 +737,8 @@ module Arguments = 
//////////////////////////////////////// //////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -let runID = System.Guid.NewGuid().ToString() -let source = __SOURCE_DIRECTORY__ -let runP = System.IO.Path.Combine([|source + @"\..\..\runs";runID|]) -System.IO.Directory.CreateDirectory runP +let runID = Arguments.runID +System.IO.Directory.CreateDirectory runID |> ignore //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////// @@ -102,10 +747,9 @@ System.IO.Directory.CreateDirectory runP //////////////////////////////////////// //////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -let arcPath = __SOURCE_DIRECTORY__ + @"\..\..\" let qi = - arcIO.NET.Investigation.fromArcFolder arcPath + arcIO.NET.Investigation.fromArcFolder Arguments.arcPath |> QueryModel.QInvestigation.fromInvestigation // qi.ProtocolNames @@ -151,10 +795,10 @@ let qSampleNameToIntensityColumn fN = //////////////////////////////////////// //////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -let mappingFrame = Mapping.readMapping (__SOURCE_DIRECTORY__ + @"\scripts\chlamy_jgi55.txt") "\t" "Identifier" +let mappingFrame = Mapping.readMapping Arguments.mappingFilePath "\t" "Identifier" mappingFrame.Print() let truncID (id : string) = (id.Split([|".p";".t"|],System.StringSplitOptions.None)).[0] -let outPathA = runP + "/AnnotatedResult.tsv" +let outPathA = System.IO.Path.Combine(runID, "AnnotatedResult.tsv") Data.annotateAndWriteData mappingFrame @@ -525,45 +1169,45 @@ let doAnalysis (allDataForAnalysis:Frame<string,string>) dataForAnalysis (allCon ///////////////////////////////////////////////////// // Save Results ///////////////////////////////////////////////////// - let outP = System.IO.Path.Combine([|runP;baitGroup|]) + let outP = 
System.IO.Path.Combine([|runID;baitGroup|]) System.IO.Directory.CreateDirectory outP |> ignore abundanceChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\1_Norm_PreNormGlobalAbundanceChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "1_Norm_PreNormGlobalAbundanceChart.html")) corrFChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\2_Norm_CorrectionfactorChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "2_Norm_CorrectionfactorChart.html")) corrAbundanceChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\3_Norm_AfterNormGlobalAbundanceChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "3_Norm_AfterNormGlobalAbundanceChart.html")) preImpChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\4_Imputation_preImpChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "4_Imputation_preImpChart.html")) afterImpChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\5_Imputation_afterImpChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "5_Imputation_afterImpChart.html")) preNormChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\6_QuantilNorm_PreNormChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "6_QuantilNorm_PreNormChart.html")) afterNormChart |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\7_QuantilNorm_AfterNormChart.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "7_QuantilNorm_AfterNormChart.html")) createSAMChart res |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\8_Testing_SAM.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "8_Testing_SAM.html")) plotHisto finPlot |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\9_Testing_DifferencesHistogram.html") + |> Chart.saveHtml (System.IO.Path.Combine(outP, "9_Testing_DifferencesHistogram.html")) plotMA [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"] |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\10_Testing_MAPlot.html") + |> 
Chart.saveHtml (System.IO.Path.Combine(outP, "10_Testing_MAPlot.html")) plotVulcano [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"] |> Chart.withTemplate ChartTemplates.lightMirrored - |> Chart.saveHtml (outP + @"\11_Testing_VulcanoPlot.html") - toSave.SaveCsv(outP + @"\Analysis_complete.tsv",includeRowKeys=true,keyNames=["Protein"],separator='\t') - System.IO.File.WriteAllLines(outP + @"\params.txt",fsi.CommandLineArgs) + |> Chart.saveHtml (System.IO.Path.Combine(outP, "11_Testing_VulcanoPlot.html")) + toSave.SaveCsv(System.IO.Path.Combine(outP, "Analysis_complete.tsv"),includeRowKeys=true,keyNames=["Protein"],separator='\t') + System.IO.File.WriteAllLines(System.IO.Path.Combine(outP, "params.txt"),fsi.CommandLineArgs) printfn "Finished Analysis! results can be found at:%s" outP //////////////////////////////////////////////////////////////////////////////// diff --git a/workflows/EvalTurboID/TurboIDSampleArc.cwl b/workflows/EvalTurboID/TurboIDSampleArc.cwl new file mode 100644 index 0000000000000000000000000000000000000000..e3fbda77590b819dd73464e88cf8e7e61c72d2dc --- /dev/null +++ b/workflows/EvalTurboID/TurboIDSampleArc.cwl @@ -0,0 +1,48 @@ +cwlVersion: v1.2 +class: CommandLineTool +hints: + DockerRequirement: + dockerPull: mcr.microsoft.com/dotnet/sdk:7.0 +requirements: + # - class: InlineJavascriptRequirement + # - class: InitialWorkDirRequirement + # listing: + # - entry: $(inputs.scriptDirectory) + # writable: true + - class: EnvVarRequirement + envDef: + - envName: DOTNET_NOLOGO + envValue: "true" + - class: NetworkAccess + networkAccess: true +baseCommand: [dotnet, fsi] +inputs: + scriptFile: + type: File + inputBinding: + position: 1 + input: + type: File + inputBinding: + position: 2 + prefix: -i + mappingFile: + type: File + inputBinding: + position: 3 + prefix: -m + arcDirectory: + type: Directory + inputBinding: + position: 4 + prefix: -a + runID: + type: string + inputBinding: + position: 5 + prefix: -id +outputs: + output: + type: Directory + outputBinding: + glob: 
$(runtime.outdir)/$(inputs.runID) \ No newline at end of file