Skip to content
Snippets Groups Projects
Commit e6a83e28 authored by Jonathan Ott's avatar Jonathan Ott
Browse files

add cwl compatible version and workflow

parent 9a05e754
Branches main
No related tags found
1 merge request: !1 Cwl Descriptions
This commit is part of merge request !1. Comments created here will be created in the context of that merge request.
# CWL job file: concrete input values for the EvalTurboID command line tool.
# Keys are listed in the order of the tool's input positions.

# F# script that is executed with `dotnet fsi`.
scriptFile:
  class: File
  path: ../workflows/EvalTurboID/EvalTurboID.fsx

# Protein table that is passed to the script via `-i`.
input:
  class: File
  path: ../assays/MSEval/dataset/TEF_PSBQ_LON_MS264DDAfs_combined_protein.tsv

# Annotation mapping file that is passed to the script via `-m`.
mappingFile:
  class: File
  path: ../workflows/EvalTurboID/scripts/chlamy_jgi55.txt

# Root folder of the ARC that is passed to the script via `-a`.
arcDirectory:
  class: Directory
  path: ../

# Run identifier passed via `-id`; the tool collects the directory of this name as output.
runID: test
......@@ -19,18 +19,659 @@ open ISADotNet
open ISADotNet
open ISADotNet.QueryModel
#load "scripts/annotate.fsx"
module Annotate =
open System
open Argu
open Deedle
printfn "=====\nCheck if dotnet sdk reference can be rechanged to latest: https://stackoverflow.com/a/65934809 \n====="
/// Translates the keywords "tab" and "tabulator" into a tab character, because
/// \t or "\t" does not work as a command line argument.
/// Every other input is returned unchanged and used as the separator itself.
let getSeparator str =
    if str = "tab" || str = "tabulator" then
        "\t"
    else
        str
/// Snapshots of MapMan and Gene Ontology were generated with FATool and genome releases 5.5 (Chlamy) and Araport11 (Arabidopsis)
module Mapping =
/// Reads an annotation file (FATool snapshot for araport11, chlamyJGI_v5.5 or another
/// annotation table with a header line) and indexes its rows by the given identifier column.
let readMapping (mappingFilePath:string) (columnSeparator:string) (identifierColHeader:string) :Frame<string,string> =
    let rawFrame = Frame.ReadCsv(mappingFilePath,hasHeaders=true,separators = columnSeparator)
    rawFrame |> Frame.indexRows identifierColHeader
/// Looks up the annotations of one (possibly multi-valued) identifier cell.
/// The cell is split at multipleIdentifierSeparator and every part is passed through
/// truncateID (e.g. Cre10.g123456.t2.1 -> Cre10.g123456) before it is searched in the
/// annotation frame. Identifiers that are missing in the frame are reported on stdout
/// and skipped.
/// Returns one array per entry of annotationHeader; each holds one string per identifier
/// that was found, in which the ';' separated annotation terms are re-joined with
/// multipleAnnotationSeparator.
let getAnnotationsFromIdentifier (frame:Frame<string,string>) (annotationHeader:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) (truncateID:string->string) (identifier:string) =
    // Row of the annotation frame that belongs to a single raw identifier, if present.
    let tryFindMapping (rawIdentifier:string) : Series<string,string> option =
        let truncId = truncateID rawIdentifier
        let row = frame.TryGetRow<string>(truncId)
        if row.HasValue then
            Some row.Value
        else
            printfn "Warning: The following id could not be found within mapping file: %s" truncId
            None
    // Mappings of all identifiers that are present in the annotation frame.
    let mappings =
        identifier.Split([|multipleIdentifierSeparator|],StringSplitOptions.None)
        |> Array.choose tryFindMapping
    // Outer array: annotation types (e.g. ["GO","Synonym"]); inner array: found identifiers.
    [|
        for annotationType in annotationHeader ->
            [|
                for mapping in mappings ->
                    let annotation = mapping.[annotationType]
                    annotation.Split ';'
                    |> String.concat multipleAnnotationSeparator
            |]
    |]
/// Builds the annotation cells for one identifier cell: one string per entry of
/// annotationHeader, in which the annotations of the individual identifiers are joined
/// with multipleIdentifierSeparator.
let getAnnotationRow (frame:Frame<string,string>) (annotationHeader:string[]) multipleIdentifierSeparator multipleAnnotationSeparator truncateID (identifier:string) =
    let annotationsPerHeader =
        getAnnotationsFromIdentifier frame annotationHeader multipleIdentifierSeparator multipleAnnotationSeparator truncateID identifier
    [|
        for annotationsPerIdentifier in annotationsPerHeader ->
            String.concat multipleIdentifierSeparator annotationsPerIdentifier
    |]
/// User data is read, annotated and written to a new file
module Data =
/// Reads the file at inputPath and splits every line at columnSeparator.
/// Returns one string array per line; empty fields are kept.
let getDataFrame (columnSeparator:string) inputPath =
    [|
        for line in System.IO.File.ReadAllLines(inputPath) ->
            line.Split([|columnSeparator|],System.StringSplitOptions.None)
    |]
/// Index of the column that contains the identifier to annotate.
/// The header is searched in the first row of dataFrame; fails with a descriptive
/// message if no cell of that row equals columnHeader.
let getColIndex (dataFrame:string[][]) columnHeader=
    match Array.tryFindIndex (fun cell -> cell = columnHeader) dataFrame.[0] with
    | Some index -> index
    | None -> failwithf "ColumnHeader %s not found." columnHeader
/// Header line of the annotated file: the first row of dataFrame extended by the
/// requested annotation column names, joined with columnSeparator.
let getHeader (dataFrame:string[][]) (columnSeparator:string) (annotationHeader:string[]) =
    let headerCells = Array.append dataFrame.[0] annotationHeader
    String.concat columnSeparator headerCells
/// Reads the input file and returns its lines with the annotation columns appended.
/// The first returned line is the extended header; every following line is the original
/// row followed by one cell per entry of annotations.
/// Quotes are removed from the identifier cell before the lookup. Rows whose identifier
/// cell is empty, or that are too short to contain the identifier column (e.g. blank
/// lines), get empty annotation cells instead of raising an index error.
/// Fails if columnHeader is not part of the first line of the file.
let getAnnotatedLines annotationFrame inputPath (columnSeparator:string) columnHeader (annotations:string[]) (multipleIdentifierSeparator:string) truncateID (multipleAnnotationSeparator:string) =
    let dataFrame = getDataFrame columnSeparator inputPath
    let colIndex = getColIndex dataFrame columnHeader
    let header = getHeader dataFrame columnSeparator annotations
    // Shared placeholder for rows without identifier; it is only read, never mutated.
    let emptyAnnotations = Array.create annotations.Length ""
    // Converts one data row into its annotated output line.
    let annotateRow (row:string[]) =
        let identifier =
            if colIndex < row.Length then
                row.[colIndex].Replace("\"","")
            else
                ""
        let rowAnnotations =
            if identifier = "" then
                emptyAnnotations
            else
                Mapping.getAnnotationRow annotationFrame annotations multipleIdentifierSeparator multipleAnnotationSeparator truncateID identifier
        Seq.append row rowAnnotations
        |> String.concat columnSeparator
    dataFrame
    |> Array.tail
    |> Array.map annotateRow
    |> Array.append [|header|]
/// Annotates the file at inputPath and writes the annotated lines to outputPath.
/// Raises an exception before any file is read if the column separator is identical to
/// the identifier separator or to the annotation separator.
let annotateAndWriteData annotationFrame inputPath (columnSeparator:string) columnHeader truncateID (annotations:string[]) (multipleIdentifierSeparator:string) (multipleAnnotationSeparator:string) outputPath =
    // Identical separators abort the run; the identifier separator is checked first.
    if columnSeparator = multipleIdentifierSeparator then
        failwith "WARNING: Column separator is equal to identifier separator"
    if columnSeparator = multipleAnnotationSeparator then
        failwith "WARNING: Column separator is equal to annotation separator"
    let annotatedRows =
        getAnnotatedLines
            annotationFrame
            inputPath
            columnSeparator
            columnHeader
            annotations
            multipleIdentifierSeparator
            truncateID
            multipleAnnotationSeparator
    System.IO.File.WriteAllLines(outputPath,annotatedRows)
module SamPlotting =
open Deedle
open FSharp.Stats
open Plotly.NET
open FSharp.Stats.Testing
open SAM
open Plotly.NET.StyleParam
/// Creates the SAM plot for a SAM result: expected scores (statistics of the averaged
/// permutations) on the x axis against observed scores on the y axis.
/// Negative, non significant and positive bioitems are drawn as separate traces and
/// are complemented by the bisecting line, the two delta lines and the upper and
/// lower cut.
/// The expected scores are assigned to the three groups by position: the first block
/// belongs to the negative, the second to the non significant and the last to the
/// positive bioitems.
let createSAMChart res =
    let observed = [| res.NegSigBioitem; res.NonSigBioitem; res.PosSigBioitem|] |> Array.concat
    let obs = observed |> Array.map (fun x -> x.Statistics)
    let expected = res.AveragePermutations |> Array.map (fun x -> x.Statistics)
    let minDi = Seq.min obs
    let maxDi = Seq.max obs
    let negCount = res.NegSigBioitem.Length
    let nonCount = res.NonSigBioitem.Length
    let posCount = res.PosSigBioitem.Length

    // positive significant changes
    let posExpected = expected.[negCount + nonCount .. negCount + nonCount + posCount-1]
    let posChart =
        Chart.Point(posExpected,res.PosSigBioitem |> Array.map (fun x -> x.Statistics))
        |> Chart.withLineStyle(Color=Color.fromKeyword Green)
        |> Chart.withTraceInfo("positive change",Visible = Visible.True )

    // no significant changes
    let nonex = expected.[negCount .. negCount + nonCount-1]
    let nonchart =
        Chart.Point(nonex,res.NonSigBioitem |> Array.map (fun x -> x.Statistics))
        |> Chart.withLineStyle(Color=Color.fromKeyword Gray)
        |> Chart.withTraceInfo("no change",Visible = Visible.True)

    // negative significant changes
    let negex = expected.[0 .. negCount-1]
    let negchart =
        Chart.Point(negex,res.NegSigBioitem |> Array.map (fun x -> x.Statistics))
        |> Chart.withLineStyle(Color=Color.fromKeyword Red)
        |> Chart.withTraceInfo("negative change",Visible = Visible.True)

    let samValues =
        [
            negchart
            nonchart
            posChart
        ]
        |> Chart.combine

    // The image export button of the mode bar produces SVG files.
    let chartConfig =
        let svgConfig =
            ConfigObjects.ToImageButtonOptions.init(
                Format = StyleParam.ImageFormat.SVG)
        Config.init (
            ToImageButtonOptions = svgConfig,
            ModeBarButtonsToAdd=[ModeBarButton.HoverCompareCartesian]
        )

    // Bisecting line shifted by +delta along the x axis.
    let cutLineUp = [(minDi + res.Delta) ; (maxDi + res.Delta)]
    let cutsUp =
        Chart.Line(cutLineUp,[minDi;maxDi])
        |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
        |> Chart.withTraceInfo("delta",Visible = Visible.True)

    // Bisecting line shifted by -delta along the x axis.
    let cutLineLow = [(minDi - res.Delta) ; (maxDi - res.Delta)]
    let cutsLow =
        Chart.Line(cutLineLow,[minDi;maxDi])
        |> Chart.withLineStyle(Color=Color.fromKeyword Purple,Dash = StyleParam.DrawingStyle.Dash, Width = 0.5)
        |> Chart.withTraceInfo("delta",Visible = Visible.True)

    let linechart =
        Chart.Line([minDi;maxDi], [minDi;maxDi])
        |> Chart.withTraceInfo("bisecting angle",Visible = Visible.True)
        |> Chart.withLineStyle(Color=Color.fromKeyword Black, Width = 1)

    // Horizontal line at the upper cut. It spans the same x range as the lower cut;
    // sampling the range with a fixed step of 5 ended before maxDi and degenerated to a
    // single point for ranges narrower than 5.
    let uppercut =
        Chart.Line([minDi;maxDi],[res.UpperCut;res.UpperCut])
        |> Chart.withTraceInfo("upper cut",Visible = Visible.True)
        |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash, Width = 0.3)

    // Horizontal line at the lower cut.
    let lowercut =
        Chart.Line([minDi;maxDi],[res.LowerCut;res.LowerCut])
        |> Chart.withTraceInfo("lower cut",Visible = Visible.True)
        |> Chart.withLineStyle(Color=Color.fromKeyword Black,Dash = StyleParam.DrawingStyle.Dash,Width = 0.3)
        //|> Chart.withXAxisStyle(MinMax = (-15,20))

    [linechart;
     samValues;
     cutsUp;
     cutsLow;
     uppercut;
     lowercut]
    |> Chart.combine
    |> Chart.withTitle(title = "SAM results")
    |> Chart.withXAxisStyle("expected Score")
    |> Chart.withYAxisStyle ("observed Score")
    |> Chart.withConfig(chartConfig)
    |> Chart.withTemplate(ChartTemplates.lightMirrored)
module Plotting =
open FSharp.Stats
open Plotly.NET
open Deedle
open BioFSharp.IO
open DeedleExtensions
/// Creates an MA plot: mean abundance (column "MeanAbundance") on the x axis against
/// the mean ratio (column "MeanR") on the y axis, labelled with the column "ProtName".
/// Three traces are combined: all proteins, the proteins flagged in the column
/// "IsCandidate" and the proteins whose name is contained in highlightList.
/// Missing numeric cells are read as nan and missing names as empty string; the column
/// "IsCandidate" is read without default value.
let plotMA highlightList (f:Frame<string,string>) =
    let dataAll =
        f
        |> Frame.mapRows (fun _ s ->
            let a = s.GetAs<float>("MeanAbundance",nan)
            let r = s.GetAs<float>("MeanR",nan)
            let stDev = s.GetAs<float>("StDevR",nan)
            let l = s.GetAs<string>("ProtName","")
            let c = s.GetAs<bool>("IsCandidate")
            {|
                ProtName = l
                MeanAbundance = a
                MeanR = r
                StDevR = stDev
                IsCandidate = c
            |}
        )
        |> Series.values
        |> Array.ofSeq
    // Trace with every protein of the frame.
    let all =
        let ratios = dataAll |> Seq.map (fun x -> x.MeanR)
        let abundances = dataAll |> Seq.map (fun x -> x.MeanAbundance)
        let labels = dataAll |> Seq.map (fun x -> x.ProtName)
        Chart.Point(abundances,ratios, MultiText = labels)
    // Trace with the proteins that are flagged as candidates.
    let candidatesNoStabw =
        let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate)
        let ratios = dataFiltered |> Seq.map (fun x -> x.MeanR)
        let abundances = dataFiltered |> Seq.map (fun x -> x.MeanAbundance)
        let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
        Chart.Point(
            abundances,ratios,
            Name="points", MultiText = labels
        )
        |> Chart.withTraceInfo "candidatesNoStabw"
    // Trace with the proteins that were explicitly requested via highlightList.
    let candidates' =
        let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName )
        let ratios = dataFiltered |> Seq.map (fun x -> x.MeanR)
        let abundances = dataFiltered |> Seq.map (fun x -> x.MeanAbundance)
        let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
        Chart.Point(
            abundances,ratios,
            Name="points",
            MultiText=labels,
            TextPosition=StyleParam.TextPosition.TopRight
        )
        |> Chart.withTraceInfo "candidates Reci"
    [
        all
        candidatesNoStabw
        candidates'
    ]
    |> Chart.combine
    |> Chart.withYAxisStyle "log2(bait)-log2(control)"
    |> Chart.withXAxisStyle "log2(Mean intensity)"
/// Creates a volcano plot: the mean ratio (column "MeanR") on the x axis against
/// -log10 of the q value (column "qValue") on the y axis, labelled with the column
/// "ProtName".
/// Three traces are combined: all proteins, the proteins flagged in the column
/// "IsCandidate" and the proteins whose name is contained in highlightList.
/// A q value of 0 would result in an infinite y value and is drawn at 4 instead.
let plotVulcano highlightList f =
    // -log10 transformation of a q value; infinite results are capped at 4.
    let negLog10Capped (qValue:float) =
        let res = -log10 qValue
        if infinity.Equals res then 4. else res
    let dataAll =
        f
        |> Frame.mapRows (fun _ s ->
            let a = s.GetAs<float>("MeanAbundance",nan)
            let r = s.GetAs<float>("MeanR",nan)
            let stDev = s.GetAs<float>("StDevR",nan)
            let qValue = s.GetAs<float>("qValue",nan)
            let l = s.GetAs<string>("ProtName","")
            let c = s.GetAs<bool>("IsCandidate")
            {|
                ProtName = l
                MeanAbundance = a
                MeanR = r
                StDevR = stDev
                IsCandidate = c
                QVal = qValue
            |}
        )
        |> Series.values
        |> Array.ofSeq
    // Trace with every protein of the frame.
    let all =
        let ratios = dataAll |> Seq.map (fun x -> x.MeanR)
        let negLogQ = dataAll |> Seq.map (fun x -> negLog10Capped x.QVal)
        let labels = dataAll |> Seq.map (fun x -> x.ProtName)
        Chart.Point(ratios,negLogQ, MultiText = labels)
    // Trace with the proteins that are flagged as candidates.
    let candidatesNoStabw =
        let dataFiltered = dataAll |> Array.filter (fun x -> x.IsCandidate)
        let ratios = dataFiltered |> Seq.map (fun x -> x.MeanR)
        let negLogQ = dataFiltered |> Seq.map (fun x -> negLog10Capped x.QVal)
        let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
        Chart.Point(
            ratios,negLogQ,
            Name="points",
            MultiText=labels
        )
        |> Chart.withTraceInfo "candidatesNoStabw"
    // Trace with the proteins that were explicitly requested via highlightList.
    let candidates' =
        let dataFiltered = dataAll |> Array.filter (fun x -> highlightList |> List.contains x.ProtName )
        let ratios = dataFiltered |> Seq.map (fun x -> x.MeanR)
        let negLogQ = dataFiltered |> Seq.map (fun x -> negLog10Capped x.QVal)
        let labels = dataFiltered |> Seq.map (fun x -> x.ProtName)
        Chart.Point(
            ratios,negLogQ,
            Name="points",
            MultiText=labels,
            TextPosition=StyleParam.TextPosition.TopRight
        )
        |> Chart.withTraceInfo "candidates Reci"
    [
        all
        candidatesNoStabw
        candidates'
    ]
    |> Chart.combine
    // The axis titles describe what is actually plotted; they were copied from the MA plot before.
    |> Chart.withXAxisStyle "log2(bait)-log2(control)"
    |> Chart.withYAxisStyle "-log10(qValue)"
/// Creates a histogram of the mean ratios (column "MeanR") as column chart.
/// nan ratios are ignored; the bin width is the nrd0 bandwidth of the remaining values.
let plotHisto (f:Frame<string,string>) =
    let rows =
        f
        |> Frame.mapRows (fun _ row ->
            let abundance = row.GetAs<float>("MeanAbundance",nan)
            let ratio = row.GetAs<float>("MeanR",nan)
            let ratioStDev = row.GetAs<float>("StDevR",nan)
            // Protein names are not needed for the histogram.
            let name = ""//row.GetAs<string>("ProtName","")
            let isCandidate = row.GetAs<bool>("IsCandidate")
            {|
                ProtName = name
                MeanAbundance = abundance
                MeanR = ratio
                StDevR = ratioStDev
                IsCandidate = isCandidate
            |}
        )
        |> Series.values
        |> Array.ofSeq
    let ratios =
        rows
        |> Array.map (fun row -> row.MeanR)
        |> Array.filter (fun ratio -> not (nan.Equals ratio))
    let binWidth = FSharp.Stats.Distributions.Bandwidth.nrd0 ratios
    let binned =
        FSharp.Stats.Distributions.Frequency.create binWidth ratios
        |> Map.toSeq
    Chart.Column binned
    |> Chart.withXAxisStyle "log2(bait)-log2(control)"
module MetaxaTemp =
open MetaXa
open MetaFrame
open Stats
open FSharp.Stats
open Plotly.NET
open Deedle
open BioFSharp.IO
open DeedleExtensions
open ISADotNet
module Global =
/// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md
module MedianOfRatios =
/// Normalizes abundances sample wise by a median-of-ratios correction factor.
/// Expects a data frame with shape frame<sampleIds,GeneIds>.
/// Returns a tuple of
///  - the numeric gene columns divided by the correction factor of the respective sample,
///  - a column chart of the median abundance per sample before the correction,
///  - a column chart of the correction factor per sample,
///  - a column chart of the median abundance per sample after the correction.
let medianOfRatiosWide (data:Frame<string,string>) =
    // Data frame with shape frame<sampleIds,GeneIds> that contains, for every gene
    // without missing values, the ratio of each sample to the reference of the gene.
    let ratios =
        data
        |> Frame.dropSparseCols
        |> Frame.getNumericCols
        |> Series.map (fun _ geneValues ->
            // NOTE(review): the reference is the median across all samples, although it
            // was named "geometricMeanAcrossAllSamples" - confirm which one is intended.
            let referenceAcrossAllSamples =
                geneValues.Values
                |> FSharp.Stats.Seq.median
            geneValues
            |> Series.map (fun _ value -> value / referenceAcrossAllSamples)
        )
        |> Frame.ofColumns
    // Series of shape: Series<sampleId,correctionFactor:float>
    let correctionFactorPerSample =
        ratios
        |> Frame.transpose
        |> Stats.median
    // Data frame with shape frame<sampleIds,GeneIds>.
    // Contains gene data divided by the sample wise estimated correction factor.
    let correctedData =
        data
        |> Frame.getNumericCols
        |> Series.map (fun _ geneValues ->
            geneValues / correctionFactorPerSample
        )
        |> Frame.ofColumns
    // The second title of each chart belongs to the y axis; setting it on the x axis
    // overwrote the title "SampleId".
    let abundanceInit =
        Chart.Column(data |> Frame.transpose |> Frame.dropSparseRows |> Stats.median |> Series.observations)
        |> Chart.withXAxisStyle "SampleId"
        |> Chart.withYAxisStyle "Median of Initial Abundances"
    let c =
        Chart.Column(correctionFactorPerSample |> Series.observations)
        |> Chart.withXAxisStyle "SampleId"
        |> Chart.withYAxisStyle "Correction Factor."
    let abundanceCorrected =
        Chart.Column(correctedData |> Frame.transpose |> Frame.dropSparseRows |> Stats.median |> Series.observations)
        |> Chart.withXAxisStyle "SampleId"
        |> Chart.withYAxisStyle "Median of corrected abundances"
    correctedData,abundanceInit,c,abundanceCorrected
module Imputation =
module SimpleSampling =
/// Imputes missing values (nan) column wise by sampling. Expects normalized data.
/// sampleFunction: called with a mean and a standard deviation, returns one sampled value.
/// minCountsForVarEst: minimal number of values a column must provide to contribute to the mean/cv estimation.
/// quantileForLowMean: if Some q, the q quantile of the empirical means is used as mean for groups without any value; if None such groups stay untouched.
/// learnVarianceMeanDependence: receives the empirical (log2 mean, cv) pairs and returns a function, which maps from a log2 mean to an estimator for the cv.
/// grouped: frame with two-part row keys; the values of a column are grouped by the first part of the row key.
/// Returns the imputed frame. The two diagnostic charts are built but only referenced in commented-out code.
let computeColumnWiseUsing (sampleFunction: float -> float -> float) minCountsForVarEst (quantileForLowMean:float option) (learnVarianceMeanDependence: seq<float*float> -> (float -> float)) (grouped:Frame<string*string,string>) =
/// Mean of the values of x, or nan if x provides fewer than k values.
let meanWithAtLeast k (x:Series<_,float>) =
let tmp = Series.values x
if tmp |> Seq.length < k then
nan
else
x |> Series.values |> Seq.mean
/// Result of `cv` applied to the values of x, or nan if x provides fewer than k values.
// NOTE(review): `cv` is not defined in this section - presumably the coefficient of variation; confirm.
let cvWithAtLeast k (x:Series<_,float>) =
let tmp = Series.values x
if tmp |> Seq.length < k then
nan
else
x |> Series.values |> cv //Seq.disp
/// Empirical (log2 mean, cv) pairs of all numeric columns, sorted by mean.
/// Pairs containing nan and pairs whose cv lies outside the Tukey borders (k = 1.5) are removed.
let log2MeanToCVEmpirical =
grouped
|> Frame.getNumericCols
|> Series.map (fun geneID s ->
let means = s |> (meanWithAtLeast minCountsForVarEst)
let cols = s |> (cvWithAtLeast minCountsForVarEst)
means, cols
)
|> Series.values
|> Array.ofSeq
|> Array.filter (fun x -> nan.Equals(fst x) |> not && nan.Equals(snd x) |> not)
|> Array.sortBy fst
|> Array.map (fun (mean,dis) -> log2 mean, dis)
|> fun data ->
let borders = FSharp.Stats.Testing.Outliers.tukey 1.5 (data |> Array.map snd)
data
|> Array.filter (fun x -> snd x > borders.Lower && snd x < borders.Upper )
/// function mapping from a log2 mean expression to an estimator for dispersion (cv).
let log2MeanToCv =
learnVarianceMeanDependence log2MeanToCVEmpirical
/// Fitted cv at every empirical log2 mean; only used for the diagnostic chart.
let log2MeanToCVByFit =
log2MeanToCVEmpirical
|> Array.map (fun x -> fst x, fst x |> log2MeanToCv)
/// Diagnostic chart: empirical (log2 mean, cv) pairs together with the fitted dependence.
let meanToCvEstChart =
[
[
Chart.PointDensity(log2MeanToCVEmpirical |> Array.map fst,log2MeanToCVEmpirical |> Array.map snd)
// |> Chart.withMarkerStyle (3)
]
|> Chart.combine
|> Chart.withTraceInfo "mean to CV empirical"
Chart.Spline(log2MeanToCVByFit)
|> Chart.withTraceInfo "mean Vs. CV fitted"
]
|> Chart.combine
|> Chart.withXAxisStyle "log2(mean Abundance)"
|> Chart.withYAxisStyle "CV"
/// Shadows the parameter quantileForLowMean: from here on it holds the abundance at the
/// requested quantile (if any) instead of the quantile itself. The second value is a
/// diagnostic chart of quantiles against abundances.
let quantileForLowMean, quantilesToMeansChart =
// empirical means, transformed back from log2 scale
let data =
Array.map fst log2MeanToCVEmpirical
|> Array.map (fun x -> 2.**x)
match quantileForLowMean with
| None ->
None,
[
[|0.05 .. 0.05 .. 0.95|]
|> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data)
|> Chart.Point
|> Chart.withTraceInfo "quantiles VS Abundance"
]
|> Chart.combine
|> Chart.withXAxisStyle "Quantiles"
|> Chart.withYAxisStyle "Abundance"
| Some quantileForLowMean ->
let q = data |> FSharp.Stats.Quantile.compute quantileForLowMean
Some q,
[
// the first quantile (0.00) is replaced by 0.01
[|0.00 .. 0.05 .. 0.95|]
|> Array.mapi (fun i q -> if i = 0 then 0.01 else q)
|> Array.map (fun q -> q,FSharp.Stats.Quantile.compute q data)
|> Chart.Point
|> Chart.withTraceInfo "quantiles VS Abundance"
Chart.Point([quantileForLowMean],[q])
|> Chart.withTraceInfo "selected quantile"
]
|> Chart.combine
|> Chart.withXAxisStyle "Quantiles"
|> Chart.withYAxisStyle "Abundance"
/// Imputation by sampling with sampleF, based on the mean of the input vector and the estimated cv.
/// Samples below zero are rejected and redrawn, so no negative value is returned.
/// If the input vector contains no value at all, lowMean (if given) is used as mean.
let initImputeUsing sampleF (estimateCv:float -> float) (lowMean:float option) (d: seq<float>) =
let clean = d |> Seq.filter (fun x -> nan.Equals x |> not)
// draws samples until a non negative value is obtained
let sampleSafe mean standardDev =
let rec sample () =
let tmp = sampleF mean standardDev
if tmp < 0. then sample () else tmp
sample()
if clean |> Seq.isEmpty |> not then
let mean = Seq.mean clean
let cvEst = estimateCv mean
let standardDev = cvEst * mean
//printfn "mean %f" mean
//printfn "stndev %f" standardDev
//printfn "before %A" d
let res =
d
|> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x)
//printfn "%A" res
res
else
match lowMean with
| Some lowMean ->
let mean = lowMean
let cvEst = estimateCv mean
let standardDev = cvEst * mean
d
|> Seq.map (fun x -> if nan.Equals x then sampleSafe mean standardDev else x)
| None -> d
/// Imputation of one vector; the mean is log2 transformed before the cv is estimated.
let imputeUsing (x:seq<float>) =
initImputeUsing sampleFunction (log2 >> log2MeanToCv) (quantileForLowMean) x
/// The imputed frame (result of this function).
let imputed =
// Step 1: Set genes missing when they are not comparable
// because all sample groups have 1 or zero observations.
grouped
|> Frame.getNumericCols
|> Series.map (fun geneID s ->
// values of the gene grouped by the first part of the row key
let geneDataByFactorLevels =
s
|> Series.applyLevel fst (Series.values >> Array.ofSeq)
let geneData =
geneDataByFactorLevels
|> Series.observations
// true if at least one group provides more than one value
let present =
geneData
|> Seq.map (fun (sampleID,v) ->
v.Length > 1
)
|> Seq.contains true
if present then s else s |> Series.mapValues (fun s -> nan)
)
|> Frame.ofColumns
// Step 2:
// x present 0 not present *nan*
// [x; x; x] -> [x; x; x]
// [x; x; 0] -> [x; x; x] by sampling, mean from sample, var by est
// Will be biased towards the remaining point.
// [x; 0; 0] -> [x; x; x] by sampling, mean from sample, var by est
//
// [0; 0; 0] ->
//depending on the value of quantileForLowMean :
//Some x -> [x; x; x]
//None -> [0; 0; 0]
// NOTE(review): Map.mapColWiseGroupedByFactorLevels is defined elsewhere - presumably applies imputeUsing to every factor level group of every column; confirm.
|> Map.mapColWiseGroupedByFactorLevels imputeUsing
// meanToCvEstChart |> Chart.show
// quantilesToMeansChart |> Chart.show
imputed
open Annotate
open Argu
#load "scripts/SamPlotting.fsx"
open SamPlotting
#load "scripts/Plotting.fsx"
open Plotting
#load "scripts/MetaxaTemp.fsx"
open MetaxaTemp
module Arguments =
type WorkflowArgs =
| [<Unique>] [<Mandatory>] [<AltCommandLine("-i")>] InputPath of path:string
| [<Unique>] [<Mandatory>] [<AltCommandLine("-a")>] ArcPath of path:string
| [<Unique>] [<Mandatory>] [<AltCommandLine("-m")>] MappingFilePath of path:string
| [<Unique>] [<Mandatory>] [<AltCommandLine("-id")>] RunID of path:string
// | [<Unique>] [<Mandatory>] [<AltCommandLine("-o")>] OutputPath of path:string
| [<Unique>] [<AltCommandLine("-c")>] IdentifierColumnHeader of string
| [<Unique>] [<AltCommandLine("-fdr")>] FDR of string
......@@ -43,6 +684,9 @@ module Arguments =
member this.Usage =
match this with
| InputPath x -> "relative input file path in /arc/"
| ArcPath x -> "relative path to the arc"
| RunID x -> "ID for the run"
| MappingFilePath x -> "relative path to the mapping file path in /arc/"
// | OutputPath x -> "relative output file path in /arc/runs/"
| IdentifierColumnHeader x -> "column header of identifier column file to annotate"
| FDR x -> "FDR"
......@@ -61,6 +705,9 @@ module Arguments =
// arguments are converted
let inputPath = annotationR.GetResult(InputPath)//@"/arc/assays/" + annotationR.GetResult(InputPath)
let arcPath = annotationR.GetResult(ArcPath)
let runID = annotationR.GetResult(RunID)
let mappingFilePath = annotationR.GetResult(MappingFilePath)
// let outputFilePath = annotationR.GetResult(OutputPath)
let columnHeader =
match annotationR.TryGetResult(IdentifierColumnHeader) with
......@@ -90,10 +737,8 @@ module Arguments =
////////////////////////////////////////
////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
let runID = System.Guid.NewGuid().ToString()
let source = __SOURCE_DIRECTORY__
let runP = System.IO.Path.Combine([|source + @"\..\..\runs";runID|])
System.IO.Directory.CreateDirectory runP
let runID = Arguments.runID
System.IO.Directory.CreateDirectory runID
|> ignore
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////
......@@ -102,10 +747,9 @@ System.IO.Directory.CreateDirectory runP
////////////////////////////////////////
////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
let arcPath = __SOURCE_DIRECTORY__ + @"\..\..\"
let qi =
arcIO.NET.Investigation.fromArcFolder arcPath
arcIO.NET.Investigation.fromArcFolder Arguments.arcPath
|> QueryModel.QInvestigation.fromInvestigation
// qi.ProtocolNames
......@@ -151,10 +795,10 @@ let qSampleNameToIntensityColumn fN =
////////////////////////////////////////
////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
let mappingFrame = Mapping.readMapping (__SOURCE_DIRECTORY__ + @"\scripts\chlamy_jgi55.txt") "\t" "Identifier"
let mappingFrame = Mapping.readMapping Arguments.mappingFilePath "\t" "Identifier"
mappingFrame.Print()
/// Truncates an identifier at the first occurrence of ".p" or ".t"
/// (e.g. Cre10.g123456.t2.1 -> Cre10.g123456). Identifiers without such a suffix are
/// returned unchanged.
let truncID (id : string) =
    let parts = id.Split([|".p";".t"|],System.StringSplitOptions.None)
    parts.[0]
let outPathA = runP + "/AnnotatedResult.tsv"
let outPathA = runID + "/AnnotatedResult.tsv"
Data.annotateAndWriteData
mappingFrame
......@@ -525,45 +1169,45 @@ let doAnalysis (allDataForAnalysis:Frame<string,string>) dataForAnalysis (allCon
/////////////////////////////////////////////////////
// Save Results
/////////////////////////////////////////////////////
let outP = System.IO.Path.Combine([|runP;baitGroup|])
let outP = System.IO.Path.Combine([|runID;baitGroup|])
System.IO.Directory.CreateDirectory outP
|> ignore
abundanceChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\1_Norm_PreNormGlobalAbundanceChart.html")
|> Chart.saveHtml (outP + @"/1_Norm_PreNormGlobalAbundanceChart.html")
corrFChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\2_Norm_CorrectionfactorChart.html")
|> Chart.saveHtml (outP + @"/2_Norm_CorrectionfactorChart.html")
corrAbundanceChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\3_Norm_AfterNormGlobalAbundanceChart.html")
|> Chart.saveHtml (outP + @"/3_Norm_AfterNormGlobalAbundanceChart.html")
preImpChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\4_Imputation_preImpChart.html")
|> Chart.saveHtml (outP + @"/4_Imputation_preImpChart.html")
afterImpChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\5_Imputation_afterImpChart.html")
|> Chart.saveHtml (outP + @"/5_Imputation_afterImpChart.html")
preNormChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\6_QuantilNorm_PreNormChart.html")
|> Chart.saveHtml (outP + @"/6_QuantilNorm_PreNormChart.html")
afterNormChart
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\7_QuantilNorm_AfterNormChart.html")
|> Chart.saveHtml (outP + @"/7_QuantilNorm_AfterNormChart.html")
createSAMChart res
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\8_Testing_SAM.html")
|> Chart.saveHtml (outP + @"/8_Testing_SAM.html")
plotHisto finPlot
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\9_Testing_DifferencesHistogram.html")
|> Chart.saveHtml (outP + @"/9_Testing_DifferencesHistogram.html")
plotMA [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"]
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\10_Testing_MAPlot.html")
|> Chart.saveHtml (outP + @"/10_Testing_MAPlot.html")
plotVulcano [] finPlot // ["CDJ5";"LON";"PSBQLD";"PGRL1"]
|> Chart.withTemplate ChartTemplates.lightMirrored
|> Chart.saveHtml (outP + @"\11_Testing_VulcanoPlot.html")
toSave.SaveCsv(outP + @"\Analysis_complete.tsv",includeRowKeys=true,keyNames=["Protein"],separator='\t')
System.IO.File.WriteAllLines(outP + @"\params.txt",fsi.CommandLineArgs)
|> Chart.saveHtml (outP + @"/11_Testing_VulcanoPlot.html")
toSave.SaveCsv(outP + @"/Analysis_complete.tsv",includeRowKeys=true,keyNames=["Protein"],separator='\t')
System.IO.File.WriteAllLines(outP + @"/params.txt",fsi.CommandLineArgs)
printfn "Finished Analysis! results can be found at:%s" outP
////////////////////////////////////////////////////////////////////////////////
......
# CWL description of the EvalTurboID analysis: runs an F# script with `dotnet fsi`.
cwlVersion: v1.2
class: CommandLineTool

hints:
  DockerRequirement:
    dockerPull: mcr.microsoft.com/dotnet/sdk:7.0

requirements:
  # - class: InlineJavascriptRequirement
  # - class: InitialWorkDirRequirement
  #   listing:
  #     - entry: $(inputs.scriptDirectory)
  #       writable: true
  - class: EnvVarRequirement
    envDef:
      - envName: DOTNET_NOLOGO
        envValue: "true"
  # NOTE(review): presumably needed to restore the NuGet packages of the script - confirm.
  - class: NetworkAccess
    networkAccess: true

baseCommand:
  - dotnet
  - fsi

# Resulting command line:
# dotnet fsi <scriptFile> -i <input> -m <mappingFile> -a <arcDirectory> -id <runID>
inputs:
  scriptFile:
    type: File
    inputBinding:
      position: 1
  input:
    type: File
    inputBinding:
      position: 2
      prefix: -i
  mappingFile:
    type: File
    inputBinding:
      position: 3
      prefix: -m
  arcDirectory:
    type: Directory
    inputBinding:
      position: 4
      prefix: -a
  runID:
    type: string
    inputBinding:
      position: 5
      prefix: -id

outputs:
  # Directory named after the run id inside the output directory of the tool.
  output:
    type: Directory
    outputBinding:
      glob: $(runtime.outdir)/$(inputs.runID)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment