From 1ef25f291146708a10c065f5fe805993eb234e97 Mon Sep 17 00:00:00 2001
From: Kevin Schneider <schneider@nfdi4plants.org>
Date: Fri, 27 May 2022 10:11:43 +0200
Subject: [PATCH] update notebook

---
 runs/structure-proportion-extraction.ipynb | 98 +++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/runs/structure-proportion-extraction.ipynb b/runs/structure-proportion-extraction.ipynb
index 952946cb..06b4290b 100644
--- a/runs/structure-proportion-extraction.ipynb
+++ b/runs/structure-proportion-extraction.ipynb
@@ -542,6 +542,102 @@
     "// |> fun lines -> File.WriteAllLines(\"C:/Users/schne/source/dataplant-gitlab/yeast-structure-proportions/assays/dataset/fldpnn/all-fldpnn-predictions.txt\",lines)\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "dotnet_interactive": {
+     "language": "fsharp"
+    },
+    "vscode": {
+     "languageId": "dotnet-interactive.fsharp"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\r\n",
+       "<style scoped>,\r\n",
+       "  .dataframe tbody tr th:only-of-type {\r\n",
+       "    vertical-align: middle;\r\n",
+       "  }\r\n",
+       "  .dataframe tbody tr th {,\r\n",
+       "    vertical-align: top\r\n",
+       "  }\r\n",
+       "  .dataframe thead th {\r\n",
+       "    text-align: right;\r\n",
+       "  }\r\n",
+       "  .no-wrap {\r\n",
+       "    white-space: nowrap;\r\n",
+       "  }\r\n",
+       "</style>\r\n",
+       "<table border='1' class='dataframe'>\r\n",
+       "<thead><th></th><th></th><th>HelixProportion</th><th>SheetProportion</th><th>CoilProportion</th><th>Sequence</th><th>StructureSequence</th><th>BinaryDisorderPrediction</th><th>DisorderPropensity</th></thead><thead><th></th><th></th><th>(float)</th><th>(float)</th><th>(float)</th><th>(string)</th><th>(string)</th><th>(string)</th><th>(string)</th></thead>\r\n",
+       "<tr><td><b>A0A023PYF4</b></td><td class=\"no-wrap\">-></td><td>0</td><td>0.372</td><td>0.628</td><td>MAILLPLKSILPWCCITFSFLLSSSGSISHSTASSSITLTKSSKPTNVPSNSRFDCSTINTFWLIVLSMTSKGKISGRLILRASVYACECTCIRYACCETIYPPRKPFSLSLYFFYFNKKASILFCYPDAKTKPEHPGNKRAGSG</td><td>CCCCCCCCCCCCCCCCCCCCCCCSSSCCCCCCCCCCCCCCCCCCCCCSCTTCCCCEEEEEEEEEEEEEECTTSCEEEEEEEEEEEEEETTEEEEEEEEEEEECCSSCCCCEEEEEEETTEEEEEEECCCTTCCCCCSCCCCCCCC</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1</td><td>0.046,0.054,0.051,0.041,0.044,0.048,0.055,0.056,0.048,0.055,0.043,0.056,0.056,0.047,0.051,0.051,0.04,0.045,0.053,0.051,0.055,0.063,0.069,0.108,0.157,0.132,0.131,0.132,0.183,0.211,0.267,0.284,0.285,0.304,0.322,0.292,0.27,0.255,0.197,0.196,0.191,0.194,0.202,0.186,0.204,0.174,0.17,0.165,0.131,0.169,0.121,0.094,0.076,0.093,0.077,0.068,0.049,0.047,0.042,0.037,0.035,0.035,0.033,0.027,0.026,0.03,0.039,0.042,0.048,0.058,0.07,0.09,0.119,0.122,0.164,0.177,0.155,0.102,0.06,0.041,0.032,0.029,0.031,0.025,0.023,0.021,0.022,0.032,0.03,0.034,0.024,0.03,0.024,0.027,0.03,0.034,0.044,0.043,0.042,0.048,0.05,0.064,0.06,0.068,0.073,0.061,0.051,0.043,0.041,0.035,0.032,0.029,0.03,0.027,0.028,0.031,0.029,0.033,0.037,0.037,0.043,0.04,0.04,0.042,0.048,0.067,0.078,0.105,0.115,0.134,0.131,0.199,0.213,0.308,0.39,0.533,0.557,0.684,0.614,0.619,0.616,0.598,0.75,0.878,0.886</td></tr><tr><td><b>A0A023PZB3</b></td><td 
class=\"no-wrap\">-></td><td>0.603</td><td>0</td><td>0.397</td><td>MYYFSRVAARTFCCCIFFCLATAYSRPDRNPRKIEKKDKKFFGASKNTNPANAMGNLFKAPTIEYVVEEVTRTHQPEQYDIPTDMSPLMTIAASESADKFTDKFFVDQSSIMKEKTSSKGNARTLL</td><td>CCHHHHHHHHHHHHHHHHHHHHHHTSCCCCCSCCCCCCCCCCCCCCCCCHHHHHHHHTTSCCHHHHHHHHHHHTCCCCCCCCTTCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTTSCC</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1</td><td>0.188,0.176,0.159,0.125,0.101,0.123,0.115,0.104,0.102,0.103,0.123,0.149,0.167,0.153,0.201,0.189,0.195,0.172,0.214,0.177,0.155,0.13,0.147,0.174,0.195,0.238,0.252,0.364,0.419,0.41,0.398,0.405,0.397,0.407,0.389,0.399,0.395,0.449,0.41,0.364,0.39,0.415,0.407,0.371,0.376,0.361,0.347,0.345,0.321,0.294,0.283,0.273,0.258,0.249,0.261,0.188,0.188,0.175,0.184,0.195,0.182,0.179,0.157,0.165,0.16,0.146,0.157,0.182,0.207,0.194,0.239,0.215,0.271,0.291,0.315,0.391,0.398,0.386,0.341,0.441,0.327,0.306,0.324,0.315,0.319,0.263,0.259,0.248,0.223,0.21,0.22,0.214,0.22,0.233,0.23,0.25,0.209,0.188,0.179,0.167,0.182,0.184,0.199,0.176,0.169,0.179,0.217,0.262,0.287,0.278,0.298,0.332,0.401,0.479,0.457,0.521,0.545,0.541,0.542,0.609,0.599,0.556,0.571,0.594,0.547,0.531</td></tr><tr><td><b>A0A023PZE8</b></td><td 
class=\"no-wrap\">-></td><td>0.323</td><td>0.293</td><td>0.383</td><td>MSETCSSSLALLHKILHIHSHTPSVYYNICISVRILTSERLQCFFFSFFPDPNITGSGLKVPGFLFFHTFFFSKSCCQALIDSFSSDYYQFKMLEKNRKAEKINKRTIFICSFTFEYKIKSCFSCFHLSTHTN</td><td>CCHHHHHHHHHHHHHHHHGGGSTTCCEEEEEEEEECSSSEEEEEEEEEEECTTSCSSSSCCCEEEEEEEEEEEHHHHHHHHHHHHHHHHHHHHHHHHCSSSCCCCCCEEEEEEEECSSSCCCCCCCCCCCCCC</td><td>1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0</td><td>0.373,0.376,0.374,0.32,0.264,0.254,0.273,0.263,0.262,0.232,0.237,0.177,0.197,0.169,0.148,0.146,0.105,0.088,0.058,0.075,0.084,0.083,0.066,0.054,0.046,0.046,0.044,0.036,0.032,0.03,0.03,0.03,0.038,0.038,0.047,0.042,0.039,0.041,0.045,0.037,0.038,0.024,0.033,0.029,0.033,0.033,0.032,0.041,0.048,0.057,0.082,0.071,0.08,0.086,0.084,0.132,0.134,0.086,0.06,0.049,0.052,0.051,0.047,0.033,0.025,0.025,0.032,0.031,0.034,0.033,0.038,0.038,0.047,0.052,0.055,0.057,0.059,0.053,0.069,0.07,0.063,0.053,0.068,0.073,0.071,0.058,0.051,0.049,0.042,0.039,0.046,0.051,0.05,0.056,0.061,0.065,0.077,0.083,0.084,0.096,0.083,0.072,0.062,0.051,0.056,0.049,0.042,0.037,0.031,0.025,0.032,0.025,0.026,0.028,0.024,0.026,0.033,0.031,0.033,0.034,0.031,0.035,0.045,0.038,0.051,0.046,0.054,0.056,0.05,0.063,0.07,0.075,0.08</td></tr><tr><td><b>A0A023PZF2</b></td><td 
class=\"no-wrap\">-></td><td>0.859</td><td>0</td><td>0.141</td><td>MNTLLKKYRKQRYAWLRFLLFSKIEGSLPVALRILLSLQPFCCNIYRKYYQENKKVKSTSGNTALKIIEKLESLVSNLRLKYSQMFSLLFLHSYNCLQSLATETFRIIKKREKNLLSLLIPGSCIEIARHFVLKE</td><td>CHHHHHHHHHHHHHHHHHHHHHHHTTCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTCCTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSCGGGTTTSSCHHHHHHHHHHHTTC</td><td>1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0</td><td>0.621,0.661,0.6,0.524,0.62,0.552,0.445,0.44,0.333,0.327,0.215,0.196,0.178,0.127,0.117,0.134,0.115,0.104,0.074,0.091,0.088,0.096,0.075,0.104,0.092,0.108,0.094,0.101,0.141,0.153,0.121,0.134,0.125,0.114,0.076,0.072,0.071,0.084,0.071,0.075,0.064,0.062,0.077,0.058,0.062,0.072,0.077,0.068,0.074,0.075,0.087,0.104,0.132,0.146,0.138,0.153,0.153,0.15,0.15,0.151,0.16,0.173,0.151,0.111,0.11,0.093,0.059,0.053,0.057,0.06,0.06,0.056,0.059,0.051,0.054,0.061,0.064,0.068,0.055,0.062,0.063,0.068,0.068,0.054,0.061,0.074,0.064,0.076,0.07,0.071,0.064,0.063,0.064,0.076,0.06,0.07,0.067,0.072,0.085,0.081,0.062,0.078,0.075,0.088,0.077,0.087,0.093,0.094,0.103,0.086,0.076,0.055,0.06,0.052,0.059,0.064,0.059,0.057,0.06,0.059,0.068,0.082,0.063,0.05,0.05,0.048,0.062,0.062,0.072,0.077,0.065,0.065,0.09,0.115,0.176</td></tr><tr><td><b>A0A023PZG4</b></td><td 
class=\"no-wrap\">-></td><td>0.561</td><td>0</td><td>0.439</td><td>MHTICLRSPIDESSPLPYKSIRQPLENAHSCQALCSLMAVLCASAAHRLSETFPMRLVVAREYANWGAFQHAFTRRAGASVAATSAWFDAVAAGTENAHMQSAESCN</td><td>CCCCCCCSCCCTTCCSSHHHHHHHHHSCCSHHHHHHHHHHHHHHHHSTTCTTHHHHHHHHHHHSCHHHHHHHHHHHHTCCHHHHHHHHHHHHTTCTTSCCCCSSCCC</td><td>0,1,0,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1</td><td>0.286,0.312,0.287,0.264,0.25,0.253,0.324,0.318,0.303,0.303,0.255,0.271,0.315,0.307,0.313,0.284,0.265,0.287,0.258,0.227,0.2,0.182,0.185,0.164,0.14,0.154,0.119,0.105,0.117,0.101,0.113,0.101,0.14,0.101,0.144,0.136,0.183,0.141,0.164,0.207,0.116,0.102,0.105,0.088,0.086,0.075,0.069,0.067,0.075,0.09,0.079,0.098,0.101,0.101,0.106,0.096,0.099,0.09,0.084,0.079,0.089,0.092,0.094,0.078,0.098,0.119,0.103,0.087,0.085,0.103,0.099,0.107,0.113,0.123,0.135,0.151,0.161,0.161,0.176,0.148,0.139,0.13,0.141,0.184,0.163,0.152,0.161,0.148,0.152,0.194,0.177,0.199,0.157,0.179,0.22,0.29,0.284,0.305,0.366,0.464,0.571,0.571,0.586,0.59,0.581,0.501,0.494</td></tr><tr><td><b>:</b></td><td class=\"no-wrap\"></td><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td></tr><tr><td><b>Q9ZZX0</b></td><td 
class=\"no-wrap\">-></td><td>0.54</td><td>0.169</td><td>0.291</td><td>MLMLLMMILTNNKVFMETLYYYLMFNFQLMSPFGVPVPGPAPETKDIKNLYESIMNNYINILNKYTININKDNINKLKFLDNYTEEEKGYYLSGLFEGDGNIYTRCFSITFSLEDVLLANYLCTYFKIGHITAKYNFNKELTAVKWNIMKKKEQEVFMNYINGKLLTYKRYDQYFKYNFNNRLNIKLLKPKEFDLTLNPWLTGFNDADGYFYTGFQKHKNSQWLKFHLELSQKDSYILDIIKKYFKTGGILKRDYKSGATAYIYKAQSSKAMKPFIEYFNNYQPLSTRRYKQYLLLNIAYLLKLNKLHMLTNSLLMLKELMLLQSVKNMSLEMKNELNNRVKIIINKTHYNNIE</td><td>CHHHHHHHTTSCHHHHHHHHHHHHCSCCCCCCSCCCCCCCCCCHHHHHHHHHHHHHHHHHHHHHTTCCCCHHHHSCCCCGGGSCHHHHHHHHHHHHHHHEEEETTEEEEEEEGGGHHHHHHHHHHHTCCEEEEEECTTCCEEEEEEEECSHHHHHHHHHHHTTCCCCHHHHHHHHHTTHHHHHCCCCCCCCCCCGGGSTHHHHHHHHHEEEEEEEEEETTEEEEEEEEEEEESSSHHHHHHHHHHTCCEEEEEECTTSCEEEEEEECSHHHHHHHHHHHHHSCCSSHHHHHHHHHHHHHHHHHHTTGGGSHHHHHHHHHHHHHHTTTCCCHHHHHHHHHHHHHHHHHHHHTTCC</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1</td><td>0.033,0.034,0.039,0.044,0.048,0.039,0.039,0.045,0.036,0.042,0.057,0.073,0.062,0.057,0.043,0.049,0.05,0.045,0.052,0.043,0.046,0.05,0.057,0.055,0.05,0.05,0.055,0.051,0.056,0.063,0.064,0.091,0.092,0.099,0.107,0.111,0.131,0.226,0.213,0.268,0.253,0.188,0.189,0.16,0.133,0.123,0.126,0.113,0.106,0.098,0.09,0.079,0.083,0.077,0.069,0.065,0.068,0.066,0.063,0.061,0.061,0.065,0.069,0.073,0.084,0.1,0.104,0.092,0.086,0.088,0.101,0.103,0.099,0.109,0.111,0.108,0.106,0.105,0.134,0.128,0.107,0.109,0.10
6,0.086,0.075,0.073,0.069,0.066,0.053,0.04,0.038,0.037,0.032,0.031,0.024,0.028,0.028,0.033,0.038,0.036,0.027,0.025,0.029,0.03,0.036,0.031,0.03,0.026,0.022,0.03,0.035,0.039,0.037,0.035,0.031,0.032,0.035,0.036,0.034,0.025,0.029,0.035,0.037,0.034,0.037,0.041,0.041,0.038,0.041,0.044,0.048,0.045,0.044,0.06,0.066,0.071,0.069,0.073,0.072,0.076,0.062,0.052,0.05,0.039,0.029,0.028,0.023,0.028,0.031,0.031,0.034,0.036,0.031,0.03,0.029,0.026,0.026,0.026,0.019,0.021,0.025,0.033,0.036,0.036,0.036,0.042,0.036,0.033,0.043,0.041,0.04,0.032,0.03,0.031,0.034,0.03,0.034,0.035,0.036,0.035,0.035,0.045,0.046,0.051,0.043,0.041,0.053,0.064,0.063,0.082,0.087,0.093,0.067,0.047,0.051,0.045,0.038,0.028,0.025,0.028,0.027,0.023,0.028,0.027,0.032,0.027,0.036,0.046,0.049,0.039,0.041,0.047,0.054,0.06,0.065,0.078,0.078,0.084,0.098,0.1,0.1,0.094,0.081,0.072,0.058,0.054,0.046,0.046,0.039,0.033,0.04,0.045,0.039,0.034,0.036,0.034,0.033,0.033,0.03,0.046,0.069,0.067,0.071,0.089,0.096,0.114,0.142,0.159,0.196,0.188,0.158,0.13,0.143,0.147,0.121,0.12,0.108,0.13,0.122,0.141,0.177,0.183,0.151,0.177,0.123,0.099,0.083,0.072,0.059,0.057,0.047,0.043,0.036,0.031,0.034,0.032,0.031,0.032,0.037,0.044,0.051,0.057,0.06,0.071,0.074,0.06,0.069,0.066,0.049,0.055,0.048,0.04,0.039,0.03,0.031,0.034,0.034,0.031,0.035,0.029,0.027,0.028,0.031,0.047,0.056,0.053,0.051,0.058,0.057,0.062,0.072,0.08,0.074,0.064,0.053,0.051,0.066,0.068,0.056,0.061,0.052,0.049,0.065,0.055,0.065,0.07,0.066,0.082,0.098,0.098,0.12,0.12,0.122,0.131,0.134,0.125,0.106,0.09,0.094,0.097,0.102,0.087,0.061,0.068,0.085,0.088,0.102,0.113,0.128,0.163,0.175,0.218,0.294,0.326</td></tr><tr><td><b>Q9ZZX1</b></td><td 
class=\"no-wrap\">-></td><td>0.597</td><td>0.087</td><td>0.316</td><td>MVQRWLYSTNAKDIAVLYFMLAIFSGMAGTAMSLIIRLELAAPGSQYLHGNSQLFNVLVVGHAVLMIFFLVMPALIGGFGNYLLPLMIGATDTAFPRINNIAFWVLPMGLVCLVTSTLVESGAGTGWTVYPPLSSIQAHSGPSVDLAIFALHLTSISSLLGAINFIVTTLNMRTNGMTMHKLPLFVWSIFITAFLLLLSLPVLSAGITMLLLDRNFNTSFFEVSGGGDPILYEHLFWFFGHPEVYILIIPGFGIISHVVSTYSKKPVFGEISMVYAMASIGLLGFLVWSHHMYIVGLDADTRAYFTSATMIIAIPTGIKIFSWLMNPFSKDKNKNKNKKLIRNYQKMNNNNMMKTYLNNNNMIMMNMYKGNLYDIYPRSNRNYIQPNNINKELVVYGYNLESCVGMPTYTNIVKHMVGIPNNILYIMTGILLTDGWIDYTSKKDLDKKTIMEINCRFRLKQSMIHSEYLMYVFMLLSHYCMSYPKMKIAKVKGKSYNQLEFYTRSLPCFTILRYMFYNGRVKIVPNNLYDLLNYESLAHMIMCDGSFVKGGGLYLNLQSFTTKELIFIMNILKIKFNLNCTLHKSRNKYTIYMRVESVKRLFPMIYKYILPSMRYKFDIMLWQKKYNMIN</td><td>CHHHHTSCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTTSSSCSSSTTCHHHHHHHHHHHHHIIIIIIIHIIIIIIIIHHHHHHHHTCSSCSCHHHHHHHHHHHHHHHHHHHHHHHSTTCCCSTTTTCTTTTSTTTCCSTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHCCCTTCCGGGSCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSCCCSSCGGGTCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTSCCTTHHHHHHHHHHHHHHHTTCGGGGGTTTTCCHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHSCCSCTTSHHHHHHHHHHHHHHHHHHHHHHCSSCCCCSCCCSCGGGTCGGGCTTCCCCCTTCCSCCSSSSCCCCCTTCCCCCHHHHHHCCCCHHHHHHHHHHHTTTCEEEESCCTTGGGSCTTTCCEEEEEEEEGGGHHHHHHHHHHTGGGBSSCCEEEEEEETTEEEEEEEEEBCCBHHHHHHHHHHEETTEECCCTTHHHHCCHHHHHHHHHHHEEEETTTEEEECCTTSCHHHHHHHHHHHHHHHCCCEEEEEETTEEEEEECHHHHHHHHHHHGGGSCGGGGGGSCHHHHHHHHCCCC</td><td>0.04,0.037,0.024,0.019,0.017,0.014,0.014,0.016,0.016,0.019,0.019,0.014,0.025,0.051,0.034,0.019,0.015,0.03,0.032,0.029,0.04,0.07,0.047,0.037,0.034,0.066,0.04,0.071,0.062,0.039,0.03,0.029,0.023,0.014,0.024,0.03,0.013,0.018,0.023,0.024,0.025,0.03,0.039,0.045,0.053,0.039,0.035,0.041,0.037,0.039,0.026,0.023,0.017,0.015,0.014,0.015,0.015,0.017,0.016,0.012,0.016,0.022,0.025,0.015,0.02,0.014,0.015,0.014,0.019,0.025,0.033,0.032,0.043,0.024,0.019,0.025,0.018,0.027,0.017,0.013,0.011,0.016,0.012,0.021,0.016,0.013,0.015,0.016,0.023,0.027,0.027,0.033,0.029,0.028,0.03,0.027,0.022,0.021,0.017,0.017,0.015,0.013,0.012,0.012,0.01,0.013,0.015,0.013,0.015,0.012,0.
012,0.01,0.013,0.018,0.021,0.019,0.02,0.023,0.029,0.036,0.053,0.074,0.059,0.055,0.034,0.025,0.029,0.033,0.022,0.031,0.029,0.031,0.038,0.031,0.039,0.073,0.061,0.089,0.113,0.131,0.149,0.077,0.064,0.03,0.023,0.015,0.014,0.014,0.013,0.011,0.011,0.011,0.009,0.012,0.019,0.024,0.026,0.01,0.015,0.017,0.018,0.011,0.01,0.009,0.008,0.008,0.008,0.011,0.012,0.01,0.016,0.023,0.036,0.05,0.033,0.033,0.027,0.019,0.019,0.015,0.016,0.015,0.012,0.009,0.011,0.011,0.015,0.015,0.013,0.01,0.008,0.011,0.01,0.008,0.007,0.008,0.007,0.006,0.009,0.008,0.008,0.01,0.014,0.018,0.015,0.019,0.009,0.008,0.007,0.008,0.01,0.01,0.01,0.007,0.008,0.01,0.01,0.013,0.014,0.014,0.014,0.016,0.017,0.028,0.028,0.024,0.024,0.02,0.016,0.013,0.01,0.009,0.009,0.01,0.011,0.01,0.009,0.011,0.011,0.013,0.012,0.01,0.01,0.009,0.007,0.007,0.007,0.006,0.009,0.008,0.007,0.006,0.005,0.005,0.005,0.005,0.005,0.006,0.007,0.007,0.008,0.013,0.015,0.017,0.019,0.016,0.015,0.013,0.012,0.014,0.011,0.011,0.009,0.007,0.007,0.01,0.008,0.008,0.007,0.007,0.005,0.006,0.007,0.005,0.005,0.004,0.004,0.004,0.003,0.004,0.004,0.004,0.005,0.006,0.007,0.01,0.01,0.01,0.01,0.01,0.012,0.009,0.009,0.01,0.008,0.008,0.006,0.006,0.006,0.005,0.004,0.005,0.006,0.005,0.007,0.006,0.006,0.006,0.006,0.005,0.005,0.005,0.008,0.008,0.008,0.01,0.012,0.017,0.025,0.036,0.039,0.049,0.042,0.06,0.09,0.105,0.118,0.088,0.074,0.053,0.05,0.046,0.037,0.03,0.023,0.021,0.025,0.028,0.021,0.028,0.025,0.03,0.057,0.054,0.033,0.032,0.022,0.022,0.024,0.023,0.019,0.015,0.014,0.016,0.019,0.019,0.022,0.02,0.018,0.018,0.019,0.021,0.021,0.021,0.027,0.031,0.032,0.035,0.043,0.052,0.055,0.041,0.043,0.052,0.055,0.05,0.045,0.037,0.03,0.025,0.022,0.023,0.019,0.016,0.016,0.017,0.017,0.016,0.013,0.017,0.022,0.022,0.024,0.024,0.023,0.022,0.022,0.023,0.02,0.014,0.013,0.013,0.014,0.017,0.017,0.019,0.018,0.017,0.017,0.02,0.019,0.02,0.019,0.015,0.012,0.01,0.012,0.01,0.01,0.008,0.012,0.015,0.019,0.021,0.023,0.022,0.026,0.029,0.037,0.062,0.061,0.056,0.062,0.071,0.084,0.089,0.089,0.082,0.094,0.095,0.083
,0.07,0.052,0.033,0.024,0.018,0.019,0.019,0.023,0.026,0.025,0.023,0.023,0.021,0.022,0.019,0.02,0.018,0.012,0.015,0.017,0.014,0.016,0.013,0.017,0.022,0.025,0.028,0.029,0.043,0.06,0.094,0.136,0.19,0.181,0.172,0.181,0.175,0.132,0.089,0.078,0.056,0.041,0.038,0.035,0.032,0.024,0.023,0.024,0.019,0.019,0.014,0.018,0.017,0.02,0.023,0.025,0.024,0.022,0.017,0.014,0.019,0.017,0.016,0.012,0.012,0.018,0.016,0.024,0.03,0.023,0.017,0.02,0.022,0.02,0.019,0.02,0.018,0.017,0.013,0.017,0.017,0.013,0.013,0.01,0.012,0.011,0.008,0.008,0.008,0.009,0.012,0.019,0.025,0.031,0.017,0.021,0.024,0.03,0.036,0.035,0.027,0.023,0.021,0.021,0.022,0.033,0.033,0.029,0.029,0.032,0.032,0.034,0.035,0.024,0.021,0.022,0.031,0.024,0.021,0.018,0.018,0.017,0.018,0.019,0.021,0.021,0.022,0.02,0.02,0.018,0.021,0.019,0.021,0.025,0.025,0.027,0.021,0.019,0.021,0.017,0.017,0.021,0.021,0.022,0.024,0.024,0.024,0.025,0.026,0.029,0.026,0.018,0.02,0.026,0.019,0.022,0.018,0.025,0.022,0.019,0.017,0.019,0.017,0.015,0.014,0.018,0.017,0.012,0.01,0.012,0.016,0.023,0.022,0.024,0.027,0.024,0.031,0.037,0.055</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0</td></tr><tr><td><b>Q9ZZX7</b></td><td class=\"no-wrap\">-></td><td>0.948</td><td>0</td><td>0.052</td><td>MLMMYMLFIMMKTYPMLSYHMMSYHIMLYTIMWYMKYSTYMRLWLLYKSYFIFIFIWTNNNYNNNYWYVTMLMNTYLYYNMNIHFLTINKKFLYSL</td><td>CHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHC</td><td>0.05,0.056,0.065,0.06,0.082,0.085,0.063,0.095,0.118,0.097,0.144,0.113,0.121,0.123,0.106,0.099,0.09,0.065,0.08,0.069,0.067,0.058,0.058,0.066,0.062,0.068,0.054,0.093,0.107,0.091,0.119,0.086,0.103,0.128,0.118,0.111,0.15,0.103,0.074,0.09,0.109,0.12,0.108,0.08,0.097,0.072,0.067,0.056,0.033,0.024,0.023,0.028,0.027,0.034,0.031,0.042,0.041,0.049,0.057,0.067,0.065,0.066,0.072,0.089,0.079,0.072,0.065,0.094,0.089,0.072,0.074,0.065,0.066,0.073,0.073,0.058,0.061,0.046,0.048,0.056,0.059,0.057,0.053,0.05,0.049,0.047,0.046,0.053,0.057,0.055,0.064,0.047,0.052,0.059,0.053,0.065</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0</td></tr><tr><td><b>Q9ZZX8</b></td><td 
class=\"no-wrap\">-></td><td>0.453</td><td>0</td><td>0.547</td><td>MCATYMFNITVIITHPTPTLRTRGPGFVRNRDLYIYKYKSNLINNLNNMTYIL</td><td>CCCCCCCCCCCCCCCCCCCCSCCCTTHHHHHHHHHHHHHHHHHHHHHHHHTCC</td><td>0.149,0.128,0.117,0.068,0.045,0.038,0.038,0.044,0.042,0.036,0.035,0.042,0.039,0.057,0.081,0.097,0.109,0.097,0.112,0.128,0.124,0.127,0.137,0.167,0.152,0.15,0.112,0.106,0.079,0.074,0.055,0.051,0.05,0.035,0.037,0.036,0.047,0.066,0.065,0.071,0.078,0.075,0.079,0.08,0.093,0.118,0.122,0.128,0.103,0.087,0.106,0.105,0.098</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0</td></tr><tr><td><b>Q9ZZX9</b></td><td class=\"no-wrap\">-></td><td>0.766</td><td>0</td><td>0.234</td><td>MYYIMFLYNMLLIIILIFYSIVGVPIIIFNNNYYWDPDIFLFIIYYFIKFIIIFNLYLYYMINYIVYTPSGSPPGRGTYILLYNMLYSYNMFIDYVMKFITCVTYMYLMFWLLSPTPSPYYVSEVPVS</td><td>CHHHHHHHHHHHHHHHHHIIIIIHHHHHHHTTCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSCTTSCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHSCCCCTTCCCCCCCC</td><td>0.041,0.06,0.071,0.07,0.061,0.103,0.084,0.092,0.101,0.088,0.082,0.119,0.139,0.14,0.138,0.113,0.124,0.1,0.115,0.085,0.07,0.063,0.073,0.051,0.045,0.043,0.036,0.032,0.038,0.047,0.049,0.052,0.044,0.045,0.05,0.059,0.06,0.057,0.055,0.038,0.055,0.065,0.092,0.077,0.063,0.069,0.07,0.077,0.077,0.099,0.086,0.067,0.057,0.064,0.066,0.051,0.04,0.055,0.044,0.048,0.033,0.033,0.033,0.036,0.03,0.039,0.049,0.063,0.092,0.177,0.159,0.168,0.169,0.114,0.131,0.106,0.098,0.06,0.05,0.041,0.04,0.04,0.053,0.055,0.041,0.042,0.052,0.043,0.048,0.049,0.053,0.046,0.045,0.067,0.077,0.063,0.079,0.101,0.117,0.093,0.075,0.142,0.103,0.073,0.095,0.062,0.071,0.074,0.061,0.07,0.08,0.069,0.079,0.073,0.073,0.109,0.107,0.1,0.096,0.095,0.106,0.087,0.072,0.083,0.121,0.136,0.174,0.252</td><td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0</td></tr>\r\n",
+       "</table>\r\n",
+       "<p><b>6040</b> rows x <b>7</b> columns</p><p><b>0</b> missing values</p>\r\n",
+       "</div>\r\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "let disorderAggregated =  File.ReadAllLines(\"C:/Users/schne/source/dataplant-gitlab/yeast-structure-proportions/assays/dataset/fldpnn/all-disorder-predictions.txt\")\n",
+    "let noDisorderAggregated =  File.ReadAllLines(\"C:/Users/schne/source/dataplant-gitlab/yeast-structure-proportions/assays/dataset/fldpnn/no-disorder-predictions.txt\")\n",
+    "let allFldpnnPredictions = File.ReadAllLines(\"C:/Users/schne/source/dataplant-gitlab/yeast-structure-proportions/assays/dataset/fldpnn/all-fldpnn-predictions.txt\")\n",
+    "\n",
+    "// use this for all predictions, these 3 records are contained for both disorder and no disorder predictions.\n",
+    "type BasicFldpnnPrediction = {\n",
+    "    Id: string\n",
+    "    Sequence: string\n",
+    "    BinaryDisorderPrediction: string\n",
+    "    DisorderPropensity: string\n",
+    "}\n",
+    "\n",
+    "let fldpnnPredictionMap = \n",
+    "    allFldpnnPredictions\n",
+    "    |> Array.skip 15\n",
+    "    |> String.concat Environment.NewLine\n",
+    "    |> fun s -> s.Split(\">\")\n",
+    "    |> Array.skip 1\n",
+    "    |> Array.map (fun s ->\n",
+    "        let lines = s.Split(Environment.NewLine)\n",
+    "        lines[0],\n",
+    "        {\n",
+    "            Id = lines[0]\n",
+    "            Sequence = lines[1]\n",
+    "            BinaryDisorderPrediction = lines[2]\n",
+    "            DisorderPropensity = lines[3]\n",
+    "        }\n",
+    "    )\n",
+    "    |> Map\n",
+    "\n",
+    "let aggregatedDf = \n",
+    "    df\n",
+    "    |> Frame.indexRowsString \"ProteinUId\"\n",
+    "    |> fun f ->\n",
+    "        let binaryPredictionCol = \n",
+    "            f \n",
+    "            |> Frame.mapRows(fun rk os ->\n",
+    "                let fldpnnPreds = fldpnnPredictionMap[rk]\n",
+    "                fldpnnPreds.BinaryDisorderPrediction\n",
+    "            )\n",
+    "        let propensityCol = \n",
+    "            f \n",
+    "            |> Frame.mapRows(fun rk os ->\n",
+    "                let fldpnnPreds = fldpnnPredictionMap[rk]\n",
+    "                fldpnnPreds.DisorderPropensity\n",
+    "            )\n",
+    "        f\n",
+    "        |> Frame.addCol \"BinaryDisorderPrediction\" binaryPredictionCol\n",
+    "        |> Frame.addCol \"DisorderPropensity\" propensityCol\n",
+    "\n",
+    "aggregatedDf"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -555,7 +651,7 @@
    },
    "outputs": [],
    "source": [
-    "let "
+    "aggregatedDf.SaveCsv(@\"C:/Users/schne/source/dataplant-gitlab\\yeast-structure-proportions\\assays\\results/structure-proportions-with-fldpnn-disorder-predictions.tsv\",includeRowKeys=true,keyNames=[\"ProteinUId\"],separator = '\\t')"
    ]
   }
  ],
-- 
GitLab