{ "cells": [
 { "cell_type": "markdown", "id": "8c83058e-1fdf-4d1a-9202-4816a300e7d2", "metadata": {}, "source": [
  "### Read the number of associated TEs for every species and store them in a TSV file"
 ] },
 { "cell_type": "code", "execution_count": 26, "id": "0b3bd0f8-fc7e-4cc5-98c7-ea3ccd8fecb0", "metadata": {}, "outputs": [], "source": [
  "import gffpandas.gffpandas as gffpd\n",
  "import pandas as pd\n",
  "import matplotlib.pyplot as plt\n",
  "import glob\n",
  "import os"
 ] },
 { "cell_type": "code", "execution_count": 51, "id": "bc351336-f616-43a3-ac51-b07e85d02c7e", "metadata": {}, "outputs": [], "source": [
  "# Association categories: one sub-directory of the association folder per category.\n",
  "# Hidden entries are skipped; the result is sorted because os.listdir returns\n",
  "# entries in arbitrary order and the TSV rows should be reproducible.\n",
  "all_locations = sorted(\n",
  "    entry for entry in os.listdir(\"<arc/runs/association_gff3>/.\")\n",
  "    if not entry.startswith('.')\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": 77, "id": "3db55a8e-5a77-4182-a7e6-cf96fc00cdd7", "metadata": {}, "outputs": [], "source": [
  "# Species table: each non-empty line is expected to hold a species name, a tab, then its type.\n",
  "with open('<arc/runs>/Fig6_species') as species_handle:\n",
  "    species_file = species_handle.read().split('\\n')\n",
  "\n",
  "# Map species -> type. Blank lines are skipped, so every species is read\n",
  "# whether or not the file ends with a trailing newline.\n",
  "species_dic = {}\n",
  "for line in species_file:\n",
  "    if not line.strip():\n",
  "        continue\n",
  "    fields = line.split('\\t')\n",
  "    species_dic[fields[0]] = fields[1]"
 ] },
 { "cell_type": "code", "execution_count": 87, "id": "06fdddce-1198-4048-91dc-304ea7f00147", "metadata": {}, "outputs": [], "source": [
  "# Write one row per (species, location). The file is opened once in write mode,\n",
  "# so re-running this cell replaces the previous table instead of appending to it.\n",
  "with open(\"<arc/runs>/Fig5_data.tsv\", \"w\") as file:\n",
  "    file.write(\"species\\ttype\\tlocation\\tnumber_of_associated_tes\\n\")\n",
  "    for species, species_type in species_dic.items():\n",
  "        for location in all_locations:\n",
  "            gff = gffpd.read_gff3(f'<arc/runs/association_gff3>/{location}/{species}_te_gene_associaton.gff3').df\n",
  "            # the count is the number of distinct values in the GFF3 attributes column\n",
  "            file.write(f'{species}\\t{species_type}\\t{location}\\t{len(set(gff.attributes))}\\n')"
 ] },
 { "cell_type": "markdown", "id": "00c864fd-3021-4ed6-8389-7242cbc330a0", "metadata": {}, "source": [
  "### Load the TSV file written above and continue working with this data"
 ] },
 { "cell_type": "code", "execution_count": 64, "id": "9d7af3ad-ff4f-4f18-bb5c-f0e8ee554c4c", "metadata": {}, "outputs": [], "source": [ "df = 
pd.read_csv('<arc/runs>/Fig5_data.tsv', sep = '\\t')" ] }, { "cell_type": "code", "execution_count": 72, "id": "91956aef-0364-467e-b1eb-0e2543d69520", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>species</th>\n", " <th>type</th>\n", " <th>location</th>\n", " <th>number_of_associated_tes</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>a_alpina</td>\n", " <td>c3</td>\n", " <td>start_end</td>\n", " <td>1665</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>a_alpina</td>\n", " <td>c3</td>\n", " <td>upstream</td>\n", " <td>1801</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>a_alpina</td>\n", " <td>c3</td>\n", " <td>downstream</td>\n", " <td>639</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>a_alpina</td>\n", " <td>c3</td>\n", " <td>inside</td>\n", " <td>234</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>a_thaliana</td>\n", " <td>c3</td>\n", " <td>start_end</td>\n", " <td>654</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>59</th>\n", " <td>m_nitens</td>\n", " <td>c34</td>\n", " <td>inside</td>\n", " <td>251</td>\n", " </tr>\n", " <tr>\n", " <th>60</th>\n", " <td>m_suffruticosa</td>\n", " <td>c34</td>\n", " <td>start_end</td>\n", " <td>842</td>\n", " </tr>\n", " <tr>\n", " <th>61</th>\n", " <td>m_suffruticosa</td>\n", " <td>c34</td>\n", " <td>upstream</td>\n", " <td>1450</td>\n", " </tr>\n", " <tr>\n", " <th>62</th>\n", " <td>m_suffruticosa</td>\n", " <td>c34</td>\n", " <td>downstream</td>\n", " 
<td>653</td>\n", " </tr>\n", " <tr>\n", " <th>63</th>\n", " <td>m_suffruticosa</td>\n", " <td>c34</td>\n", " <td>inside</td>\n", " <td>257</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>64 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " species type location number_of_associated_tes\n", "0 a_alpina c3 start_end 1665\n", "1 a_alpina c3 upstream 1801\n", "2 a_alpina c3 downstream 639\n", "3 a_alpina c3 inside 234\n", "4 a_thaliana c3 start_end 654\n", ".. ... ... ... ...\n", "59 m_nitens c34 inside 251\n", "60 m_suffruticosa c34 start_end 842\n", "61 m_suffruticosa c34 upstream 1450\n", "62 m_suffruticosa c34 downstream 653\n", "63 m_suffruticosa c34 inside 257\n", "\n", "[64 rows x 4 columns]" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 71, "id": "a7d2c829-e982-43de-bf0b-af2009fdaa2f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "55148" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(df['number_of_associated_tes'])" ] }, { "cell_type": "code", "execution_count": 75, "id": "d3ac1be6-a578-4f92-bb73-3fe75b060e42", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "28379" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(df[df.type=='c34']['number_of_associated_tes'])" ] }, { "cell_type": "code", "execution_count": 195, "id": "269edc66-14aa-48ae-b187-481ce6068769", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAlYUlEQVR4nO3de1hVZdo/8O+9QTkInhBRQUVRQUAdAw+ZVmp2aWM2pWVaWZONjmYHs7Kmmqb62a+DNmmOlWVljW+WdnLK8NUGU9OxwTNHUfOYJKRyUCQ23O8fe+EQA7hA1t4L9/dzXVx7r8Ne64b25bdnrWc9j6gqiIiI7Mbh6QKIiIiqw4AiIiJbYkAREZEtMaCIiMiWGFBERGRLvp4u4GK0adNGIyMjPV0GETUC27Zty1PVUE/XQeY16oCKjIxESkqKp8sgokZARA55ugaqG17iIyIiW2JAERGRLTGgiIjIlhr1PSgioouxbdu2tr6+vm8DiAf/h90TygGkOp3OexISEk5U3ciAIiKv5evr+3a7du16hoaGnnI4HByY1M3Ky8slNzc3Nicn520AY6pu5/8xEJE3iw8NDS1gOHmGw+HQ0NDQfLhasP+93c31EBHZiYPh5FnG37/aLLIsoETkHRE5ISKplda1FpG1IpJtvLYy1ouILBCRfSKyW0Qus6ouIiJqHKy8B/UegIUA3q+07jEA36jqCyLymLE8G8AoAN2NnwEAXjdeiYjc5ondNyU05PHm9P50W0MeryZZWVlNk5OTg/74xz+edMf53MWyFpSqbgBQ9Y91A4ClxvulAH5Xaf376vIvAC1FpL1VtRERXUqys7P9Pvroo9bVbSstLXV3OQ3G3b34wlT1uPE+B0CY8T4cwJFK+x011h1HFSIyBcAUAOjUqVOdTv7E7pvqWC6525zen3q6BAD8rjQGdvmuXKysrKymo0eP7p6dnZ0GAH/+85/DioqKfDZt2hQcFxd3dsuWLcFlZWWyePHiH4YOHXr2q6++Cpo1a1YnABARbN68OfOJJ54IP3DggH9MTEzshAkT8lq1alX2+eeftzp79qyjrKxMvvnmm+zJkyd3yszMDHA6nfLEE0/8ePvtt5/OyspqOnHixC7FxcUOAJg/f/7hESNGnPnyyy+Dn3nmmQ7Nmzd3ZmVlBY4ZM+Zkr169ihctWhRWUlIin3322f64uLgSq/82HutmrqoqInW+OamqiwEsBoDExETe3CSiS1ZxcbEjMzMz/euvvw6aMmVKl+zs7LR58+a1W7BgwaFrr732TH5+viMwMLB8zpw5x+bNmxeWnJy8DwAWLFgQkpaWFrh79+60sLCwshkzZoQPHTq0YMWKFQfz8vJ8EhMTe44ZM6agQ4cOzo0bN+4NDAzUPXv2+E2YMKFrampqBgBkZmYGpKamprVt29bZuXPnXn5+fnl79uzJeO6559rOmzev7TvvvHOk9uovnrsD6icRaa+qx41LeBUPZh0D0LHSfhHGOiIirzVx4sSTADBq1KiioqIiR15ens/AgQOLHn744Y633HLLyQkTJpyKiooqr+6zQ4YMKQgLCysDgPXr1zdfs2ZNywULFrQDgJKSEtm3b1/Tzp07l06ePLlzenp6gMPhwKFDh/wqPt+rV68znTt3LgWATp06lYwaNSofAPr06VP87bffBlv9uwPu72a+CsCdxvs7AXxRaf0kozffQAD5lS4FEhFdsnx9fbW8/D8Zc+7cufP/LovIr/YVETz//PM5b7/99qHi4mLHkCFDYnbs2OFf3XEDAwPPH1RVsXLlyn2ZmZnpmZmZ6cePH99z2WWXnZszZ05Y27ZtSzMyMtL37NmTXlpaev7cfn5+569QORwO+Pv7a8X7srKyXxdmESu7mX8IYAuAaBE5KiKTAbwAYISIZAO4xlgGgNUADgDYB+AtANOtqouIyE4iIiKcJ0+e9M3JyfEpLi6WNWvWtKjY9uGHH7YCgDVr1gQFBweXhYSElKWlpfn179+/eM6
cOTm9e/c+k5qa6t+iRYuyoqIin5rOMXTo0IJ58+aFVQThd999FwAA+fn5Pu3bty/18fHBokWLQsrKyiz+bevGskt8qjqhhk3Dq9lXAdxrVS1ERGa4q1t4ZX5+fjpr1qzj/fr16xkWFlbarVu3cxXb/P39tWfPnrFOp1MWL178AwC89NJLbTdv3txcRDQ6Orp43Lhx+Q6HAz4+PhodHR07ceLEvFatWv0qaV544YUfp0yZ0ikmJia2vLxcOnbsWJKcnLzvwQcfPDF27Nio5cuXhwwbNiw/ICCg2suFniKubGicEhMTtS4TFrJnlv3ZpWcWvyv2V9fviohsU9XEyut27dp1sE+fPnkNWlgD6d+/f/TcuXOPXHnllWc9XYvVdu3a1aZPnz6RVddzqCMiIrIljmZORGRD33//fZana/A0tqCIiMiWGFBERGRLDCgiIrIlBhQREdkSO0kQERmiPprToNNt7B//RJ2eq3rooYc6BAUFlT377LM/NWQdNXnsscfavfDCCznuOFd9sAVFROSlFixYUO20RuXl5bDDqBIMKCIiD5o9e3a7yMjI+ISEhOjs7Gw/ANi8eXNAnz59Ynr06BE7YsSIqNzcXJ9jx475xsXF9QSALVu2BIhIQnZ2dlMA6NixY3xhYaFj7NixkXfddVfHvn37xkRERPR69913WwHAoUOHmiQmJkbHxMTEdu/ePS4pKSlo+vTp4SUlJY6YmJjYMWPGdMnKymoaGRkZf+ONN0b26NEjbv/+/U2feuqpsPj4+J49evSInTlzZoeKmq+55pqouLi4nt26dYubO3dum4r1gYGBfadOnRrRrVu3uEGDBvVITk4O7N+/f3RERESvZcuWtUAdMaCIiDxk48aNgZ999lnrPXv2pK9duzZ7165dzQDgrrvu6vL8888f3bt3b3pcXFzx7NmzO4SHhztLSkocJ0+edCQnJwfFxcWdXbduXdDevXubhoSEOIODg8sB4KeffmqSkpKS+cUXX2Q//fTT4QDwzjvvtB4+fHh+ZmZmekZGRtqAAQPOLlq06Jifn195ZmZm+qpVq34AgMOHD/vNmDEjd9++fWmpqan++/bt89+9e3dGRkZG+s6dOwO//vrrIABYtmzZwbS0tIydO3emv/nmm2E5OTk+gGt6kOHDhxfs27cvrVmzZmVPPvlk+MaNG/euWLFi33PPPRde178P70EREXlIcnJy0HXXXXe6Ilyuvfba02fOnHEUFhb6/Pa3vy0CgD/84Q8/33zzzV0BIDExsWjdunVBmzZtCn700UePJyUltVBVDBw4sKjimGPGjDnt4+ODhISEcz///HMTABg4cOCZqVOnRpaWljrGjRt3atCgQcXV1dO+fftfhg8ffgYAkpKSmm/YsKF5bGxsLACcPXvWkZmZ6T9q1KiiF198Meyrr75qCQA5OTlN0tLS/Nu1a3emSZMmOm7cuAIAiIuLK/bz8yv38/PT/v37Fx87dqxpXf8+bEERETUSQ4YMKdywYUPw0aNHm952222n09LSAjZt2hR05ZVXFlbsUzEtBuCaZgNwzSe1YcOGrPDw8F/uvvvuLgsXLgyp7vhVp+h48MEHj1dM0XH48OHUmTNn5n355ZfB3377bXBKSkpmVlZWes+ePYsrZuT19fVVh8MVKw6H4/yUHT4+PvWaooMBRUTkIcOGDStavXp1y6KiIjl16pRj7dq1LZs1a1bevHnzsqSkpCAAWLJkScjll19eBADXXHNN0SeffNK6S5cuJT4+PmjZsqUzOTm5xYgRI4pqO8/evXubRkRElM6aNStv0qRJudu3bw8EXIFSUlJSbXCMGjWq4IMPPmiTn5/vAIAffvihybFjx3xPnz7t06JFi7Lg4ODyHTt2+FdclrQCL/ERERnq2i38Yg0ePPjsjTfeeDI+Pj4uJCSktHfv3mcA4N133/1h2rRpne+//35Hp06dSj788MODABAdHf2LqsqQIUMKAeDyyy8vOn78eNPQ0NBau9y
tWbMmeMGCBe18fX01MDCwbNmyZT8AwG233Zbbs2fP2Pj4+LMvv/zyr2Yxv+mmmwrS0tL8+/XrFwO4WlfLli37YezYsfmLFy8O7dq1a1zXrl3P9enT54wFfxoAnG6DbIbTbZBZl/p0G96E020QEVGjwoAiIiJbYkAREZEtMaCIiMiWGFBERGRLDCgiIrIlPgdFRGQ4lDKsQafb6Jz4zws+V9W3b9+YHTt2ZNb12C+99FJoYGBg+YwZM36uvD4rK6vp6NGju2dnZ6fV9Zh2w4AiIvKg+oQTADz66KO5DV2L3fASHxGRBwUGBvYFgC+//DK4f//+0SNHjuzapUuXuDFjxnQpL3cNjTd9+vTwqKiouB49esROmTIlAnBNbvjnP/85DHCNih4dHR0bHR0d+8orr7StOLbT6cTUqVMjKqbMePnll9tUU4JtsQVFRGQTGRkZATt37jwQGRlZmpCQELN27dqgPn36FK9evbrVgQMHUh0OB/Ly8nyqfm7y5MmR8+fPPzxq1KiiqVOnRlSsf/XVV9u0aNGiLDU1NaO4uFj69esXc/311xfExMT84t7frH7YgiIisolevXqdiYqKKvXx8UFcXNzZ/fv3Nw0JCSnz8/MrHz9+fOTSpUtbBgUFlVf+TF5enk9hYaHPqFGjigDg7rvvPn9Pat26dc0//vjjkJiYmNi+ffv2PHXqlG96erq/u3+v+mILiojIJiqmpwBcU1Q4nU5p0qQJdu7cmbFq1armK1eubPX666+3/de//rXXzPFUVebNm3d47NixBdZVbR22oIiIbCw/P99x8uRJn/Hjx+e/8cYbRzIzMwMrb2/Tpk1ZcHBw2Zo1a4IA4L333mtdsW3EiBH5r7/+emjFlBq7d+/2KygoaDT/7rMFRURkMNMt3N1Onz7tM3r06G4VIfPcc88dqbrPkiVLDt5zzz2RIoKrr776fGtp5syZeQcPHvTr1atXT1WV1q1bl65evXq/O+u/GJxug2yF022QWZxu49LB6TaIiKhRYUAREZEtMaCIiMiWLhhQItLLHYUQERFVZqYFtUhEvheR6SLSwvKKiIiIYCKgVHUIgNsAdASwTUT+R0RGWF4ZERF5NVPPQalqtog8CSAFwAIAfUVEAPxJVe3RL5iI6CLFPf7XBp1uI+3/z7Tdc1WNiZl7UL1F5K8AMgAMA3C9qvY03v/V4vqIiLzKs88+27awsLDOHdgWLFgQcvDgwSZW1FShf//+0Rs2bAi88J4Nw8wf4TUA2wH0UdV7VXU7AKjqjwCerM9JRWSmiKSJSKqIfCgi/iLSRUS2isg+EflIRJrW59hERI3Zm2++GVZUVFSngHI6nfj73//e5vDhw5YGlLvV+kcQER8Ax1T1A1UtrrpdVT+o6wlFJBzA/QASVTUegA+AWwG8COCvqtoNwCkAk+t6bCKixqSgoMBx9dVXd4uOjo7t3r173KxZs9qfOHGiyVVXXdVjwIABPQDgtttu6xQfH9+zW7ducTNnzuxQ8dnw8PBe06ZNC4+Nje25ePHi1qmpqYGTJk3qGhMTE1tUVCTVnW/jxo2B/fr1i46Li+s5ePDg7ocOHWoCuFpG06ZNC+/Vq1fPyMjI+KSkpCAAKCoqktGjR3ft2rVr3IgRI6LOnTtX7XGtUus9KFUtE5GOItJUVRty/hBfAAEiUgogEMBxuC4ZTjS2LwXwFwCvN+A5iYhs5dNPP23erl270vXr1+8DgJ9//tln+fLlbb799tu97du3dwLAK6+8ciwsLKzM6XRi0KBB0Vu3bg0YMGBAMQCEhIQ409PTMwDgvffeC507d+6RK6+88mx15yopKZH777+/01dffbWvQ4cOzrfeeqvVww8/HL5ixYqDAOB0OmXPnj0ZH330UYtnn322w8iRI/fOnTu3bUBAQPmBAwfStm7dGnDFFVfEuuUPYzDTjPwBwHci8pSIPFTxU98TquoxAHMBHIYrmPIBbANwWlWdxm5HAYRX93kRmSIiKSKSkpt
7yc94TESXsMsuu6x448aNzadNmxaelJQUFBISUlZ1n6VLl7aOjY3tGRsbG5udne2/a9eu8/M5TZo06ZTZc+3evdsvOzs7YNiwYT1iYmJiX3755fY//vjj+UuCN9988ykAGDRo0JmjR482BYBNmzYF3XHHHT8DwIABA4p79OhRbfhZxUwvvv3GjwNA8MWeUERaAbgBQBcApwGsADDS7OdVdTGAxYBrsNiLrYeIyFN69+5dsn379vRPPvmkxVNPPRW+bt26X83blJmZ2XThwoVh27ZtywgNDS0bO3Zs5Llz5843LIKDg8v/+6jVU1Xp1q1b8c6dOzOr2+7v768A4Ovri7KyMrdeyqvJBQNKVZ8BABEJMpaLLvKc1wD4QVVzjeN+CuAKAC1FxNdoRUUAOHaR5yEiqhN3dws/ePBgk7Zt2zqnT59+slWrVmVLlixp06xZs7L8/HxH+/btcerUKZ+AgIDy1q1blx05csR3/fr1La666qrC6o4VFBRUlp+f/1/TwVfo3bv3uZMnT/quW7eu2TXXXHOmpKRE9uzZ45eYmHiups8MHjy4aNmyZa3HjBlT+O9//9t/7969buvBB5gIKBGJB/ABgNbGch6ASaqaVs9zHgYwUEQCARQDGA7X81XJAMYBWA7gTgBf1PP4RESNwrZt2wIef/zxCIfDAV9fX120aNGhjRs3Bo0cObJHWFjYL1u3bt0bHx9/NioqKr59+/a/JCQk1NhAmDRpUt59993X+ZFHHilPSUnJCAoK+tUVJn9/f12+fPn++++/v1NhYaFPWVmZTJs27afaAurhhx8+ceutt3bp2rVrXLdu3c7Fxsaeacjf/0IuOB+UiGwG8ISqJhvLVwN4XlUH1fukIs8AGA/ACWAHgHvguue0HK4g3AHgdlUtqe04nA/q0sP5oMgszgd16ahpPigz96CaVYQTAKjqehFpdjHFqOrTAJ6usvoAgP4Xc1wiIrp0mAmoAyLyFFyX+QDgdrjChIiIbGjEiBFRR44c8au8bs6cOUfHjh1bUNNn7MhMQN0N4BkAFe3pjcY6IqLGrry8vFwcDscl1SN47dq1+z1dg1nl5eUCoNreiGZ68Z2Ca+QHIqJLTWpubm5saGho/qUWUo1BeXm55ObmtgCQWt12M734/gGg6n+4fLh63r2pqjX2ACEisjOn03lPTk7O2zk5OfHgDOOeUA4g1el03lPdRlP3oACEAvjQWB4PoBBADwBvAbijAYokInK7hISEEwDGeLoOqp6ZgBqkqv0qLf9DRP6tqv1EpL7PQhEREdXKTJM2SEQ6VSwY74OMxYYcQJaIiOg8My2oWQA2ich+AALXGHrTjWehllpZHBEReS8zvfhWi0h3ADHGqqxKHSNetaowIiLybmamfA8E8AiAGaq6C0BHERlteWVEROTVzNyDeheue02XG8vHAPw/yyoiIiKCuYCKUtWXAJQCgKqeheteFBERkWXMBNQvIhIA42FdEYkCUOso40RERBfLTC++vwBIguve0zK4Jhf8vZVFERERmenF978isg3AQLgu7T2gqpw/hYiILGWmF983qvqzqn6lql+qap6IfOOO4oiIyHvV2IISEX8AgQDaiEgr/KdjRHO4Zr8lIiKyTG2X+KYCeBBABwDb8J+AKgCw0NqyiIjI29UYUKo6H8B8EblPVV9zY01ERESmOkm8JiLxAGIB+Fda/76VhRERkXczM2Hh0wCuhiugVgMYBWATAAYUERFZxsyDuuMADAeQo6q/B9AHQAtLqyIiIq9nJqCKVbUcgFNEmgM4AaCjtWUREZG3MzOSRIqItIRrevdtAIoAbLGyKCIiIjOdJKYbb98QkSQAzVV1t7VlERGRtzMzksSNItICAFT1IIDDIvI7i+siIiIvZ+Ye1NOqml+xoKqnATxtWUVEREQwF1DV7WPm3hUREVG9mQmoFBF5RUSijJ9X4OosQUREZBkzAXUfXFO+fwRgOYBzAO61sigiIiIzvfjOAHjMDbU
QERGdZ6YFRURE5HYMKCIisiUGFBER2ZKZ0cxDAfwBQGTl/VX1buvKIiIib2fmeaYvAGwEsA5AmbXlEBERuZgJqEBVnW15JURERJWYuQf1pYhcZ3klRERElZgJqAfgCqliESkQkUIRKbiYk4pISxFZKSKZIpIhIpeLSGsRWSsi2cZrq4s5BxERNW5mHtQNtuC88wEkqeo4EWkKIBDAnwB8o6oviMhjcD0c3KCXFpdnJDTk4cgCc3p7ugIisosaA0pEYlQ1U0Quq267qm6vzwmNqTuuBHCXcZxfAPwiIjcAuNrYbSmA9WjggCIiosajthbUQwCmAJhXzTYFMKye5+wCIBfAuyLSB66BZx8AEKaqx419cgCEVfdhEZli1IVOnTrVswQiIrK7GgNKVacYr0MtOOdlAO5T1a0iMh9VxvpTVRURraGuxQAWA0BiYmK1+xARUePniZEkjgI4qqpbjeWVcAXWTyLSHgCM1xMeqI2IiGzC7QGlqjkAjohItLFqOIB0AKsA3GmsuxOuB4SJiMhLeWpm3PsALDN68B0A8Hu4wvJjEZkM4BCAWzxUGxF7fDYC7PF56TMzFt8VAHaq6hkRuR2uy3HzVfVQfU+qqjsBJFazaXh9j0lERJcWM5f4Xgdw1uhxNwvAfgDvW1oVERF5PTMB5VRVBXADgIWq+jcAVjy8S0REdJ6Ze1CFIvI4gNsBXCkiDgBNrC2LiIi8nZkW1HgAJQAmGz3wIgC8bGlVRETk9S441JGq5ojI31S1BABU9bCI7HVfiURE5I1qa0H9T6X3W6psW2RBLUREROfVFlBSw/vqlomIiBpUbQGlNbyvbpmIiKhB1daLL0JEFsDVWqp4D2M53PLKiIjIq9UWUI9Uep9SZVvVZSIiogZVW0BFq+qf3FYJERFRJbXdgxrptiqIiIiqqK0F5SMirVBDjz1VPWlNSURERLUHVAxc07FXF1AKoKslFREREaH2gEpX1b5uq4SIiKgST0z5TkREdEG1BdR8t1VBRERURY0BparvubEOIiKiX+ElPiIisqUaA0pEXjReb3ZfOURERC61taCuExEB8Li7iiEiIqpQWzfzJACnAASJSAFcz0NpxauqNndDfURE5KVq6yTxiKq2BPCVqjZX1eDKr+4rkYiIvFFtLSgAgKreICJhAPoZq7aqaq61ZRERkbe7YC8+o5PE9wBuBnALgO9FZJzVhRERkXe7YAsKwJMA+qnqCQAQkVAA6wCstLIwIiLybmaeg3JUhJPhZ5OfIyIiqjczLagkEVkD4ENjeTyA1daVREREZK6TxCMichOAwcaqxar6mbVlERGRtzPTgoKqfgrgU4trISIiOo/3koiIyJZMtaAuFf+M+sbTJdAFPeHpAojIJky1oEQkQESirS6GiIiogpkHda8HsBOusfkgIr8RkVUW10VERF7OTAvqLwD6AzgNAKq6E0AXyyoiIiKCuYAqVdX8KuvUimKIiIgqmOkkkSYiEwH4iEh3APcD2GxtWURE5O3MtKDuAxAHoASu0SQKADxoYU1ERESmRpI4C1ffX/b/JSIit7lgQInIP/Df95zyAaQAeFNVz9XnxCLiYxzjmKqOFpEuAJYDCAGwDcAdqvpLfY5NRESNn5l7UAcAhOLXg8UWAugB4C0Ad9Tz3A8AyABQMTvviwD+qqrLReQNAJMBvF7PYxNdFD7U3Rjwos6lzsw9qEGqOlFV/2H83A7X/FD3ArisPicVkQgAvwXwtrEsAIbhP3NMLQXwu/ocm4iILg1mAipIRDpVLBjvg4zF+l6CexXAowDKjeUQAKdV1WksHwUQXt0HRWSKiKSISEpuLmeeJyK6VJkJqFkANolIsoisB7ARwMMi0gyulk6diMhoACdUdVtdPwsAqrpYVRNVNTE0NLQ+hyAiokbATC++1cbzTzHGqqxKHSNercc5rwAwRkSuA+AP1z2o+QBaioiv0YqKAHCsHscmIqJLhNnpNroDiAbQB8AtIjKpvidU1cd
VNUJVIwHcCuCfqnobgGQA44zd7gTwRX3PQUREjZ+ZwWKfBvCa8TMUwEsAxlhQy2wAD4nIPrjuSS2x4BxERNRImOlmPg6ultMOVf29iIQB+HtDnFxV1wNYb7w/ANegtERERKYCqlhVy0XEKSLNAZwA0NHiuixx3SfXe7oEuoC0RE9XQER2YSagUkSkJVwP5W4DUARgi5VFERERmenFN914+4aIJAForqq7rS2LiIi8nZlOEufHfFHVg6q6u/I6IiIiK9TYghIRfwCBANqISCsAYmxqjhpGeSAiImootV3imwrXvE8d4Lr3VBFQBQAWWlsWERF5uxoDSlXnA5gvIvep6mturImIiMhUJ4nXRGQQgMjK+6vq+xbWRUREXs7MhIUfAIgCsBNAmbFaATCgiIjIMmaeg0oEEKuqVWfVJSIisoyZgEoF0A7AcYtrIbINjjpifxx15NJnJqDaAEgXke8BlFSsVFUrBowlIiICYC6g/mJ1EURERFWZ6cX3rYh0BtBdVdeJSCAAH+tLIyIib2ZmqKM/AFgJ4E1jVTiAzy2siYiIyNSMuvfCNU17AQCoajaAtlYWRUREZCagSlT1l4oFEfGF6zkoIiIiy5gJqG9F5E8AAkRkBIAVAP5hbVlEROTtzATUYwByAeyBawDZ1QCetLIoIiIiM93MAwC8o6pvAYCI+BjrzlpZGBEReTczLahv4AqkCgEA1llTDhERkYuZgPJX1aKKBeN9oHUlERERmQuoMyJyWcWCiCQAKLauJCIiInP3oB4AsEJEfoRrVt12AMZbWhUREXm9WgPK6BAxBEAMgGhjdZaqllpdGBERebdaL/GpahmACapaqqqpxg/DiYiILGfmEt93IrIQwEcAzlSsVNXtllVFRERez0xA/cZ4fbbSOgUwrMGrISIiMpiZbmOoOwohIiKqzMx0G2EiskREvjaWY0VksvWlERGRNzPzHNR7ANYA6GAs7wXwoEX1EBERATAXUG1U9WMA5QCgqk4AZZZWRUREXs/sSBIhMOaAEpGBAPItrYqIiLyemV58DwFYBSBKRL4DEApgnKVVERGR1zPTi2+7iFwF10gSAo4kQUREbnDBgBIRfwDTAQyG6zLfRhF5Q1XPWV0cERF5LzOX+N4HUAjgNWN5IoAPANxsVVFERERmAipeVWMrLSeLSLpVBREREQHmevFtN3ruAQBEZACAlPqeUEQ6ikiyiKSLSJqIPGCsby0ia0Uk23htVd9zEBFR42cmoBIAbBaRgyJyEMAWAP1EZI+I7K7HOZ0AZhmtsoEA7hWRWACPAfhGVbvDNc38Y/U4NhERXSLMXOIb2ZAnVNXjAI4b7wtFJANAOIAbAFxt7LYUwHoAsxvy3ERE1HiY6WZ+yKqTi0gkgL4AtgIIM8ILAHIAhNXwmSkApgBAp06drCqNiIg8zMwlPkuISBCATwA8qKoFlbepqsIYuaIqVV2sqomqmhgaGuqGSomIyBM8ElAi0gSucFqmqp8aq38SkfbG9vYATniiNiIisge3B5SICIAlADJU9ZVKm1YBuNN4fyeAL9xdGxER2YeZThIN7QoAdwDYIyI7jXV/AvACgI+NuaYOAbjFA7UREZFNuD2gVHUTXGP6VWe4O2shIiL78lgnCSIiotowoIiIyJYYUEREZEsMKCIisiUGFBER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkSwwoIiKyJQYUERHZEgOKiIhsiQFFRES2xIAiIiJbYkAREZEtMaCIiMiWGFBERGRLDCgiIrIlBhQREdkSA4qIiGyJAUVERLbEgCIiIltiQBERkS0xoIiIyJYYUEREZEsMKCIisiUGFBER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkSwwoIiKyJQYUERHZEgOKiIhsiQFFRES2ZKuAEpGRIpIlIvtE5DFP10NERJ5jm4ASER8AfwMwCkAsgAkiEuvZqoiIyFNsE1A
A+gPYp6oHVPUXAMsB3ODhmoiIyENEVT1dAwBARMYBGKmq9xjLdwAYoKozquw3BcAUYzEaQJZbC7WXNgDyPF0ENRre/n3prKqhni6CzPP1dAF1paqLASz2dB12ICIpqpro6TqoceD3hRobO13iOwagY6XlCGMdERF5ITsF1L8BdBeRLiLSFMCtAFZ5uCYiIvIQ21ziU1WniMwAsAaAD4B3VDXNw2XZHS91Ul3w+0KNim06SRAREVVmp0t8RERE5zGgiIjIlhhQjYSItBOR5SKyX0S2ichqEblKRLaLyE4RSRORP3q6TrLexX4XRKS/iGwwhhXbISJvi0hgpe39RMRpPJtI5DG26SRBNRMRAfAZgKWqequxrg+AlgAuV9USEQkCkCoiq1T1R89VS1a62O+CiIQBWAHgVlXdYqwbByAYwFljyLEXAfyvu34nopowoBqHoQBKVfWNihWquqvKPn5gi9gbXOx34V64wm1Lpc+vrLT9PgCfAOjXMOUS1R//QWsc4gFsq26DiHQUkd0AjgB4ka2nS97Ffhdq+3w4gBsBvN5AtRJdFAZUI6eqR1S1N4BuAO40LuGQF2qA78KrAGaranmDF0dUDwyoxiENQEJtOxj/t5wKYIhbKiJPqdN3QURuNDpO7BSRxAt8PhHAchE5CGAcgEUi8rsGq5yojhhQjcM/AfgZI7kDAESkt4gMEZEAY7kVgMHw7tHdvUGdvguq+pmq/sb4SQGwEK7W1YBKn79JRMJUtYuqRqpqJICVAKar6udu/N2IfoWdJBoBVVURuRHAqyIyG8A5AAcBfA7gbyKiAATAXFXd47FCyXIX+11Q1Z9E5FYAc0WkLYByABsAJLnpVyAyjUMdERGRLfESHxER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkS/8HWlsIN1jGDJMAAAAASUVORK5CYII=\n",
 "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
  "def plot_stacked_percentages(ax, df, species_type, x, location_order, colors, width=0.75):\n",
  "    \"\"\"Draw one stacked bar with the percentage of associated TEs per location.\n",
  "\n",
  "    Rows of ``df`` whose ``type`` equals ``species_type`` are summed per ``location``.\n",
  "    Each segment's height is that location's share (in percent) of the type's total.\n",
  "    Segments are stacked in ``location_order`` (first entry at the bottom) and use the\n",
  "    matching entry of ``colors``.\n",
  "\n",
  "    Returns the list of bar containers, one per location, in stacking order.\n",
  "    Raises ValueError when the type has no associated TEs (shares would be undefined).\n",
  "    \"\"\"\n",
  "    counts = (\n",
  "        df[df.type == species_type]\n",
  "        .groupby('location')['number_of_associated_tes']\n",
  "        .sum()\n",
  "        .reindex(location_order, fill_value=0)\n",
  "    )\n",
  "    total = counts.sum()\n",
  "    if total == 0:\n",
  "        raise ValueError(f'no associated TEs found for type {species_type!r}')\n",
  "    heights = counts / total * 100\n",
  "    # bottom of a segment = share of all locations stacked below it\n",
  "    bottoms = (counts.cumsum() - counts) / total * 100\n",
  "    return [\n",
  "        ax.bar(x, heights.loc[location], bottom=bottoms.loc[location],\n",
  "               color=color, width=width, label=location)\n",
  "        for location, color in zip(location_order, colors)\n",
  "    ]\n",
  "\n",
  "\n",
  "colors = ['#2a788e', '#E0CB41', '#22a884', '#7ad151']\n",
  "# Stacking order, bottom to top. Kept under its own name so the directory listing\n",
  "# stored in `all_locations` by the earlier cell is not overwritten.\n",
  "location_order = ['start_end', 'inside', 'downstream', 'upstream']\n",
  "\n",
  "fig, ax = plt.subplots()\n",
  "handles = plot_stacked_percentages(ax, df, 'c3', 1, location_order, colors)\n",
  "plot_stacked_percentages(ax, df, 'c34', 2, location_order, colors)\n",
  "\n",
  "ax.set_xticks([1, 2])\n",
  "ax.set_xticklabels(['C3', 'C3-C4'])\n",
  "ax.set_ylabel('percentage of TEs in category')\n",
  "\n",
  "# handles are reversed so the legend reads top-to-bottom like the stack\n",
  "ax.legend(bbox_to_anchor=(1, 1), loc='upper left', handles=handles[::-1])\n",
  "\n",
  "fig.tight_layout()"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }