{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8c83058e-1fdf-4d1a-9202-4816a300e7d2",
   "metadata": {},
   "source": [
    "### Read the number of associated TEs for all species and store it in a TSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "0b3bd0f8-fc7e-4cc5-98c7-ea3ccd8fecb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import gffpandas.gffpandas as gffpd\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import glob\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "bc351336-f616-43a3-ac51-b07e85d02c7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# List the association categories: one entry per non-hidden item in the\n",
    "# association_gff3 directory. Sorted because os.listdir() returns entries in\n",
    "# arbitrary order, which would make the row order of the TSV vary between runs.\n",
    "all_locations = sorted(\n",
    "    name for name in os.listdir(\"<arc/runs/association_gff3>/.\")\n",
    "    if not name.startswith('.')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "3db55a8e-5a77-4182-a7e6-cf96fc00cdd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a dict mapping the first tab-separated column of each line in the\n",
    "# species list to its second column.\n",
    "with open('<arc/runs>/Fig6_species') as species_handle:\n",
    "    species_file = species_handle.read().split('\\n')\n",
    "\n",
    "species_dic = {}\n",
    "for line in species_file:\n",
    "    # Skip blank lines (including the empty entry left by a trailing newline)\n",
    "    # instead of unconditionally dropping the last line, which lost the final\n",
    "    # species whenever the file did not end with a newline.\n",
    "    if not line.strip():\n",
    "        continue\n",
    "    fields = line.split('\\t')\n",
    "    species_dic[fields[0]] = fields[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "06fdddce-1198-4048-91dc-304ea7f00147",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write one row per (species, location) holding the number of distinct values\n",
    "# in the `attributes` column of the corresponding GFF3 file. The output file is\n",
    "# opened once and closed by the `with` block, instead of being reopened in\n",
    "# append mode for every row.\n",
    "with open(\"<arc/runs>/Fig5_data.tsv\", \"w\") as out_file:\n",
    "    out_file.write(\"species\\ttype\\tlocation\\tnumber_of_associated_tes\\n\")\n",
    "    for species, species_type in species_dic.items():\n",
    "        for location in all_locations:\n",
    "            gff = gffpd.read_gff3(f'<arc/runs/association_gff3>/{location}/{species}_te_gene_associaton.gff3').df\n",
    "            out_file.write(f'{species}\\t{species_type}\\t{location}\\t{len(set(gff.attributes))}\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00c864fd-3021-4ed6-8389-7242cbc330a0",
   "metadata": {},
   "source": [
    "### Continue working with this data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "9d7af3ad-ff4f-4f18-bb5c-f0e8ee554c4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the tab-separated counts table from Fig5_data.tsv into a DataFrame.\n",
    "df = pd.read_csv('<arc/runs>/Fig5_data.tsv', sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "91956aef-0364-467e-b1eb-0e2543d69520",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>species</th>\n",
       "      <th>type</th>\n",
       "      <th>location</th>\n",
       "      <th>number_of_associated_tes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a_alpina</td>\n",
       "      <td>c3</td>\n",
       "      <td>start_end</td>\n",
       "      <td>1665</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a_alpina</td>\n",
       "      <td>c3</td>\n",
       "      <td>upstream</td>\n",
       "      <td>1801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a_alpina</td>\n",
       "      <td>c3</td>\n",
       "      <td>downstream</td>\n",
       "      <td>639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>a_alpina</td>\n",
       "      <td>c3</td>\n",
       "      <td>inside</td>\n",
       "      <td>234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>a_thaliana</td>\n",
       "      <td>c3</td>\n",
       "      <td>start_end</td>\n",
       "      <td>654</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>m_nitens</td>\n",
       "      <td>c34</td>\n",
       "      <td>inside</td>\n",
       "      <td>251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>m_suffruticosa</td>\n",
       "      <td>c34</td>\n",
       "      <td>start_end</td>\n",
       "      <td>842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>m_suffruticosa</td>\n",
       "      <td>c34</td>\n",
       "      <td>upstream</td>\n",
       "      <td>1450</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>m_suffruticosa</td>\n",
       "      <td>c34</td>\n",
       "      <td>downstream</td>\n",
       "      <td>653</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>m_suffruticosa</td>\n",
       "      <td>c34</td>\n",
       "      <td>inside</td>\n",
       "      <td>257</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           species type    location  number_of_associated_tes\n",
       "0         a_alpina   c3   start_end                      1665\n",
       "1         a_alpina   c3    upstream                      1801\n",
       "2         a_alpina   c3  downstream                       639\n",
       "3         a_alpina   c3      inside                       234\n",
       "4       a_thaliana   c3   start_end                       654\n",
       "..             ...  ...         ...                       ...\n",
       "59        m_nitens  c34      inside                       251\n",
       "60  m_suffruticosa  c34   start_end                       842\n",
       "61  m_suffruticosa  c34    upstream                      1450\n",
       "62  m_suffruticosa  c34  downstream                       653\n",
       "63  m_suffruticosa  c34      inside                       257\n",
       "\n",
       "[64 rows x 4 columns]"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "a7d2c829-e982-43de-bf0b-af2009fdaa2f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "55148"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total number of associated TEs over all rows (vectorized Series sum).\n",
    "df['number_of_associated_tes'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "d3ac1be6-a578-4f92-bb73-3fe75b060e42",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "28379"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total number of associated TEs over the rows whose type is 'c34'.\n",
    "df.loc[df['type'] == 'c34', 'number_of_associated_tes'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "269edc66-14aa-48ae-b187-481ce6068769",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAlYUlEQVR4nO3de1hVZdo/8O+9QTkInhBRQUVRQUAdAw+ZVmp2aWM2pWVaWZONjmYHs7Kmmqb62a+DNmmOlWVljW+WdnLK8NUGU9OxwTNHUfOYJKRyUCQ23O8fe+EQA7hA1t4L9/dzXVx7r8Ne64b25bdnrWc9j6gqiIiI7Mbh6QKIiIiqw4AiIiJbYkAREZEtMaCIiMiWGFBERGRLvp4u4GK0adNGIyMjPV0GETUC27Zty1PVUE/XQeY16oCKjIxESkqKp8sgokZARA55ugaqG17iIyIiW2JAERGRLTGgiIjIlhr1PSgioouxbdu2tr6+vm8DiAf/h90TygGkOp3OexISEk5U3ciAIiKv5evr+3a7du16hoaGnnI4HByY1M3Ky8slNzc3Nicn520AY6pu5/8xEJE3iw8NDS1gOHmGw+HQ0NDQfLhasP+93c31EBHZiYPh5FnG37/aLLIsoETkHRE5ISKplda1FpG1IpJtvLYy1ouILBCRfSKyW0Qus6ouIiJqHKy8B/UegIUA3q+07jEA36jqCyLymLE8G8AoAN2NnwEAXjdeiYjc5ondNyU05PHm9P50W0MeryZZWVlNk5OTg/74xz+edMf53MWyFpSqbgBQ9Y91A4ClxvulAH5Xaf376vIvAC1FpL1VtRERXUqys7P9Pvroo9bVbSstLXV3OQ3G3b34wlT1uPE+B0CY8T4cwJFK+x011h1HFSIyBcAUAOjUqVOdTv7E7pvqWC6525zen3q6BAD8rjQGdvmuXKysrKymo0eP7p6dnZ0GAH/+85/DioqKfDZt2hQcFxd3dsuWLcFlZWWyePHiH4YOHXr2q6++Cpo1a1YnABARbN68OfOJJ54IP3DggH9MTEzshAkT8lq1alX2+eeftzp79qyjrKxMvvnmm+zJkyd3yszMDHA6nfLEE0/8ePvtt5/OyspqOnHixC7FxcUOAJg/f/7hESNGnPnyyy+Dn3nmmQ7Nmzd3ZmVlBY4ZM+Zkr169ihctWhRWUlIin3322f64uLgSq/82HutmrqoqInW+OamqiwEsBoDExETe3CSiS1ZxcbEjMzMz/euvvw6aMmVKl+zs7LR58+a1W7BgwaFrr732TH5+viMwMLB8zpw5x+bNmxeWnJy8DwAWLFgQkpaWFrh79+60sLCwshkzZoQPHTq0YMWKFQfz8vJ8EhMTe44ZM6agQ4cOzo0bN+4NDAzUPXv2+E2YMKFrampqBgBkZmYGpKamprVt29bZuXPnXn5+fnl79uzJeO6559rOmzev7TvvvHOk9uovnrsD6icRaa+qx41LeBUPZh0D0LHSfhHGOiIirzVx4sSTADBq1KiioqIiR15ens/AgQOLHn744Y633HLLyQkTJpyKiooqr+6zQ4YMKQgLCysDgPXr1zdfs2ZNywULFrQDgJKSEtm3b1/Tzp07l06ePLlzenp6gMPhwKFDh/wqPt+rV68znTt3LgWATp06lYwaNSofAPr06VP87bffBlv9uwPu72a+CsCdxvs7AXxRaf0kozffQAD5lS4FEhFdsnx9fbW8/D8Zc+7cufP/LovIr/YVETz//PM5b7/99qHi4mLHkCFDYnbs2OFf3XEDAwPPH1RVsXLlyn2ZmZnpmZmZ6cePH99z2WWXnZszZ05Y27ZtSzMyMtL37NmTXlpaev7cfn5+569QORwO+Pv7a8X7srKyXxdmESu7mX8IYAuAaBE5KiKTAbwAYISIZAO4xlgGgNUADgDYB+AtANOtqouIyE4iIiKcJ0+e9M3JyfEpLi6WNWvWtKjY9uGHH7YCgDVr1gQFBweX
hYSElKWlpfn179+/eM6cOTm9e/c+k5qa6t+iRYuyoqIin5rOMXTo0IJ58+aFVQThd999FwAA+fn5Pu3bty/18fHBokWLQsrKyiz+bevGskt8qjqhhk3Dq9lXAdxrVS1ERGa4q1t4ZX5+fjpr1qzj/fr16xkWFlbarVu3cxXb/P39tWfPnrFOp1MWL178AwC89NJLbTdv3txcRDQ6Orp43Lhx+Q6HAz4+PhodHR07ceLEvFatWv0qaV544YUfp0yZ0ikmJia2vLxcOnbsWJKcnLzvwQcfPDF27Nio5cuXhwwbNiw/ICCg2suFniKubGicEhMTtS4TFrJnlv3ZpWcWvyv2V9fviohsU9XEyut27dp1sE+fPnkNWlgD6d+/f/TcuXOPXHnllWc9XYvVdu3a1aZPnz6RVddzqCMiIrIljmZORGRD33//fZana/A0tqCIiMiWGFBERGRLDCgiIrIlBhQREdkSO0kQERmiPprToNNt7B//RJ2eq3rooYc6BAUFlT377LM/NWQdNXnsscfavfDCCznuOFd9sAVFROSlFixYUO20RuXl5bDDqBIMKCIiD5o9e3a7yMjI+ISEhOjs7Gw/ANi8eXNAnz59Ynr06BE7YsSIqNzcXJ9jx475xsXF9QSALVu2BIhIQnZ2dlMA6NixY3xhYaFj7NixkXfddVfHvn37xkRERPR69913WwHAoUOHmiQmJkbHxMTEdu/ePS4pKSlo+vTp4SUlJY6YmJjYMWPGdMnKymoaGRkZf+ONN0b26NEjbv/+/U2feuqpsPj4+J49evSInTlzZoeKmq+55pqouLi4nt26dYubO3dum4r1gYGBfadOnRrRrVu3uEGDBvVITk4O7N+/f3RERESvZcuWtUAdMaCIiDxk48aNgZ999lnrPXv2pK9duzZ7165dzQDgrrvu6vL8888f3bt3b3pcXFzx7NmzO4SHhztLSkocJ0+edCQnJwfFxcWdXbduXdDevXubhoSEOIODg8sB4KeffmqSkpKS+cUXX2Q//fTT4QDwzjvvtB4+fHh+ZmZmekZGRtqAAQPOLlq06Jifn195ZmZm+qpVq34AgMOHD/vNmDEjd9++fWmpqan++/bt89+9e3dGRkZG+s6dOwO//vrrIABYtmzZwbS0tIydO3emv/nmm2E5OTk+gGt6kOHDhxfs27cvrVmzZmVPPvlk+MaNG/euWLFi33PPPRde178P70EREXlIcnJy0HXXXXe6Ilyuvfba02fOnHEUFhb6/Pa3vy0CgD/84Q8/33zzzV0BIDExsWjdunVBmzZtCn700UePJyUltVBVDBw4sKjimGPGjDnt4+ODhISEcz///HMTABg4cOCZqVOnRpaWljrGjRt3atCgQcXV1dO+fftfhg8ffgYAkpKSmm/YsKF5bGxsLACcPXvWkZmZ6T9q1KiiF198Meyrr75qCQA5OTlN0tLS/Nu1a3emSZMmOm7cuAIAiIuLK/bz8yv38/PT/v37Fx87dqxpXf8+bEERETUSQ4YMKdywYUPw0aNHm952222n09LSAjZt2hR05ZVXFlbsUzEtBuCaZgNwzSe1YcOGrPDw8F/uvvvuLgsXLgyp7vhVp+h48MEHj1dM0XH48OHUmTNn5n355ZfB3377bXBKSkpmVlZWes+ePYsrZuT19fVVh8MVKw6H4/yUHT4+PvWaooMBRUTkIcOGDStavXp1y6KiIjl16pRj7dq1LZs1a1bevHnzsqSkpCAAWLJkScjll19eBADXXHNN0SeffNK6S5cuJT4+PmjZsqUzOTm5xYgRI4pqO8/evXubRkRElM6aNStv0qRJudu3bw8EXIFSUlJSbXCMGjWq4IMPPmiTn5/vAIAffvihybFjx3xPnz7t06JFi7Lg4ODyHTt2+FdclrQCL/ERERnq2i38Yg0ePPjsjTfeeDI+Pj4uJCSktHfv3mcA4N133/1h2rRpne+//35Hp06dSj788MODABAdHf2LqsqQIUMKAeDy
yy8vOn78eNPQ0NBau9ytWbMmeMGCBe18fX01MDCwbNmyZT8AwG233Zbbs2fP2Pj4+LMvv/zyr2Yxv+mmmwrS0tL8+/XrFwO4WlfLli37YezYsfmLFy8O7dq1a1zXrl3P9enT54wFfxoAnG6DbIbTbZBZl/p0G96E020QEVGjwoAiIiJbYkAREZEtMaCIiMiWGFBERGRLDCgiIrIlPgdFRGQ4lDKsQafb6Jz4zws+V9W3b9+YHTt2ZNb12C+99FJoYGBg+YwZM36uvD4rK6vp6NGju2dnZ6fV9Zh2w4AiIvKg+oQTADz66KO5DV2L3fASHxGRBwUGBvYFgC+//DK4f//+0SNHjuzapUuXuDFjxnQpL3cNjTd9+vTwqKiouB49esROmTIlAnBNbvjnP/85DHCNih4dHR0bHR0d+8orr7StOLbT6cTUqVMjKqbMePnll9tUU4JtsQVFRGQTGRkZATt37jwQGRlZmpCQELN27dqgPn36FK9evbrVgQMHUh0OB/Ly8nyqfm7y5MmR8+fPPzxq1KiiqVOnRlSsf/XVV9u0aNGiLDU1NaO4uFj69esXc/311xfExMT84t7frH7YgiIisolevXqdiYqKKvXx8UFcXNzZ/fv3Nw0JCSnz8/MrHz9+fOTSpUtbBgUFlVf+TF5enk9hYaHPqFGjigDg7rvvPn9Pat26dc0//vjjkJiYmNi+ffv2PHXqlG96erq/u3+v+mILiojIJiqmpwBcU1Q4nU5p0qQJdu7cmbFq1armK1eubPX666+3/de//rXXzPFUVebNm3d47NixBdZVbR22oIiIbCw/P99x8uRJn/Hjx+e/8cYbRzIzMwMrb2/Tpk1ZcHBw2Zo1a4IA4L333mtdsW3EiBH5r7/+emjFlBq7d+/2KygoaDT/7rMFRURkMNMt3N1Onz7tM3r06G4VIfPcc88dqbrPkiVLDt5zzz2RIoKrr776fGtp5syZeQcPHvTr1atXT1WV1q1bl65evXq/O+u/GJxug2yF022QWZxu49LB6TaIiKhRYUAREZEtMaCIiMiWLhhQItLLHYUQERFVZqYFtUhEvheR6SLSwvKKiIiIYCKgVHUIgNsAdASwTUT+R0RGWF4ZERF5NVPPQalqtog8CSAFwAIAfUVEAPxJVe3RL5iI6CLFPf7XBp1uI+3/z7Tdc1WNiZl7UL1F5K8AMgAMA3C9qvY03v/V4vqIiLzKs88+27awsLDOHdgWLFgQcvDgwSZW1FShf//+0Rs2bAi88J4Nw8wf4TUA2wH0UdV7VXU7AKjqjwCerM9JRWSmiKSJSKqIfCgi/iLSRUS2isg+EflIRJrW59hERI3Zm2++GVZUVFSngHI6nfj73//e5vDhw5YGlLvV+kcQER8Ax1T1A1UtrrpdVT+o6wlFJBzA/QASVTUegA+AWwG8COCvqtoNwCkAk+t6bCKixqSgoMBx9dVXd4uOjo7t3r173KxZs9qfOHGiyVVXXdVjwIABPQDgtttu6xQfH9+zW7ducTNnzuxQ8dnw8PBe06ZNC4+Nje25ePHi1qmpqYGTJk3qGhMTE1tUVCTVnW/jxo2B/fr1i46Li+s5ePDg7ocOHWoCuFpG06ZNC+/Vq1fPyMjI+KSkpCAAKCoqktGjR3ft2rVr3IgRI6LOnTtX7XGtUus9KFUtE5GOItJUVRty/hBfAAEiUgogEMBxuC4ZTjS2LwXwFwCvN+A5iYhs5dNPP23erl270vXr1+8DgJ9//tln+fLlbb799tu97du3dwLAK6+8ciwsLKzM6XRi0KBB0Vu3bg0YMGBAMQCEhIQ409PTMwDgvffeC507d+6RK6+88mx15yopKZH777+/01dffbWvQ4cOzrfeeqvVww8/HL5ixYqDAOB0OmXPnj0ZH330UYtnn322w8iRI/fOnTu3bUBAQPmBAwfStm7dGnDFFVfEuuUPYzDTjPwBwHci8pSIPFTxU98TquoxAHMBHIYrmPIBbANwWlWdxm5H
AYRX93kRmSIiKSKSkpt7yc94TESXsMsuu6x448aNzadNmxaelJQUFBISUlZ1n6VLl7aOjY3tGRsbG5udne2/a9eu8/M5TZo06ZTZc+3evdsvOzs7YNiwYT1iYmJiX3755fY//vjj+UuCN9988ykAGDRo0JmjR482BYBNmzYF3XHHHT8DwIABA4p79OhRbfhZxUwvvv3GjwNA8MWeUERaAbgBQBcApwGsADDS7OdVdTGAxYBrsNiLrYeIyFN69+5dsn379vRPPvmkxVNPPRW+bt26X83blJmZ2XThwoVh27ZtywgNDS0bO3Zs5Llz5843LIKDg8v/+6jVU1Xp1q1b8c6dOzOr2+7v768A4Ovri7KyMrdeyqvJBQNKVZ8BABEJMpaLLvKc1wD4QVVzjeN+CuAKAC1FxNdoRUUAOHaR5yEiqhN3dws/ePBgk7Zt2zqnT59+slWrVmVLlixp06xZs7L8/HxH+/btcerUKZ+AgIDy1q1blx05csR3/fr1La666qrC6o4VFBRUlp+f/1/TwVfo3bv3uZMnT/quW7eu2TXXXHOmpKRE9uzZ45eYmHiups8MHjy4aNmyZa3HjBlT+O9//9t/7969buvBB5gIKBGJB/ABgNbGch6ASaqaVs9zHgYwUEQCARQDGA7X81XJAMYBWA7gTgBf1PP4RESNwrZt2wIef/zxCIfDAV9fX120aNGhjRs3Bo0cObJHWFjYL1u3bt0bHx9/NioqKr59+/a/JCQk1NhAmDRpUt59993X+ZFHHilPSUnJCAoK+tUVJn9/f12+fPn++++/v1NhYaFPWVmZTJs27afaAurhhx8+ceutt3bp2rVrXLdu3c7Fxsaeacjf/0IuOB+UiGwG8ISqJhvLVwN4XlUH1fukIs8AGA/ACWAHgHvguue0HK4g3AHgdlUtqe04nA/q0sP5oMgszgd16ahpPigz96CaVYQTAKjqehFpdjHFqOrTAJ6usvoAgP4Xc1wiIrp0mAmoAyLyFFyX+QDgdrjChIiIbGjEiBFRR44c8au8bs6cOUfHjh1bUNNn7MhMQN0N4BkAFe3pjcY6IqLGrry8vFwcDscl1SN47dq1+z1dg1nl5eUCoNreiGZ68Z2Ca+QHIqJLTWpubm5saGho/qUWUo1BeXm55ObmtgCQWt12M734/gGg6n+4fLh63r2pqjX2ACEisjOn03lPTk7O2zk5OfHgDOOeUA4g1el03lPdRlP3oACEAvjQWB4PoBBADwBvAbijAYokInK7hISEEwDGeLoOqp6ZgBqkqv0qLf9DRP6tqv1EpL7PQhEREdXKTJM2SEQ6VSwY74OMxYYcQJaIiOg8My2oWQA2ich+AALXGHrTjWehllpZHBEReS8zvfhWi0h3ADHGqqxKHSNetaowIiLybmamfA8E8AiAGaq6C0BHERlteWVEROTVzNyDeheue02XG8vHAPw/yyoiIiKCuYCKUtWXAJQCgKqeheteFBERkWXMBNQvIhIA42FdEYkCUOso40RERBfLTC++vwBIguve0zK4Jhf8vZVFERERmenF978isg3AQLgu7T2gqpw/hYiILGWmF983qvqzqn6lql+qap6IfOOO4oiIyHvV2IISEX8AgQDaiEgr/KdjRHO4Zr8lIiKyTG2X+KYCeBBABwDb8J+AKgCw0NqyiIjI29UYUKo6H8B8EblPVV9zY01ERESmOkm8JiLxAGIB+Fda/76VhRERkXczM2Hh0wCuhiugVgMYBWATAAYUERFZxsyDuuMADAeQo6q/B9AHQAtLqyIiIq9nJqCKVbUcgFNEmgM4AaCjtWUREZG3MzOSRIqItIRrevdtAIoAbLGyKCIiIjOdJKYbb98QkSQAzVV1t7VlERGRtzMzksSNItICAFT1IIDDIvI7i+siIiIvZ+Ye1NOqml+xoKqnATxtWUVEREQwF1DV7WPm3hUREVG9mQmoFBF5RUSijJ9X4OosQUREZBkzAXUfXFO+fwRgOYBzAO61
sigiIiIzvfjOAHjMDbUQERGdZ6YFRURE5HYMKCIisiUGFBER2ZKZ0cxDAfwBQGTl/VX1buvKIiIib2fmeaYvAGwEsA5AmbXlEBERuZgJqEBVnW15JURERJWYuQf1pYhcZ3klRERElZgJqAfgCqliESkQkUIRKbiYk4pISxFZKSKZIpIhIpeLSGsRWSsi2cZrq4s5BxERNW5mHtQNtuC88wEkqeo4EWkKIBDAnwB8o6oviMhjcD0c3KCXFpdnJDTk4cgCc3p7ugIisosaA0pEYlQ1U0Quq267qm6vzwmNqTuuBHCXcZxfAPwiIjcAuNrYbSmA9WjggCIiosajthbUQwCmAJhXzTYFMKye5+wCIBfAuyLSB66BZx8AEKaqx419cgCEVfdhEZli1IVOnTrVswQiIrK7GgNKVacYr0MtOOdlAO5T1a0iMh9VxvpTVRURraGuxQAWA0BiYmK1+xARUePniZEkjgI4qqpbjeWVcAXWTyLSHgCM1xMeqI2IiGzC7QGlqjkAjohItLFqOIB0AKsA3GmsuxOuB4SJiMhLeWpm3PsALDN68B0A8Hu4wvJjEZkM4BCAWzxUGxF7fDYC7PF56TMzFt8VAHaq6hkRuR2uy3HzVfVQfU+qqjsBJFazaXh9j0lERJcWM5f4Xgdw1uhxNwvAfgDvW1oVERF5PTMB5VRVBXADgIWq+jcAVjy8S0REdJ6Ze1CFIvI4gNsBXCkiDgBNrC2LiIi8nZkW1HgAJQAmGz3wIgC8bGlVRETk9S441JGq5ojI31S1BABU9bCI7HVfiURE5I1qa0H9T6X3W6psW2RBLUREROfVFlBSw/vqlomIiBpUbQGlNbyvbpmIiKhB1daLL0JEFsDVWqp4D2M53PLKiIjIq9UWUI9Uep9SZVvVZSIiogZVW0BFq+qf3FYJERFRJbXdgxrptiqIiIiqqK0F5SMirVBDjz1VPWlNSURERLUHVAxc07FXF1AKoKslFREREaH2gEpX1b5uq4SIiKgST0z5TkREdEG1BdR8t1VBRERURY0BparvubEOIiKiX+ElPiIisqUaA0pEXjReb3ZfOURERC61taCuExEB8Li7iiEiIqpQWzfzJACnAASJSAFcz0NpxauqNndDfURE5KVq6yTxiKq2BPCVqjZX1eDKr+4rkYiIvFFtLSgAgKreICJhAPoZq7aqaq61ZRERkbe7YC8+o5PE9wBuBnALgO9FZJzVhRERkXe7YAsKwJMA+qnqCQAQkVAA6wCstLIwIiLybmaeg3JUhJPhZ5OfIyIiqjczLagkEVkD4ENjeTyA1daVREREZK6TxCMichOAwcaqxar6mbVlERGRtzPTgoKqfgrgU4trISIiOo/3koiIyJZMtaAuFf+M+sbTJdAFPeHpAojIJky1oEQkQESirS6GiIiogpkHda8HsBOusfkgIr8RkVUW10VERF7OTAvqLwD6AzgNAKq6E0AXyyoiIiKCuYAqVdX8KuvUimKIiIgqmOkkkSYiEwH4iEh3APcD2GxtWURE5O3MtKDuAxAHoASu0SQKADxoYU1ERESmRpI4C1ffX/b/JSIit7lgQInIP/Df95zyAaQAeFNVz9XnxCLiYxzjmKqOFpEuAJYDCAGwDcAdqvpLfY5NRESNn5l7UAcAhOLXg8UWAugB4C0Ad9Tz3A8AyABQMTvviwD+qqrLReQNAJMBvF7PYxNdFD7U3Rjwos6lzsw9qEGqOlFV/2H83A7X/FD3ArisPicVkQgAvwXwtrEsAIbhP3NMLQXwu/ocm4iILg1mAipIRDpVLBjvg4zF+l6CexXAowDKjeUQAKdV1WksHwUQXt0HRWSKiKSISEpuLmeeJyK6VJkJqFkANolIsoisB7ARwMMi0gyulk6diMhoACdUdVtdPwsAqrpYVRNVNTE0NLQ+hyAiokbATC++1cbzTzHGqqxKHSNercc5rwAwRkSuA+AP1z2o+QBaioiv0YqKAHCsHscmIqJLhNnpNroD
iAbQB8AtIjKpvidU1cdVNUJVIwHcCuCfqnobgGQA44zd7gTwRX3PQUREjZ+ZwWKfBvCa8TMUwEsAxlhQy2wAD4nIPrjuSS2x4BxERNRImOlmPg6ultMOVf29iIQB+HtDnFxV1wNYb7w/ANegtERERKYCqlhVy0XEKSLNAZwA0NHiuixx3SfXe7oEuoC0RE9XQER2YSagUkSkJVwP5W4DUARgi5VFERERmenFN914+4aIJAForqq7rS2LiIi8nZlOEufHfFHVg6q6u/I6IiIiK9TYghIRfwCBANqISCsAYmxqjhpGeSAiImootV3imwrXvE8d4Lr3VBFQBQAWWlsWERF5uxoDSlXnA5gvIvep6mturImIiMhUJ4nXRGQQgMjK+6vq+xbWRUREXs7MhIUfAIgCsBNAmbFaATCgiIjIMmaeg0oEEKuqVWfVJSIisoyZgEoF0A7AcYtrIbINjjpifxx15NJnJqDaAEgXke8BlFSsVFUrBowlIiICYC6g/mJ1EURERFWZ6cX3rYh0BtBdVdeJSCAAH+tLIyIib2ZmqKM/AFgJ4E1jVTiAzy2siYiIyNSMuvfCNU17AQCoajaAtlYWRUREZCagSlT1l4oFEfGF6zkoIiIiy5gJqG9F5E8AAkRkBIAVAP5hbVlEROTtzATUYwByAeyBawDZ1QCetLIoIiIiM93MAwC8o6pvAYCI+BjrzlpZGBEReTczLahv4AqkCgEA1llTDhERkYuZgPJX1aKKBeN9oHUlERERmQuoMyJyWcWCiCQAKLauJCIiInP3oB4AsEJEfoRrVt12AMZbWhUREXm9WgPK6BAxBEAMgGhjdZaqllpdGBERebdaL/GpahmACapaqqqpxg/DiYiILGfmEt93IrIQwEcAzlSsVNXtllVFRERez0xA/cZ4fbbSOgUwrMGrISIiMpiZbmOoOwohIiKqzMx0G2EiskREvjaWY0VksvWlERGRNzPzHNR7ANYA6GAs7wXwoEX1EBERATAXUG1U9WMA5QCgqk4AZZZWRUREXs/sSBIhMOaAEpGBAPItrYqIiLyemV58DwFYBSBKRL4DEApgnKVVERGR1zPTi2+7iFwF10gSAo4kQUREbnDBgBIRfwDTAQyG6zLfRhF5Q1XPWV0cERF5LzOX+N4HUAjgNWN5IoAPANxsVVFERERmAipeVWMrLSeLSLpVBREREQHmevFtN3ruAQBEZACAlPqeUEQ6ikiyiKSLSJqIPGCsby0ia0Uk23htVd9zEBFR42cmoBIAbBaRgyJyEMAWAP1EZI+I7K7HOZ0AZhmtsoEA7hWRWACPAfhGVbvDNc38Y/U4NhERXSLMXOIb2ZAnVNXjAI4b7wtFJANAOIAbAFxt7LYUwHoAsxvy3ERE1HiY6WZ+yKqTi0gkgL4AtgIIM8ILAHIAhNXwmSkApgBAp06drCqNiIg8zMwlPkuISBCATwA8qKoFlbepqsIYuaIqVV2sqomqmhgaGuqGSomIyBM8ElAi0gSucFqmqp8aq38SkfbG9vYATniiNiIisge3B5SICIAlADJU9ZVKm1YBuNN4fyeAL9xdGxER2YeZThIN7QoAdwDYIyI7jXV/AvACgI+NuaYOAbjFA7UREZFNuD2gVHUTXGP6VWe4O2shIiL78lgnCSIiotowoIiIyJYYUEREZEsMKCIisiUGFBER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkSwwoIiKyJQYUERHZEgOKiIhsiQFFRES2xIAiIiJbYkAREZEtMaCIiMiWGFBERGRLDCgiIrIlBhQREdkSA4qIiGyJAUVERLbEgCIiIltiQBERkS0xoIiIyJYYUEREZEsMKCIisiUGFBER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkSwwoIiKyJQYUERHZEgOKiIhsiQFFRES2ZKuAEpGRIpIlIvtE5DFP10NERJ5jm4ASER8AfwMwCkAs
gAkiEuvZqoiIyFNsE1AA+gPYp6oHVPUXAMsB3ODhmoiIyENEVT1dAwBARMYBGKmq9xjLdwAYoKozquw3BcAUYzEaQJZbC7WXNgDyPF0ENRre/n3prKqhni6CzPP1dAF1paqLASz2dB12ICIpqpro6TqoceD3hRobO13iOwagY6XlCGMdERF5ITsF1L8BdBeRLiLSFMCtAFZ5uCYiIvIQ21ziU1WniMwAsAaAD4B3VDXNw2XZHS91Ul3w+0KNim06SRAREVVmp0t8RERE5zGgiIjIlhhQjYSItBOR5SKyX0S2ichqEblKRLaLyE4RSRORP3q6TrLexX4XRKS/iGwwhhXbISJvi0hgpe39RMRpPJtI5DG26SRBNRMRAfAZgKWqequxrg+AlgAuV9USEQkCkCoiq1T1R89VS1a62O+CiIQBWAHgVlXdYqwbByAYwFljyLEXAfyvu34nopowoBqHoQBKVfWNihWquqvKPn5gi9gbXOx34V64wm1Lpc+vrLT9PgCfAOjXMOUS1R//QWsc4gFsq26DiHQUkd0AjgB4ka2nS97Ffhdq+3w4gBsBvN5AtRJdFAZUI6eqR1S1N4BuAO40LuGQF2qA78KrAGaranmDF0dUDwyoxiENQEJtOxj/t5wKYIhbKiJPqdN3QURuNDpO7BSRxAt8PhHAchE5CGAcgEUi8rsGq5yojhhQjcM/AfgZI7kDAESkt4gMEZEAY7kVgMHw7tHdvUGdvguq+pmq/sb4SQGwEK7W1YBKn79JRMJUtYuqRqpqJICVAKar6udu/N2IfoWdJBoBVVURuRHAqyIyG8A5AAcBfA7gbyKiAATAXFXd47FCyXIX+11Q1Z9E5FYAc0WkLYByABsAJLnpVyAyjUMdERGRLfESHxER2RIDioiIbIkBRUREtsSAIiIiW2JAERGRLTGgiIjIlhhQRERkS/8HWlsIN1jGDJMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Stacked percentage bars: one bar per type, one segment per location.\n",
    "colors = ['#2a788e', '#E0CB41', '#22a884', '#7ad151']\n",
    "# Stacking order, bottom to top. Kept under its own name so the `all_locations`\n",
    "# list built from the directory listing is not overwritten by this cell.\n",
    "location_order = ['start_end', 'inside', 'downstream', 'upstream']\n",
    "bar_positions = {'c3': 1, 'c34': 2}  # type value in df -> x position of its bar\n",
    "\n",
    "handles = []\n",
    "for te_type, x_pos in bar_positions.items():\n",
    "    of_type = df[df['type'] == te_type]\n",
    "    total = of_type['number_of_associated_tes'].sum()\n",
    "    stacked = 0  # count already drawn below the current segment\n",
    "    for location, color in zip(location_order, colors):\n",
    "        at_location = of_type.loc[of_type['location'] == location, 'number_of_associated_tes'].sum()\n",
    "        segment = plt.bar(x_pos, (at_location / total) * 100,\n",
    "                          bottom=(stacked / total) * 100,\n",
    "                          color=color,\n",
    "                          width=0.75,\n",
    "                          label=location)\n",
    "        stacked += at_location\n",
    "        if te_type == 'c3':\n",
    "            # Legend entries come from the first bar only; both bars share colors.\n",
    "            handles.append(segment)\n",
    "\n",
    "plt.xticks(list(bar_positions.values()), ['C3', 'C3-C4'])\n",
    "plt.ylabel('percentage of TEs in category')\n",
    "\n",
    "# Reverse the handles so the legend reads top-to-bottom like the stacked bar.\n",
    "plt.legend(bbox_to_anchor=(1, 1), loc='upper left', handles=handles[::-1])\n",
    "\n",
    "plt.tight_layout()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}