{ "cells": [ { "cell_type": "code", "execution_count": 13, "id": "israeli-vampire", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 14, "id": "french-withdrawal", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "106129\n" ] } ], "source": [ "# Read the whole text; the context manager closes the file handle once it has been read.\n", "with open('../tempest.txt', \"r\") as txt_file:\n", "    text = txt_file.read()\n", "\n", "# Lower-case character by character: `text` becomes a list with one entry per character.\n", "text = [c.lower() for c in text]\n", "N = len(text)\n", "\n", "print(N)" ] }, { "cell_type": "code", "execution_count": 15, "id": "fresh-excerpt", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "39\n" ] } ], "source": [ "# Alphabet of the text (sorted unique characters) and how often each one occurs.\n", "characters, occurrences = np.unique(text, return_counts=True)\n", "n = len(characters)\n", "print(n)\n", "\n", "# Empirical probability of each character over the full text.\n", "probs = occurrences/N" ] }, { "cell_type": "code", "execution_count": 20, "id": "interim-argument", "metadata": {}, "outputs": [], "source": [ "def entropy(p):\n", "    \"\"\"Return the Shannon entropy of the probability vector `p`, in bits.\n", "\n", "    Entries with p <= 0 are left out of the sum (convention 0*log2(0) = 0).\n", "    \"\"\"\n", "    p = np.asarray(p, dtype=float)\n", "    positive = p[p > 0]\n", "    return -np.sum(positive * np.log2(positive))\n", "\n", "\n", "def entropy_emp(p, n):\n", "    \"\"\"Return `(h, eh)`: the plug-in entropy of `p` in bits and its estimated error.\n", "\n", "    `n` is the number of samples the frequencies `p` were estimated from. The error\n", "    comes from first-order error propagation with per-symbol variance p_i*(1 - p_i)/n\n", "    and sensitivity dH/dp_i = -log2(e*p_i); covariances between symbols are not included.\n", "    \"\"\"\n", "    p = np.asarray(p, dtype=float)\n", "    positive = p[p > 0]\n", "    h = -np.sum(positive * np.log2(positive))\n", "    # Only p > 0 enters the error as well: for p = 0 the term would be\n", "    # log2(0)**2 * 0 = inf * 0 = NaN, although a symbol that never occurs carries no variance.\n", "    eh = np.sqrt(np.sum(np.square(np.log2(np.e * positive)) * (positive * (1 - positive)) / n))\n", "    return h, eh\n", "\n", "\n", "def KLdivergence(p, q):\n", "    \"\"\"Return the Kullback-Leibler divergence D(p || q) in bits.\n", "\n", "    Entries with p = 0 contribute nothing. If q = 0 somewhere p > 0 the divergence\n", "    is infinite and `inf` is returned.\n", "    \"\"\"\n", "    p = np.asarray(p, dtype=float)\n", "    q = np.asarray(q, dtype=float)\n", "    support = p > 0\n", "    # log2(0) = -inf is the intended value here (it makes the result inf),\n", "    # so only the divide-by-zero warning is silenced.\n", "    with np.errstate(divide='ignore'):\n", "        cross_term = np.sum(p[support] * np.log2(q[support]))\n", "    return np.sum(p[support] * np.log2(p[support])) - cross_term\n", "\n", "\n", "def mutual_information(pxy):\n", "    \"\"\"Return `(MI, hx, hy)` in bits for the 2-D joint probability table `pxy`.\n", "\n", "    `hx` is the entropy of `pxy` summed over axis 0 (one value per column),\n", "    `hy` the entropy of `pxy` summed over axis 1 (one value per row), and\n", "    MI = hx + hy - H(pxy), where H(pxy) is the entropy of the flattened table.\n", "    \"\"\"\n", "    pxy = np.asarray(pxy, dtype=float)\n", "    px = np.sum(pxy, axis=0)\n", "    py = np.sum(pxy, axis=1)\n", "    hx = entropy(px)\n", "    hy = entropy(py)\n", "    hxy = entropy(pxy.reshape(-1))\n", "    return hx + hy - hxy, hx, hy\n" ] }, { "cell_type": "code", "execution_count": 21, "id": "sunrise-shore", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.229589052568906 0.00952034755131061\n" ] } ], "source": [ "Hall, EHall = 
entropy_emp(probs,N)\n", "print(Hall,EHall)" ] }, { "cell_type": "code", "execution_count": 22, "id": "entertaining-guest", "metadata": {}, "outputs": [], "source": [ "# Mutual information between the character at position t and the one at t + lag,\n", "# for lag = 1 .. L; auto_mi[lag - 1] stores the value for that lag.\n", "L = 100\n", "auto_mi = np.zeros(L)\n", "\n", "# Integer code of every character = its index in `characters`. `characters` is the\n", "# sorted output of np.unique(text), so searchsorted finds the exact position.\n", "char_codes = np.searchsorted(characters, text)\n", "\n", "for lag in range(1, L + 1):\n", "    # occurrences_pairs[i, j] counts how often characters[i] is followed,\n", "    # `lag` positions later, by characters[j]. np.add.at accumulates repeated\n", "    # (i, j) index pairs, which plain fancy-index assignment would not do.\n", "    occurrences_pairs = np.zeros((n, n))\n", "    np.add.at(occurrences_pairs, (char_codes[:-lag], char_codes[lag:]), 1)\n", "\n", "    # N - lag pairs exist at this lag; dividing turns counts into joint probabilities.\n", "    occurrences_pairs = occurrences_pairs / (N - lag)\n", "\n", "    auto_mi[lag - 1], hx, hy = mutual_information(occurrences_pairs)\n" ] }, { "cell_type": "code", "execution_count": 28, "id": "jewish-laugh", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.8218745848897342\n", "0.3370541214802376\n", "0.18309971372248057\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZ8AAAEQCAYAAABvBHmZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAvuElEQVR4nO3deZwcdZ3/8dene+6ZHJOZ4QiEmXCEbAQEErmRBFgxXmhEA4rrhfmhsqyr4MGiRtx1FZfAeiILgigaI0YRNIQQk3BHiYBiCGcSCAEkYZIwmWTOz++Pqk6apme6Z6a6ejrzfj4e/ejpb9W36jM1Pf3p+ta3vl9zd0REROKUKHYAIiIy8ij5iIhI7JR8REQkdko+IiISOyUfERGJXVmxA4jD2LFj/eCDDy52GDlt376d2traYoeRk+KMTinECIozaqUS56pVqza5e1Mhtj0iks/ee+/Ngw8+WOwwclq+fDnTp08vdhg5Kc7olEKMoDijVipxmtn6Qm1bzW4iIhI7JR8REYmdko+IiMROyUdERGKn5CMiIrFT8hERkdgp+YiISOxGRPLZ1qlpI0REhpPYk4+ZTTGzpWbWbmYbzewyM0vmUW+amd1hZq+EjzvN7Nh89rmtQ8lHRGQ4iTX5mFk9cCfgwJnAZcDngK/lqDchrFcGfCh8lAFLzKw51367HTq7e4cWvIiIRCbu4XXOB6qBWe6+jSB5jAbmmtnlYVk2bwdGAe9x960AZnYfsAl4G/DDXDt+rrWdg5rqovgdRERkiOJudpsJLM5IMvMJEtIp/dQrB7qB7WllbWGZ5bPj9Zu3515JRERiEXfymQysSS9w92eB9nBZX34drnOFme1lZnsBVwKtwK/y2fG6Te2DClhERKIXd/KpB7ZkKW8Nl2Xl7huBGcB7gZfCxyzgDHd/OddOE8A6nfmIiAwb5h5fTzAz6wIudverMso3ADe6+yV91NsXuAtYze7rO58GjgJOCM+eMuvMAeYAVO89ceqpF/2Ai6ZVRfWrFERbWxt1dcP/upTijE4pxAiKM2qlEueMGTNWufu0Qmw77g4HrcCYLOX14bK+XExw3ecsd+8CMLM/Ak8CFwEXZlZw92uAawDGTZjkr3rlsJ8/o1Tm+FCc0SmFGEFxRq1U4iykuJvd1pBxbSfsRl1DxrWgDJOBv6cSD4C7dwJ/Bw7KtdOyJGxo3UFXj7pbi4gMB3Enn0XAGWY2Kq1sNrADWNFPvfXAYWZWkSows0rgMGBdrp2WJ6Cn19nQumNQQYuISLTiTj5XAx3AQjM7PbwuMxeYl9792syeMrPr0updC4wHfmNmbzezdwC/BfYlbFrrT3ki6I29bpM6HYiIDAexJh93bwVOA5LArQQjG1wJfDVj1bJwnVS9VcBbCW40/SlwI0FT3T+7+yO59lsW/pbq8SYiMjzE3eEAd18NnJpjnZYsZUuBpYPZZ9KgqrKM9Zt1r4+IyHAwIka1BmhprGGtmt1ERIaFEZN8mhtqNcSOiMgwMWKST0tDjbpbi4gMEyMo+dTS3es8r+7WIiJFN3KST2MtoB5vIiLDwchJPg1B8lGPNxGR4hsxyaexroLaiqR6vImIDAMjJvmYmXq8iYgMEyMm+QBMbKxlnZrdRESKbkQln+aGGp57pZ1udbcWESmqEZV8Ut2tN27ZWexQRERGtJGVfMLu1mt13UdEpKhiTz5mNsXMlppZu5ltNLPLzCyZo85cM/M+Hl/Kd98tDTUA6nQgIlJksY5qbWb1wJ3AauBMgllIryBIgpf2U/Va4PaMsncDXyCYoC4vTaMqqalIsm6TOh2IiBRT3FMqnA9UA7PCyeOWmNloYK6ZXZ4+oVw6d98AbEgvM7MvA2vc/eF8d57qbq1RDkREiivuZreZwOKMJDOfICGdku9GzKwB+GfgFwMNoKWhRslHRKTI4k4+k4E16QXu/izQHi7L13uBcgaTfBpree6Vdnp6faBVRUQkIuYe34ewmXUBF7v7VRnlG4A
b3f2SPLfzR2CMu0/tZ505wByApqamqQsWLABgxYYurn+0k2+/uZqmmuHV2a+trY26urpih5GT4oxOKcQIijNqpRLnjBkzVrn7tEJsO/ZptIfKzPYlaKL7Qn/rufs1wDUAhx56qE+fPh2Aqmc2c/2jD7DPIYdz8iFNBY52YJYvX04qzuFMcUanFGIExRm1UomzkOL+6t8KjMlSXh8uy8f7AQN+OZgAJqamVtAAoyIiRRN38llDxrUdM5sA1JBxLagfZwP3uPtzgwlgr1GVVJUnNMabiEgRxZ18FgFnmNmotLLZwA5gRa7KZtYCHMcgOhqkbYMWjW4tIlJUcSefq4EOYKGZnR52CpgLzEvvfm1mT5nZdVnqnw10A78aShAtDbWa10dEpIhiTT7u3gqcBiSBW4GvAVcCX81YtSxcJ9PZwFJ33zSUOJoba3julR3qbi0iUiSx93Zz99XAqTnWaemj/MgoYpjYUEtnTy8vbN3B/vU1UWxSREQGYHjd6BKT5oZUjzd1OhARKYYRmXxaGoOzHQ2zIyJSHCMy+ew9qoqq8oR6vImIFMmITD6JhNE8rpa1anYTESmKEZl8AJobanTmIyJSJCM2+UxsrGX9K+30qru1iEjsRmzyaW6opbO7lxe27Sx2KCIiI86ITT4tDUGPt/Ua6UBEJHYjN/mEo1uv1XUfEZHYjdjks8/oKirLEqzX6NYiIrEbscknkTCaG2o0r4+ISBHEnnzMbIqZLTWzdjPbaGaXmVm2QUSz1Z1lZn82sx1mttnMbjez2sHG0txQq1EORESKINbkY2b1wJ2AA2cClwGfIxjdOlfd84CfE8wJNBM4D3iSIQyO2tJQw/rN6m4tIhK3uEe1Ph+oBmaF8/csMbPRwFwzuzx9Tp90ZtZIMPXCv7r7/6Ut+s1QgmlprKWju5cXt+1k/NjqoWxKREQGIO5mt5nA4owkM58gIZ3ST733h88/iTKYltTo1mp6ExGJVdzJZzKwJr3A3Z8F2sNlfTkWeBz4uJltMLMuM1tpZicMJZjm1L0+6vEmIhIrc4/veoeZdQEXu/tVGeUbgBvd/ZI+6i0GTgC2AZ8HNofP04BD3P2lLHXmAHMAmpqapi5YsOB12+11Z86Sdv65uZzZh1YM5VeLRFtbG3V1dcUOIyfFGZ1SiBEUZ9RKJc4ZM2ascvdphdh27DOZDpIBdcD73P12ADO7D1gPXAB8ObOCu18DXANw6KGH+vTp07NuuOWhFfTW1DJ9ekGO74AsX76cvuIcThRndEohRlCcUSuVOAsp7ma3VmBMlvL6cFl/9RxYnioIrxutAqYMJaCWhlo1u4mIxCzu5LOGjGs7ZjYBqCHjWlCGxwjOfiyj3IDeoQTU0lDDus3b1d1aRCRGcSefRcAZZjYqrWw2sANY0U+928LnGakCMxsDTAUeGUpAzY217Ozq5R+vdgxlMyIiMgBxJ5+rgQ5goZmdHnYKmAvMS+9+bWZPmdl1qdfu/iBwC3CdmX3YzN4O/A7oAr4/lIAmqru1iEjsYk0+7t4KnAYkgVsJRja4Evhqxqpl4TrpzgV+C8wDbiZIPKeG2xy0VHdrjfEmIhKf2Hu7uftq4NQc67RkKWsDPhk+IjN+bDUVyQTr1OlARCQ2I3ZU65RkwpgwrlpnPiIiMRrxyQeC7ta65iMiEh8lH4KpFdZvbifO0R5EREYyJR9gYmMNO7p61N1aRCQmSj4EZz6gHm8iInFR8gEmNgbJR8PsiIjEQ8kH2HdMFeVJY606HYiIxELJByhLJphQX8N6JR8RkVgo+YRaGmtZt0nNbiIicVDyCTWHo1uru7WISOEp+YRaGmpp7+zh5TZ1txYRKbTYk4+ZTTGzpWbWbmYbzewyM8scRDSzTouZeZbH/KjiamlMdbdW05uISKHFOrComdUDdwKrgTOBg4ArCJLgpXls4iLg3rTXm6KKrSU1uvXm7RwzcVxUmxURkSziHtX6fKAamBXO37PEzEYDc83s8vQ5ffr
wuLs/UIjA9htbTVnC1ONNRCQGcTe7zQQWZySZ+QQJ6ZSYY3mNsmSCCeNq1OwmIhKDuJPPZGBNeoG7Pwu0h8tyud7MeszsBTObZ2bVUQaX6vEmIiKFZXF2LTazLuBid78qo3wDcKO7X9JHvX2B/wDuALYB04EvAHe4+5l91JkDzAFoamqaumDBgpzx3fRYB3dv6OaHp9dgZvn+WpFpa2ujrq4u9v0OlOKMTinECIozaqUS54wZM1a5+7SCbNzdY3sQTH39mSzlG4BvDHBbnwQceGOudSdNmuT5uP6eZ7z5C7f5P7btzGv9qC1btqwo+x0oxRmdUojRXXFGrVTiBB70AuWDuJvdWoExWcrrw2UDcXP4PHVIEaVp3jXAqJreREQKKe7ks4aMaztmNgGoIeNaUB4843nIJqamVtDo1iIiBRV38lkEnGFmo9LKZgM7gBUD3NZZ4fOqKAID2K++mmTCNK+PiEiBxX2fz9XAhcBCM/sWcCAwF5jnad2vzewpYIW7fzx8PRcYRXCD6TbgzcDFwEJ3/2tUwZUnE+xfX60ebyIiBRZr8nH3VjM7DfgecCuwBbiSIAFlxpU+5M4agtENziO4J+hZ4NvAf0UdY0tDrZKPiEiBxX3mg7uvBk7NsU5Lxuv5BDejFlxLQw1/Wd+Kuxelu7WIyEigUa0ztDTW8mpHN69s7yx2KCIieywlnwwtu3q8qelNRKRQlHwyNKdGt9YYbyIiBaPkk2H/+hqSGt1aRKSglHwyVJQl2G9sNWt1o6mISMEo+WTR3FCjMx8RkQJS8sliYmMtazdtTw1gKiIiEVPyyaK5oZZXd3bT2t5V7FBERPZISj5ZtKR6vKnpTUSkIJR8smgJp1bQAKMiIoWRc3gdM3uZAUxb4O57DSmiYWBCfQ0J09QKIiKFks/Ybt8nwjlzSkFFWYL96qvV401EpEByJh93nxvlDs1sCvBd4HiCUa2vBb7m7j151k8AfyKYwfSd7n5blPGltDTUqtlNRKRA8ml2+8oAtufu/vV+tlUP3AmsBs4EDgKuILj2dGme+zgP2H8AMQ1Kc0MNtz7yQqF3IyIyIuXT7DaXYKbR7UCuOQYc6DP5AOcTzMczK5w8bomZjQbmmtnl6RPKZRMmr/8CvkhwxlQwLQ21bN3RxZb2TsbWVBRyVyIiI04+vd2eBsoJpqu+CDjQ3Zv6eOTqbDATWJyRZOYTJKRT8ojl6wSzmS7NY90hSY1uvVZNbyIikcuZfNz9EOAE4O8EH/4vmdlCM3ufmVUPcH+TCWYlTd/+s0B7uKxPZnYE8DGCBFhwLY3BvT7r1eNNRCRyNtAhZMzszcDZwHuBGuB3wI/c/a486nYBF7v7VRnlG4Ab3f2SfuquAFa6++fNrAVYSz8dDsxsDjAHoKmpaeqCBQvy+O126+p15tzRzrsOKuc9h8TT7NbW1kZdXV0s+xoKxRmdUogRFGfUSiXOGTNmrHL3aQXZuLsP6gFUAN8GuoGFedbpAj6TpXwD8I1+6p0NvAiMDl+3EFxfekc++500aZIPxgn/vdT/7Rd/GVTdwVi2bFls+xoKxRmdUojRXXFGrVTiBB70QeaIXI98Ohy8hpmdGCaDs4BRwM3AD/Os3gqMyVJeHy7Ltr9ygiT3LSBhZmOB0eHiWjMb5e6v5v0LDEBLY41uNBURKYC8htcxs6PN7HIzW09wsX8C8O/AXu5+truvyHN/a8i4tmNmEwia79ZkrQG1BF2r5xEkqFbgkXDZfOChPPc9YC0NtRrfTUSkAPK5z+dxYCLwR+CrBE1s/XaJ7sci4OKMs5XZBF25+0pgbcCMjLJ9gF8Al4RxFURLQy1b2tXdWkQkavk0ux0C7CQYUeBo4HKzvm/38f67W18NXAgsNLNvAQcS3Ec0Lz2hmdlTwAp3/7i7dwPL0zcSdjgA+Ju7r8zjdxiU1ACj6ze3K/mIiEQon+Tztah25u6tZnYa8D3gVoLhda4kSECZcSWj2u9gpU+t8MYJY4s
bjIjIHiSfsd0iSz7h9lYDp+ZYpyXH8nXkHm1hyCaMq8EM1m1SpwMRkShpPp9+VJUnGT9Go1uLiERNySeH5oYa1ir5iIhESsknh+aGWg2xIyISMSWfHCY21vDK9k627ugqdigiInsMJZ8cmhtS3a3V9CYiEhUlnxwmhvf6aJgdEZHoKPnkcMC44F6ftS/rzEdEJCpKPjlUlSf5p31Hs+jRF1IjbIuIyBAp+eThoye0sObFV7n3qc3FDkVEZI+g5JOHdx05nsa6Cq6955lihyIiskdQ8slDVXmSfzm+heWPv8yTLxVk6iARkREl9uRjZlPMbKmZtZvZRjO7zMz6HUTUzN5gZreH63eY2bNmdq2Z7RtX3Oce10xlWYLr7lkb1y5FRPZYsSYfM6sH7iSYAvtM4DLgc+QeOXsMsBa4CDiDYF6h04E/mNmAZ2MdjHG1Fbx36v4sfOh5Xn61I45diojsseI+8zkfqAZmufsSd7+aIPF81sxG91XJ3e9z90+6+8/dfbm7Xw98AjgSOCKOwAE+ftJEOrt7+dkD6+PapYjIHinu5DMTWJwxE+p8goR0ygC3lep6Ftssbwc11XHa5L346QPr2dnVE9duRUT2OHEnn8nAmvQCd38WaA+X9cvMEmZWYWaHAt8E/gz8qRCB9uW8kw/kle2d/Oah5+PcrYjIHsXivHHSzLqAi939qozyDcCN7n5Jjvq3E1zzAVgFvM3d/9HHunOAOQBNTU1TFyxYMMToA+7O3Pt30tnj/NdJ1ST6mVJ8oNra2qirq4tse4WiOKNTCjGC4oxaqcQ5Y8aMVe4+rSAbd/fYHkAX8Jks5RuAb+RR/xDgWOBcgjOoVUBVrnqTJk3yKP3mLxu8+Qu3+R8feynS7S5btizS7RWK4oxOKcTorjijVipxAg96gfJB3M1urQQ91zLVh8v65e5PuvtKd/8ZwRnQUcAHog0xt7cfsS/7jK7i/+7WTaciIoMRd/JZQ8a1HTObANSQcS0oF3dfD7wCHBhZdHkqTyb4yIkt3Pf0Zv6+cWvcuxcRKXlxJ59FwBlmNiqtbDawA1gxkA2FnQ4aCO7/id05bzqAmook192tm05FRAYq7uRzNdABLDSz08NOAXOBeZ7W/drMnjKz69Je/4+ZfdPM3mNmM8zsU8Bi4GmCrtqxG1NTzvunTeB3j2zkxa07ixGCiEjJijX5uHsrcBqQBG4luMH0SoIRC9KVheukPAicDFwH/B64EPg1cJy7F22inY+dOJFed35y/7pihSAiUpJiGZomnbuvBk7NsU5Lxuv5FOkMpz8HNNRwxhv24aYH1nPBjIOprYz9cIqIlCSNaj1E5508kW07u7l51YZihyIiUjKUfIZoavM4jjpgLD++dy09vZrpVEQkH0o+EfjEyQeyfnM7S1a/VOxQRERKgpJPBN4yZW/2r6/mOs10KiKSFyWfCJQlE3zsxIn8eV0rDz+3pdjhiIgMe0o+EXn/myYwqqqMazXkjohITko+EamrLOMDxxzAokdfZENre7HDEREZ1pR8IvThE1ow4Pp71xU7FBGRYU3JJ0Ljx1bz9iP25Zd/fo5tO7uKHY6IyLCl5BOx8046kLaObn75p+eKHYqIyLCl5BOxw/cfw7ETx3H9vWvp7uktdjgiIsNS7MnHzKaY2VIzazezjWZ2mZklc9R5k5ldH4523W5mj5vZV82sKq64B+K8kw9k49ad/OHRF4sdiojIsBRr8jGzeuBOwIEzgcuAzxGMbt2f2cBBwLeAtwHfBz4L3FSwYIfgtMl7MbGxlmvvfiY1/beIiKSJexjm84FqYFY4f88SMxsNzDWzy9Pn9MnwTXfflPZ6uZntBH5kZs3hrKbDRiJhfOykiXz5t4/y53WtHDNxXLFDEhEZVuJudpsJLM5IMvMJEtIpfVXKSDwpD4XP46MLLzpnHb0/Y2vKddOpiEgWcSefycCa9AJ3fxZoD5cNxPFAL8FspsNOdUWSc49tZsljL7F2U9HmuxM
RGZYszmsSZtYFXOzuV2WUbwBudPdL8tzOPsBfgT+4+0f6WGcOMAegqalp6oIFC4YQ+eBs6ejlouU7OGVCGR+aUplz/ba2Nurq6mKIbGgUZ3RKIUZQnFErlThnzJixyt2nFWTj7h7bA+gCPpOlfAPwjTy3UQHcBTwD1OdTZ9KkSV4sFy142Cdfushbt3fkXHfZsmWFDygCijM6pRCju+KMWqnECTzoBcoHcTe7tQJjspTXh8v6ZWYG3Ai8AXibu+esU2wfP3kiO7p6uGnls8UORURk2Ig7+awh49qOmU0Aasi4FtSHqwi6aJ/p7vmsX3ST9xnNyYc08pP71tHZrZtORUQg/uSzCDjDzEallc0GdgAr+qtoZl8CLgDOdfd7Chdi9D5x8oH849UO/n3Bw+zs6il2OCIiRRd38rka6AAWmtnpYaeAucA8T+t+HY5kcF3a6w8A3yBocnvezI5LezTF+ysM3JsnNfHFmZP5/V9f4IPXrmRzW0exQxIRKapYk094jeY0IAncSjCywZXAVzNWLQvXSXlL+PwR4P6Mx9sLF3F0zj/lIH7wwaN59PmtvOcH9/H0y23FDklEpGjiHuEAd18NnJpjnZaM1x8hSDwl7W2H78s+Y6r4xE8eZNYP7uPqc6dy/EENxQ5LRCR2GtU6ZkcfUM9vP30ijXUV/MuPV/LrVRuKHZKISOyUfIpgwrgaFn7yRN7UMo7P/eoR5i15QgOQisiIEnuzmwTG1JRzw0eP4T9+8ze+s/RJ1m/ezjv2UgISkZFByaeIKsoSXH7WEbQ01vLtxY/zWH2Cacd2Ul9bUezQREQKSs1uRWZmfHrGwXz3nKN4Zmsvs354nwYiFZE9npLPMPHON47nC2+qYuuOLt7zg3v509pXih2SiEjBKPkMI4fUJ/nNp05gXG0F5167klsefr7YIYmIFISSzzDT3FDLwk+ewFEHjOXf5j/Md5Y+qZ5wIrLHUfIZhsbWVPDTjx/LrKP3Y96SJ/jcrx7RoKQiskdRb7dhqqIswRXveyMtDbXMW/IEG7fs4OpzpzK2Rj3hRKT06cxnGDMzLjztEK6afSR/Wb+FWT+4j6f+8WqxwxIRGTIlnxLw7qP246ZPHMu2nV2863v3cttfNxY7JBGRIYk9+ZjZFDNbambtZrbRzC4zs2SOOhVm9m0zu9vMdpjZiLsC/6aWcfz+wpP5p31Hc8HPH+KyW1fT1aPrQCJSmmJNPmZWD9wJOMGMpJcBnyOYWqE/NcB5QDtwXyFjHM72Hl3F/DnH8dETW/jxvWs555oHeGnbzmKHJSIyYHGf+ZwPVAOz3H2Ju19NkHg+a2aj+6rk7luAce5+BvCbWCIdpsqTCb76zjfwnXOOYvUL23j7d+7hgWc2FzssEZEBiTv5zAQWp89aCswnSEin9FfRdbPLa7zrjeP57adPZHR1GR+8diXX3PW07gcSkZIRd/KZDKxJL3D3Zwma0ybHHEvJm7T3KG759Im8ZcrefOMPa/jUTX/h1Z1dxQ5LRCQni/Pbspl1ARe7+1UZ5RuAG939kjy2cQHwXXe3HOvNAeYANDU1TV2wYMGg445LW1sbdXV1A67n7ty+rptfPdHJXjXGvx5ZxX6jCve9YrBxxq0U4iyFGEFxRq1U4pwxY8Yqd59WkI27e2wPoAv4TJbyDcA38tzGBYStcPk+Jk2a5KVg2bJlQ6p//9ObfOrXl/jkSxf5LQ8/H01QWQw1zriUQpylEKO74oxaqcQJPOgFygdxN7u1AmOylNeHy2QIjjuwgd9feBJvGD+aC3/xEHN/93cNyyMiw1LcyWcNGdd2zGwCQVfqNVlryIDsPbqKX8w5jo+dOJEb7lvHOf/3AC9uVXdsERle4k4+i4AzzGxUWtlsYAewIuZY9ljlyQRfeecUvveBo3jshW2847t3c9/Tm4odlojILnEnn6uBDmChmZ0edgqYC8zztO7XZvaUmV2XXtHMZprZWcCR4euzwkdzbNGXmHccMZ5bPn0
iY6rLOffalVy9Qt2xRWR4iHVUa3dvNbPTgO8BtwJbgCsJElBmXJlD7vwQSE80vwqfPwrcEHGoe4xD9h7FLRecxOdvfoRvLlrDrx58jv3qa2israBxVCUNtRU01FXSWFdBY10ljXWVjKutoKJMw/6JSOHEPqWCu68GTs2xTks+ZZKfusoyvv+Bo/nZymdZtuYfbG7r4Ol/tLGprYOOPjokjK4qo3FUJY21lTSOqqChtpKGMEF1bOnhpJ5eypJKUCIyOJrPZ4QwMz50XDMfOm73yaO7s72zh81tHWxq62BTWyeb2jrY3NYZlgWvn3ipjU1tm9nSvvsG1qseXsLxBzZw0iGNnHhwIwc21mLW761XIiK7KPmMYGZGXWUZdZVlNDfU5ly/q6eXl1/t4KeL7qW1Yi/ufnITd6x+CYB9x1Rx4sGNnHRwIycc3MBeo6oKHb6IlDAlH8lbeTLB+LHVHLtvGdOnH4G78+wr7dzz1CbufWoTS1a/xM2rNgBw6N6jOOmQIBkdM3EctZV6q4nIbvpEkEEzM5obamluqOWDxzbT0+us3rhtVzL66QPrue6etZQljKMPqA/OjA5p4Ij9x1Ku60UiI5qSj0QmmTAO338Mh+8/hk9OP4idXT2sWt+6KxldtfQJrrwTaiuSHLbfGI7Yf0z4PJbmcTUkErpmJDJSKPlIwVSVJznx4KBDAsCW9k7uf3oz9z+zmb9u2MpP7l+/a/ifUVVlHDY+PSGN4YBxNerEILKHUvKR2IytqWDm4fsy8/B9gaADwxMvvcqjz2/lrxu28ujzW7n+3nV0htODj64qC86k9hvL4WFC2r++WglJZA+g5CNFU55M8IbxY3jD+DHMflNQ1tkdJKS/pSWk6+55hq6eYGSGsTXlHL7fGA5qqqOqPElFWYKKpFGeTASPtNdPvtDNzkdfpKIsbXkyQUUyQXlYVpFMUFGWWrZ7vaSaAEUKSslHhpWKsgSH7Rc0vZ1zTFDW0d3D4y8GCelvG7byt+e38tCzG+js6c09avcjqwYVRzJhu5JRxa7ElvE6tbwsPakF5al1di977bqpJPn0xm46//4idVVljKosp66qjNrKJKMqy6kqT+gsT/ZYSj4y7FWWJTli/7Ecsf9YOPa1y9ydnl6ns6eXru7wOXzce/9Kjjx62q7XwTKnsztIWt29wXNXj792ne6M12llu7fvYd1e2jq6s66T2nauJHnNX7MnyGRi931YdZVl1FXtfh5VWUZtWF5TkSSZMJIJoyxhJFLPZpQlw+dEgmQCkmnP6etUliWoqQgSX01FsE31SJRCUvKRkmbhh2dZMgEVr122ti7BlPGjixNYhlSSTE9GXT293HXv/Rx25FTaOrpp29kdPKce4etXd3azPSzbsqOLDa3tbO/o2bVeoVSUJaitCJIR3Ttp+vu9u5JTbUWSmsqyXctrK5NUlyepDhNXdUWSmvJgWXVFMigrD8ory3RGJ0VIPmY2BfgucDzBwKLXAl9z954c9cYAVwHvJhiN+zbgQnffXMBwRSKxO0lCddqYufvUBs2Mg9Xb6+zs7qGnN0hu3b1Ob/icKusJE193j9Prr13W3dsbnDl297K9s4f2ju7XPnd2s72jh/XPv0BNdTntHd28sn3HrvL2zm7aO/v9132dhPG6pJRKWNXlZeFzgqryYFlVeWpZ+Drt5+qK3etVVyTZ1ulsbe8imUw7+wvPBmV4iTX5mFk9cCewGjgTOAi4giCZXJqj+gJgEnAe0At8C/gtcHKBwhUZ9hIJC85MCmz58lamTz8m67LeXmdHVw/tnT3s6Oyhvat7989hAkv9vKOrJ+3n7nB5qqyb1u1d7OwK1wvX7Wvw2z798Y7XFZlB0nY3TSZ3PRKveV2WMFInZWaGhXWN3eWvW5ZaP1zHINxWgkSCsMlz937Lksbml3dyy0sPB/s125Us05tOgz0MXiqWhO2Oy8KfE2H8qaScvs6unwt8dhr3mc/5QDUwK5y/Z4mZjQbmmtn
l6XP6pDOz44G3AKe4+11h2fPASjM73d3vjCl+EcmQSBi14TWoQkid3e0Ik9fOrh52dvXuSk6psh2dPfztscc58KCD087+eunphZ7e3owzvrSzwp7wjNGDa3YO4OA47gSPoDT8OXgmXB6ujrvvWtbrwZlmT6/T3t1Nj4cxhGWvtvXyQmdr2tnna2Ps7h36vFvuviveXt8d63ARd/KZCSzOSDLzCc5iTiGY46evei+lEg+Au//JzNaGy5R8RPZQqbO7fM7w9ml/huknTYwhqqFZvnw506dPj32/2RJSb5iVejOX9cLYbxUulriTz2Tgj+kF7v6smbWHy/pKPpOBNVnKHwuXiYhIDqlmN4DkEJv1hiru5FNP0MkgU2u4bDD1DsxWIZyiew5AU1MTy5cvH0CYxdHW1qY4I1QKcZZCjKA4o1YqcRbSHtvV2t2vAa4BOPTQQ70Yp7gDVaxT8YFSnNEphRhBcUatVOIspLjvImsFsvUrrQ+XRV1PRESGobiTzxoyrtGY2QSghuzXdPqsF+rrWpCIiAxjcSefRcAZZjYqrWw2sANYkaPePmZ2UqrAzKYRXO9ZVIhARUSkcOJOPlcDHcBCMzs97BQwF5iX3v3azJ4ys+tSr939fuAO4EYzm2Vm7wZuAu7RPT4iIqUn1uTj7q3AaUCSoFv114Arga9mrFoWrpNuNsHZ0Y+BG4FVwHsKGa+IiBRG7L3d3H01cGqOdVqylG0BPho+RESkhJkPtzEXCsDMXgUeL3YceWgENhU7iDwozuiUQoygOKNWKnEe6u6jcq82cHvsfT4ZHnf3acUOIhcze1BxRqcU4iyFGEFxRq2U4izUtjVblIiIxE7JR0REYjdSks81xQ4gT4ozWqUQZynECIozaiM+zhHR4UBERIaXkXLmIyIiw4iSj4iIxK6kk4+ZTTGzpWbWbmYbzewyM8scGSFbvTFmdr2ZtZrZVjO7ycwaChTj+8zsd2b2vJm1mdkqMzsnj3qe5fFAIWIM9/eRPvZ5fo56sR3LcH/L+4jTw+nWs9Vp6WP9+RHGdbCZ/cjM/mpmPWa2PMs6ZmaXmNlzZrbDzO4ysyPz3P6ZZvY3M9tpZqvNbHbUMZrZvmb2bTN7JHyvPmdmPzGz8Xlse24fx/itUccZrrMuy75ezHP7Qz6W+cRpZtP7ea8uzrHtG/qoN6DJM/P9/DGzT5jZk+ExWWVmp+W5/RPNbGVYb62ZXZhvbCV7n4+Z1RNMn70aOBM4CLiCIKFemqP6AmAScB7QSzCN92+BkwsQ6meBtcC/E9xU9jbg52bW6O7fzVH3CuDmtNevFiC+TKcSDPSa8kyO9eM8lgCfAkZnlF0GHAX8OUfdi4B7015HeZPfGwj+tg8A5X2s80Xgy8DFBKOxfxa408wOc/c+PzgtGFD318APgAvD/fzCzFrd/Y4IY5xKMGTVtcBKYG+CsRfvC2Nsy7H9rUBmsnlsAPHlG2fKz4H0/6HOXBuO8FjmE+dfgMwvRAcAvyS/AZHX8PoRXdYNLMTcnz9hMrqa4G99T7jP28zsTe7+aF8bNrODgcXAbcCXgGOAeWbW7u7X5owsmNO79B7hL9sKjE4r+zzQnl6Wpd7xBNOUvzmt7Jiw7PQCxNmYpeznwNoc9Ry4IMbj+ZFwn3UDqBPrsewjhgrgFeCH/azTEsb0jgLGkUj7+WZgecbyKoIP56+kldUCLwP/mWPbi4E/ZpT9gWBg3ShjHAuUZZRNCo/dh3Nsey6wKY5jGZavA/5nENuO5FjmG2eWOhcDPcD4HOvdADwYwbHM+flDMPrLj9N/L+BvwM9ybPtHwBPp7xmCpP4cYWe2/h6l3Ow2E1jsaaNhA/OBauCUHPVecve7UgXu/ieCbwczow7S3bN9u34IyNmUUQJiPZZ9eCvBpIK/iGl/Wbl7b45VTiA4Y1uQVmc7wQC7fR4rM6sEZqTXC80HjjezbJMsDipGd9/i7t0ZZU8QfKGL7f2ax7EclCiPJQw6znOAFe6
+cRB1ByzX54+ZHUjwBSP9fdkL/Irc/8MzgYUZ75n5wP7AYbliK+Xk87qJ5Nz9WYJ/lP7aRfuagO6xHPWidDzBN4Zc5ppZt5ltMrMfm9m4QgcGPB3u83Ez+3851h0Ox/JsYANwdx7rXh+2zb9gZvPMrLrAsaWbTPCN98mM8lzH6iCCJp3M4/wYwf/vpKgCzMbMjiCY7DGf9+vY8L3aZWYPmdmsQsYGfNzMOi241nizmTXnWL/Yx3ISQfNwvl+UppjZNjPrMLN7zKy/L9UDkf75k3rvZTsm48ysKdsGzKwWmNBHvfTt9qlkr/kQfNvdkqW8NVw2mHoHDjmqHMILee8GPpZj1Z8QfCt+GZhGcK3gjWZ2jLv3FCC0F8J9/IlgOouzgavNrMbdr+yjTrGPZQ3wLuBHHp7z96ED+D7BnFDbgOnAFwg+jM4scJgp9UBblr9dK1BjZhXunu2aReq9vCVLvfTlkTOzBPC/BAnzdzlWf4qg2fshYBTw/4Bfm9l73X1hAcK7heBaywbgnwimZbnbzA5396191CnasQydDXQRXHPK5SGC626rgSbgc8ASMzspbF0YlCyfP/kck5ezbGpsHvX6VcrJp+SYWQtBe+st7n5Df+u6+0fSXt5lZo8RtE2/k+CCfqTcfTFBe3jKIjOrAi41s/8tVFPIEL2T4LpJv98k3f0F4IK0ouVm9hLwAzN7o7s/UsAYS9l/E3xLPsXdu/pb0d1/lv7azG4F7gO+AkSefNz939Je3m1m9wEPE1wsvyrq/UXkbOAOd38l14ru/r/pr83sD8DfgUsIkseADeTzJw6l3OzWCmRro61nd/aNst6QhE1mi4D1wAcHsYnbgTbg6CjjyuFmYBzBBftsinIs05wNPOXugxl5N9WLcGqE8fSnFaiz198KUA+093HWk6oHrz/O9RnLI2VmnyK4OP5hd1850PrhmehC4Igsv3PkPOiV9Tj9/38U5VgCmNkbCc7QBnVt0t3bCb58Dur/v5/Pn8Eeky2DrLdLKSefNWS0K5rZBIL26WzXIfqsF+rr+sWQhc1DtxH0zHpH+EYakLRmpTjHQ8q1z9iPZUp4cXgmg+9oEPfxXEPQnHlwRnmuY/U0QVNN5nGeTNC1PZ9rMQNiZu8l6ML8eXf/5RA25cT/fu1vf7EfyzRnE9zCcMsQtjGo45nj8yf13st2TF5x92xNbqnOMs/1US99u30q5eSzCDjDzNInOppN8AdekaPePmF/fwDMbBrBNYp8+t4PiJmVEfQcOQR4q7v/Y5DbeStQRzB9eFzOIrg3YH0fy2M9lhneA1Qy+ORzVvgc1/G8j+B60/tSBeGHwjvp51i5ewewLL1eaDZwfz/XNwbFzKYDNwHfdff/GcJ2DHgv8EiBrlFm7u8wgg++Pv+ecR/LDGcDt3rue6WyCjvHvJ0Bvl9zff64+zMESTf9fZkIX+f6H14EvCfjzHY2QVLq8/6g9J2X5IPg9O4FYAlwOjCHoFnqPzPWewq4LqNsMcHNk7MI2k8fB+4uUJzXEHxbuRA4LuNRGa6zFFiaVmdOWO/9BDd9XkRwmrsSSBYozl8TXISfCbwD+GkY978Ol2OZsd/bgYf7WPaaOAnuQbkijPF0gptSdwC/jjCeGoKEdhZwP0H7fOp1TbjOlwh6Y34aOA34PUFy3zttO/8CdAPNaWUnhWVXEXSWuJzgm/pbooyRoFloC8G1k+Mz3qsHpW3nlDCeU9LKVoTv8bcQfDH4Qxjju6I+lgQfwr8gaD6aAXwSeD58H6bf91ewY5nv3zxc7ziC/6V397Gd18RJ0JR1N0GnjdMIPtAfIOg4M22AMebz+XMOQU/MS8PjeQPB/8dhOf7mBxN85v48rPd5gjPL8/KKLap/vmI8gCnAH8MD9QLwdTI+nAluRrsho2wscH34j7YtPHivuxkrohjXsft0OfPREq6znLQb1MI33L3A5vCP+RzwHWBMAY/lNwgSR3t4PFcBHxpOxzJtn43hcfliP8f
8hrTXZwMPEtzk2UmQnC5L/fNFFFNLHn9nA/6DoIfWDoIPmKMytvOR9Dpp5e8m+DbZQdCkcXbUMabtO9sj/XhOD8ump5VdR/DhvwPYHv5uMwtxLIEjCL6wvRy+D14k+MAcn7Gdgh3LfP/m4XpXhf8fWd9vmXES3JC8kOD/viN8394OHDeIGNflGeMnCP4vOghGZjgtYzuv+5uH5ScR9JDdGe7rwnxj05QKIiISu1K+5iMiIiVKyUdERGKn5CMiIrFT8hERkdgp+YiISOyUfEREJHZKPiJFFE6XPJix6URKmpKPiIjETslHRERip+QjIiKxU/IREZHYKfmIiEjslHxERCR2Sj4iIhI7JR8REYmdko+IiMROyUdERGKn5CMiIrHTNNoiIhI7nfmIiEjslHxERCR2Sj4iIhI7JR8REYmdko+IiMROyUdERGKn5CMiIrFT8hERkdj9fyo8Yz12NElHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# auto_mi[k] holds the mutual information at lag k + 1, so plot it against\n", "# the lags 1 .. len(auto_mi) rather than against the array index.\n", "lags = np.arange(1, len(auto_mi) + 1)\n", "plt.plot(lags, auto_mi)\n", "plt.xticks(size=15)\n", "plt.yticks(size=15)\n", "plt.xlabel(\"l\",size=15)\n", "plt.ylabel(\"MI\",size=15)\n", "plt.xlim(0,20)\n", "plt.grid()\n", "\n", "print(auto_mi[0])\n", "print(auto_mi[1])\n", "print(auto_mi[2])" ] }, { "cell_type": "code", "execution_count": 34, "id": "worthy-strengthening", "metadata": {}, "outputs": [], "source": [ "# Keep every 10th character of the text, starting at index 1.\n", "text_subsampled = text[1::10]" ] }, { "cell_type": "code", "execution_count": 47, "id": "engaged-manhattan", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4.229589052568906 4.22939412884961 0.00952034755131061 0.030112454202498615\n" ] } ], "source": [ "characters_sub, occurrences_sub = np.unique(text_subsampled, return_counts=True)\n", "Nsub = len(text_subsampled)\n", "# Separate name on purpose: reusing `probs` would overwrite the full-text\n", "# distribution that the Hall/EHall cell reads when it is re-run.\n", "probs_sub = occurrences_sub/Nsub\n", "\n", "# Entropy and its error for the subsampled text, printed next to the full-text values.\n", "Hsub, EHsub = entropy_emp(probs_sub,Nsub)\n", "print(Hall,Hsub,EHall,EHsub)" ] }, { "cell_type": "code", "execution_count": null, "id": "different-somerset", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }