# %% [markdown]
# # Get measures on topic assignment

# %%
# load stored data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open semantic_net.pkl when it was produced by a trusted run.
with open("/content/semantic_net.pkl", 'rb') as f:
    in_data = pickle.load(f)

# %%
# build C matrix: one-hot document/topic membership, C[i, t] == 1 when the
# i-th selected document carries topic label t
b_topics = np.array(new_topics)
b_topics = b_topics[in_data.documents]  # keep only the labels indexed by in_data.documents
n_docs = len(b_topics)
n_topics = b_topics.max() + 1  # labels are used directly as column indices

# Build the matrix in a single call from (data, (row, col)) triplets instead of
# assigning C[i, t] = 1 element by element: every such assignment changes the
# sparsity structure of a csr_matrix and triggers SparseEfficiencyWarning.
# Shape, dtype (float64) and stored entries match the element-wise version.
# NOTE(review): assumes labels are non-negative integers -- a negative label
# (e.g. an outlier marker of -1) would have wrapped to the last column under
# item assignment, whereas this constructor rejects it. TODO confirm that
# new_topics never contains negative labels.
C = sps.csr_matrix(
    (np.ones(n_docs), (np.arange(n_docs), b_topics)),
    shape=(n_docs, n_topics),
)

# builds topic matrices
Pwc = in_data.Pwd.dot(C)  # joint word + class probability
Pcc = ((C.T).dot(in_data.Pdd)).dot(C)  # joint class + class probability
pc = Pcc.sum(axis=0)  # column sums of Pcc (one value per topic)

# show number of topics, and size
plt.bar(np.arange(n_topics), np.asarray(C.sum(axis=0)).ravel())
plt.xlabel("topic #")
plt.ylabel("# of documents");
"],"image/png":"iVBORw0KGgoAAAANSUhEUgAAAkkAAAGwCAYAAAC99fF4AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANKdJREFUeJzt3X9YVHXe//HXIAKKAv4IkEQkNZX8mSZNplmyYnKXre5dbrRZcclaUKlZ6r1JZe1itpnimm5bm3WvZj+11cokTdxNRERJUyJrSTEdaCMY8QcinO8ffp27kdPK4MAM+Hxc17mumc/5zDnvj5/t4rWfc+aMxTAMQwAAAHDi4+kCAAAAvBEhCQAAwAQhCQAAwAQhCQAAwAQhCQAAwAQhCQAAwAQhCQAAwISvpwtoDmpra3XkyBG1b99eFovF0+UAAIB6MAxDx44dU0REhHx8XF8XIiTVw5EjRxQZGenpMgAAQAMUFxera9euLn+OkFQP7du3l3T2HzkoKMjD1QAAgPqw2+2KjIx0/B13FSGpHs5dYgsKCiIkAQDQzDT0Vhlu3AYAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADBBSAIAADDh6+kCAABAy9Z99gcX7PPt/IQmqMQ1rCQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACYICQBAACY8GhI2rp1q2655RZFRETIYrFo7dq1jn3V1dWaNWuW+vfvr8DAQEVEROjuu+/WkSNHnI5RVlamxMREBQUFKSQkRElJSaqsrHTqs2fPHo0YMUIBAQGKjIzUggULmmJ4AACgGfNoSDp+/LgGDhyopUuX1tl34sQJ7dq1S3PnztWuXbv03nvvqbCwULfeeqtTv8TERO3bt0+ZmZlav369tm7dquTkZMd+u92uMWPGKCoqSnl5eXruuef05JNP6qWXXmr08QEAgObLYhiG4ekiJMlisWjNmjW67bbbfrZPbm6uhg0bpoMHD6pbt24qKChQTEyMcnNzNXToUEnShg0bNG7cOB0+fFgRERFatmyZfve738lms8nPz0+SNHv2bK1du1ZffvllvWqz2+0KDg5WRUWFgoKCLnqsAABcSrrP/uCCfb6dn+D2817s3+9mdU9SRUWFLBaLQkJCJEnZ2dkKCQlxBCRJiouLk4+Pj3Jychx9Ro4c6QhIkhQfH6/CwkL9+OOPpuepqqqS3W532gAAwKWl2YSkU6dOadasWfr1r3/tSIM2m02hoaFO/Xx9fdWxY0fZbDZHn7CwMKc+596f63O+9PR0BQcHO7bIyEh3DwcAAHi5ZhGSqqurdfvtt8swDC1btqzRzzdnzhxVVFQ4tuLi4kY/JwAA8C6+ni7gQs4FpIMHD2rz5s1O1xTDw8NVWlrq1P/MmTMqKytTeHi4o09JSYlTn3Pvz/U5n7+/v/z9/d05DAAA0Mx49UrSuYB04MABffLJJ+rUqZPTfqvVqvLycuXl5TnaNm/erNraWsXGxjr6bN26VdXV1Y4+mZmZ6t27tzp06NA0AwEAAM2OR0NSZWWl8vPzlZ+fL0kqKipSfn6+Dh06pOrqav3qV7/Szp07tXLlStXU1Mhms8lms+n06dOSpL59+2rs2LGaMmWKduzYoc8++0ypqamaNGmSIiIiJEl33nmn/Pz8lJSUpH379unNN9/U4sWLNWPGDE8NGwAANAMefQTAli1bdOONN9Zpnzx5sp588klF
R0ebfu7TTz/VqFGjJJ19mGRqaqrWrVsnHx8fTZw4URkZGWrXrp2j/549e5SSkqLc3Fx17txZDz74oGbNmlXvOnkEAAAADddcHwHgNc9J8maEJAAAGq65hiSvvicJAADAUwhJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJghJAAAAJjwakrZu3apbbrlFERERslgsWrt2rdN+wzCUlpamLl26qE2bNoqLi9OBAwec+pSVlSkxMVFBQUEKCQlRUlKSKisrnfrs2bNHI0aMUEBAgCIjI7VgwYLGHhoAAGjmPBqSjh8/roEDB2rp0qWm+xcsWKCMjAwtX75cOTk5CgwMVHx8vE6dOuXok5iYqH379ikzM1Pr16/X1q1blZyc7Nhvt9s1ZswYRUVFKS8vT88995yefPJJvfTSS40+PgAA0HxZDMMwPF2EJFksFq1Zs0a33XabpLOrSBEREXrkkUc0c+ZMSVJFRYXCwsK0YsUKTZo0SQUFBYqJiVFubq6GDh0qSdqwYYPGjRunw4cPKyIiQsuWLdPvfvc72Ww2+fn5SZJmz56ttWvX6ssvv6xXbXa7XcHBwaqoqFBQUJD7Bw8AQAvWffYHF+zz7fwEt5/3Yv9+e+09SUVFRbLZbIqLi3O0BQcHKzY2VtnZ2ZKk7OxshYSEOAKSJMXFxcnHx0c5OTmOPiNHjnQEJEmKj49XYWGhfvzxR9NzV1VVyW63O20AAODS4rUhyWazSZLCwsKc2sPCwhz7bDabQkNDnfb7+vqqY8eOTn3MjvHTc5wvPT1dwcHBji0yMvLiBwQAAJoVrw1JnjRnzhxVVFQ4tuLiYk+XBAAAmpjXhqTw8HBJUklJiVN7SUmJY194eLhKS0ud9p85c0ZlZWVOfcyO8dNznM/f319BQUFOGwAAuLR4bUiKjo5WeHi4Nm3a5Giz2+3KycmR1WqVJFmtVpWXlysvL8/RZ/PmzaqtrVVsbKyjz9atW1VdXe3ok5mZqd69e6tDhw5NNBoAANDceDQkVVZWKj8/X/n5+ZLO3qydn5+vQ4cOyWKxaNq0aXrmmWf097//XXv37tXdd9+tiIgIxzfg+vbtq7Fjx2rKlCnasWOHPvvsM6WmpmrSpEmKiIiQJN15553y8/NTUlKS9u3bpzfffFOLFy/WjBkzPDRqAADQHPh68uQ7d+7UjTfe6Hh/LrhMnjxZK1as0GOPPabjx48rOTlZ5eXluv7667VhwwYFBAQ4PrNy5UqlpqZq9OjR8vHx0cSJE5WRkeHYHxwcrI0bNyolJUVDhgxR586dlZaW5vQsJQAAgPN5zXOSvBnPSQIAoOF4ThIAAEALQkgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAw4XJIOnnypE6cOOF4f/DgQS1atEgbN250a2EAAACe5HJIGj9+vF5//XVJUnl5uWJjY/X8889r/PjxWrZsmdsLBAAA8ASXQ9KuXbs0YsQISdI777yjsLAwHTx4UK+//royMjLcXiAAAIAnuBySTpw4ofbt20uSNm7cqAkTJsjHx0fXXnutDh486PYCAQAAPMHlkNSzZ0+tXbtWxcXF+vjjjzVmzBhJUmlpqYKCgtxeIAAAgCe4HJLS0tI0
c+ZMde/eXbGxsbJarZLOrioNHjzY7QUCAAB4gq+rH/jVr36l66+/XkePHtXAgQMd7aNHj9aECRPcWhwAAICnuLySdN999ykwMFCDBw+Wj8//ffyqq67Ss88+69biAAAAPMXlkPTaa6/p5MmTddpPnjzpeDQAAABAc1fvy212u12GYcgwDB07dkwBAQGOfTU1Nfrwww8VGhraKEUCAAA0tXqHpJCQEFksFlksFl155ZV19lssFj311FNuLQ4AAMBT6h2SPv30UxmGoZtuuknvvvuuOnbs6Njn5+enqKgoRURENEqRAAAATa3eIemGG26QJBUVFSkyMtLppm0AAICWxuVHAERFRam8vFw7duxQaWmpamtrnfbffffdbisOAADAU1wOSevWrVNiYqIqKysVFBQki8Xi2GexWAhJAACgRXD5mtkjjzyi++67T5WVlSovL9ePP/7o2MrKyhqjRgAAgCbnckj67rvv9NBDD6lt27aNUQ8AAIBXcDkkxcfHa+fOnY1RCwAAgNdw+Z6khIQEPfroo9q/f7/69++v1q1bO+2/9dZb3VYcAACAp7gckqZMmSJJmjdvXp19FotFNTU1F18VAACAh7kcks7/yj8AAEBLdFFPhDx16pS76gAAAPAqLoekmpoaPf3007r88svVrl07/etf/5IkzZ07V6+88orbCwQAAPAEl0PS73//e61YsUILFiyQn5+fo71fv356+eWX3VocAACAp7gckl5//XW99NJLSkxMVKtWrRztAwcO1JdffunW4mpqajR37lxFR0erTZs26tGjh55++mkZhuHoYxiG0tLS1KVLF7Vp00ZxcXE6cOCA03HKysqUmJiooKAghYSEKCkpSZWVlW6tFQAAtCwNephkz54967TX1taqurraLUWd8+yzz2rZsmX605/+pIKCAj377LNasGCBlixZ4uizYMECZWRkaPny5crJyVFgYKDi4+Od7pdKTEzUvn37lJmZqfXr12vr1q1KTk52a60AAKBlcfnbbTExMfrHP/6hqKgop/Z33nlHgwcPdlthkrRt2zaNHz9eCQkJkqTu3bvrjTfe0I4dOySdXUVatGiRHn/8cY0fP17S2ZWusLAwrV27VpMmTVJBQYE2bNig3NxcDR06VJK0ZMkSjRs3Tn/84x8VERFR57xVVVWqqqpyvLfb7W4dFwAA8H4urySlpaUpNTVVzz77rGpra/Xee+9pypQp+v3vf6+0tDS3Fnfddddp06ZN+uqrryRJn3/+uf75z3/q5ptvliQVFRXJZrMpLi7O8Zng4GDFxsYqOztbkpSdna2QkBBHQJKkuLg4+fj4KCcnx/S86enpCg4OdmyRkZFuHRcAAPB+Loek8ePHa926dfrkk08UGBiotLQ0FRQUaN26dfrFL37h1uJmz56tSZMmqU+fPmrdurUGDx6sadOmKTExUZJks9kkSWFhYU6fCwsLc+yz2WwKDQ112u/r66uOHTs6+pxvzpw5qqiocGzFxcVuHRcAAPB+Ll9uk6QRI0YoMzPT3bXU8dZbb2nlypVatWqVrrrqKuXn52vatGmKiIjQ5MmTG+28/v7+8vf3b7TjAwAA79egkHROZWVlnSdwBwUFXVRBP/Xoo486VpMkqX///jp48KDS09M1efJkhYeHS5JKSkrUpUsXx+dKSko0aNAgSVJ4eLhKS0udjnvmzBmVlZU5Pg8AAHA+ly+3FRUVKSEhQYGBgQoODlaHDh3UoUMHhYSEqEOHDm4t7sSJE/LxcS6xVatWjmAWHR2t8PBwbdq0ybHfbrcrJydHVqtVkmS1WlVeXq68vDxHn82bN6u2tlaxsbFurRcAALQcLq8k3XXXXTIMQ3/9618VFhYmi8XSGHVJkm655Rb9/ve/V7du3XTVVVdp9+7dWrhwoe677z5JZ39Qd9q0aXrmmWfUq1cvRUdHa+7cuYqIiNBtt90mSerbt6/Gjh2rKVOmaPny5aqurlZqaqomTZpk+s02AAAAqQEh
6fPPP1deXp569+7dGPU4WbJkiebOnasHHnhApaWlioiI0G9/+1unb9E99thjOn78uJKTk1VeXq7rr79eGzZsUEBAgKPPypUrlZqaqtGjR8vHx0cTJ05URkZGo9cPAACaL4vx08dX18ONN96o3/3ud05fu2/p7Ha7goODVVFR4dZ7rgAAuBR0n/3BBft8Oz/B7ee92L/fLq8kvfzyy5o6daq+++479evXT61bt3baP2DAAJeLAAAA8DYuh6Tvv/9e33zzje69915Hm8VikWEYslgsqqmpcWuBAAAAnuBySLrvvvs0ePBgvfHGG41+4zYAAICnuBySDh48qL///e+mP3ILAADQUrj8nKSbbrpJn3/+eWPUAgAA4DVcXkm65ZZbNH36dO3du1f9+/evc+P2rbfe6rbiAAAAPMXlkDR16lRJ0rx58+rs48ZtAADQUrgcks7/rTYAAICWyOV7kgAAAC4FLq8kmV1m+6mf/mQIAABAc+VySFqzZo3T++rqahUVFcnX11c9evQgJAEAgBbB5ZC0e/fuOm12u1333HOPfvnLX7qlKAAAAE9zyz1JQUFBeuqppzR37lx3HA4AAMDj3HbjdkVFhSoqKtx1OAAAAI9y+XJbRkaG03vDMHT06FH97//+r26++Wa3FQYAAOBJLoekF154wem9j4+PLrvsMk2ePFlz5sxxW2EAAACe5HJIKioqaow6AAAAvIrL9yRVVFSorKysTntZWZnsdrtbigIAAPA0l0PSpEmTtHr16jrtb731liZNmuSWogAAADzN5ZCUk5OjG2+8sU77qFGjlJOT45aiAAAAPM3lkFRVVaUzZ87Uaa+urtbJkyfdUhQAAICnuRyShg0bppdeeqlO+/LlyzVkyBC3FAUAAOBpLn+77ZlnnlFcXJw+//xzjR49WpK0adMm5ebmauPGjW4vEAAAwBNcXkkaPny4srOz1bVrV7311ltat26devbsqT179mjEiBGNUSMAAECTc3klSZIGDRqkVatWubsWAAAAr9GgkFRTU6O1a9eqoKBAknTVVVfp1ltvVatWrdxaHAAAgKe4HJK+/vprJSQk6PDhw+rdu7ckKT09XZGRkfrggw/Uo0cPtxcJAADQ1FwOSQ899JCuuOIKZWdnq2PHjpKkH374QXfddZceeughffDBB24vsqXrPvvC/2bfzk9ogkoAAMA5LoekrKwsbd++3RGQJKlTp06aP3++hg8f7tbiAAAAPMXlb7f5+/vr2LFjddorKyvl5+fnlqIAAAA8zeWQ9F//9V9KTk5WTk6ODMOQYRjavn27pk6dqltvvbUxagQAAGhyLoekjIwM9ejRQ1arVQEBAQoICNDw4cPVs2dPLV68uDFqBAAAaHIu35MUEhKi999/XwcOHNCXX34pSerbt6969uzp9uIAAAA8pUHPSZKkXr16qVevXu6sBQAAwGvUKyTNmDGj3gdcuHBhg4sBAADwFvUKSbt373Z6v2vXLp05c8bxMMmvvvpKrVq10pAhQ9xfIQAAgAfUKyR9+umnjtcLFy5U+/bt9dprr6lDhw6SpB9//FH33nsvP3ALAABaDJe/3fb8888rPT3dEZAkqUOHDnrmmWf0/PPPu7U4AAAAT3E5JNntdn3//fd12r///nvTh0wCAAA0Ry6HpF/+8pe699579d577+nw4cM6fPiw3n33XSUlJWnChAmNUSMAAECTc/kRAMuXL9fMmTN15513qrq6+uxBfH2VlJSk5557zu0FAgAAeILLIalt27Z68cUX9dxzz+mbb76RJPXo0UOBgYFuLw4AAMBTXL7cdk5gYKAGDBigAQMGNGpA+u6773TXXXepU6dOatOmjfr376+dO3c69huGobS0NHXp0kVt2rRRXFycDhw44HSMsrIyJSYmKigoSCEhIUpKSlJlZWWj1QwAAJq/BoekpvDjjz9q+PDhat26tT766CPt379fzz//vNM36xYsWKCMjAwtX75cOTk5CgwMVHx8vE6d
OuXok5iYqH379ikzM1Pr16/X1q1blZyc7IkhAQCAZqLBP0vSFJ599llFRkbq1VdfdbRFR0c7XhuGoUWLFunxxx/X+PHjJUmvv/66wsLCtHbtWk2aNEkFBQXasGGDcnNzNXToUEnSkiVLNG7cOP3xj39UREREnfNWVVWpqqrK8d5utzfWEAEAgJfy6pWkv//97xo6dKj++7//W6GhoRo8eLD+8pe/OPYXFRXJZrMpLi7O0RYcHKzY2FhlZ2dLkrKzsxUSEuIISJIUFxcnHx8f5eTkmJ43PT1dwcHBji0yMrKRRggAALxVvULS1VdfrR9//FGSNG/ePJ04caJRizrnX//6l5YtW6ZevXrp448/1v3336+HHnpIr732miTJZrNJksLCwpw+FxYW5thns9kUGhrqtN/X11cdO3Z09DnfnDlzVFFR4diKi4vdPTQAAODl6hWSCgoKdPz4cUnSU0891WQ3PdfW1urqq6/WH/7wBw0ePFjJycmaMmWKli9f3qjn9ff3V1BQkNMGAAAuLfW6J2nQoEG69957df3118swDP3xj39Uu3btTPumpaW5rbguXbooJibGqa1v37569913JUnh4eGSpJKSEnXp0sXRp6SkRIMGDXL0KS0tdTrGmTNnVFZW5vg8AADA+eoVklasWKEnnnhC69evl8Vi0UcffSRf37oftVgsbg1Jw4cPV2FhoVPbV199paioKElnb+IODw/Xpk2bHKHIbrcrJydH999/vyTJarWqvLxceXl5GjJkiCRp8+bNqq2tVWxsrNtqBQAALUu9QlLv3r21evVqSZKPj482bdpU5z6fxjB9+nRdd911+sMf/qDbb79dO3bs0EsvvaSXXnpJ0tlQNm3aND3zzDPq1auXoqOjNXfuXEVEROi2226TdHblaezYsY7LdNXV1UpNTdWkSZNMv9kGAAAgNeARALW1tY1Rh6lrrrlGa9as0Zw5czRv3jxFR0dr0aJFSkxMdPR57LHHdPz4cSUnJ6u8vFzXX3+9NmzYoICAAEeflStXKjU1VaNHj5aPj48mTpyojIyMJhsHAABofiyGYRiufuibb77RokWLVFBQIEmKiYnRww8/rB49eri9QG9gt9sVHBysioqKRrmJu/vsDy7Y59v5CW4/LwAATcFTf+cu9u+3y89J+vjjjxUTE6MdO3Y4fpYkJydHV111lTIzM10uAAAAwBu5fLlt9uzZmj59uubPn1+nfdasWfrFL37htuIAAAA8xeWVpIKCAiUlJdVpv++++7R//363FAUAAOBpLoekyy67TPn5+XXa8/Pzm+QbbwAAAE3B5cttU6ZMUXJysv71r3/puuuukyR99tlnevbZZzVjxgy3FwgAAOAJLoekuXPnqn379nr++ec1Z84cSVJERISefPJJPfTQQ24vEAAAwBNcDkkWi0XTp0/X9OnTdezYMUlS+/bt3V4YAACAJ7kckn6KcAQAAFoql2/cBgAAuBQQkgAAAEwQkgAAAEwQkgAAAEw0KCSlpqaqrKzM3bUAAAB4jXqHpMOHDzter1q1SpWVlZKk/v37q7i42P2VAQAAeFC9HwHQp08fderUScOHD9epU6dUXFysbt266dtvv1V1dXVj1ggAANDk6r2SVF5errfffltDhgxRbW2txo0bpyuvvFJVVVX6+OOPVVJS0ph1AgAANKl6h6Tq6moNGzZMjzzyiNq0aaPdu3fr1VdfVatWrfTXv/5V0dHR6t27d2PWCgAA0GTqfbktJCREgwYN0vDhw3X69GmdPHlSw4cPl6+vr958801dfvnlys3NbcxaAQAAmky9V5K+++47Pf744/L399eZM2c0ZMgQjRgxQqdPn9auXbtksVh0/fXXN2atAAAATabeIalz58665ZZblJ6errZt2yo3N1cPPvigLBaLZs6cqeDgYN1www2NWSsAAECTafDDJIODg3X77berdevW2rx5s4qKivTAAw+4szYAAACPqfc9ST+1Z88eXX755ZKk
qKgotW7dWuHh4brjjjvcWhwAAICnNCgkRUZGOl5/8cUXbisGAADAW/DbbQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACYISQAAACaaVUiaP3++LBaLpk2b5mg7deqUUlJS1KlTJ7Vr104TJ05USUmJ0+cOHTqkhIQEtW3bVqGhoXr00Ud15syZJq4eAAA0J80mJOXm5urPf/6zBgwY4NQ+ffp0rVu3Tm+//baysrJ05MgRTZgwwbG/pqZGCQkJOn36tLZt26bXXntNK1asUFpaWlMPAQAANCPNIiRVVlYqMTFRf/nLX9ShQwdHe0VFhV555RUtXLhQN910k4YMGaJXX31V27Zt0/bt2yVJGzdu1P79+/W3v/1NgwYN0s0336ynn35aS5cu1enTpz01JAAA4OWaRUhKSUlRQkKC4uLinNrz8vJUXV3t1N6nTx9169ZN2dnZkqTs7Gz1799fYWFhjj7x8fGy2+3at2+f6fmqqqpkt9udNgAAcGnx9XQBF7J69Wrt2rVLubm5dfbZbDb5+fkpJCTEqT0sLEw2m83R56cB6dz+c/vMpKen66mnnnJD9QAAoLny6pWk4uJiPfzww1q5cqUCAgKa7Lxz5sxRRUWFYysuLm6ycwMAAO/g1SEpLy9PpaWluvrqq+Xr6ytfX19lZWUpIyNDvr6+CgsL0+nTp1VeXu70uZKSEoWHh0uSwsPD63zb7dz7c33O5+/vr6CgIKcNAABcWrw6JI0ePVp79+5Vfn6+Yxs6dKgSExMdr1u3bq1NmzY5PlNYWKhDhw7JarVKkqxWq/bu3avS0lJHn8zMTAUFBSkmJqbJxwQAAJoHr74nqX379urXr59TW2BgoDp16uRoT0pK0owZM9SxY0cFBQXpwQcflNVq1bXXXitJGjNmjGJiYvSb3/xGCxYskM1m0+OPP66UlBT5+/s3+ZgAAEDz4NUhqT5eeOEF+fj4aOLEiaqqqlJ8fLxefPFFx/5WrVpp/fr1uv/++2W1WhUYGKjJkydr3rx5HqwaAAB4u2YXkrZs2eL0PiAgQEuXLtXSpUt/9jNRUVH68MMPG7kyAADQknj1PUkAAACeQkgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAwQUgCAAAw4evpAuC67rM/uGCfb+cnNEElAAC0XKwkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmCAkAQAAmPDqkJSenq5rrrlG7du3V2hoqG677TYVFhY69Tl16pRSUlLUqVMntWvXThMnTlRJSYlTn0OHDikhIUFt27ZVaGioHn30UZ05c6YphwIAAJoZrw5JWVlZSklJ0fbt25WZmanq6mqNGTNGx48fd/SZPn261q1bp7fffltZWVk6cuSIJkyY4NhfU1OjhIQEnT59Wtu2bdNrr72mFStWKC0tzRNDAgAAzYSvpwv4TzZs2OD0fsWKFQoNDVVeXp5GjhypiooKvfLKK1q1apVuuukmSdKrr76qvn37avv27br22mu1ceNG7d+/X5988onCwsI0aNAgPf3005o1a5aefPJJ+fn5eWJoAADAy3n1StL5KioqJEkdO3aUJOXl5am6ulpxcXGOPn369FG3bt2UnZ0tScrOzlb//v0VFhbm6BMfHy+73a59+/aZnqeqqkp2u91pAwAAl5ZmE5Jqa2s1
bdo0DR8+XP369ZMk2Ww2+fn5KSQkxKlvWFiYbDabo89PA9K5/ef2mUlPT1dwcLBji4yMdPNoAACAt2s2ISklJUVffPGFVq9e3ejnmjNnjioqKhxbcXFxo58TAAB4F6++J+mc1NRUrV+/Xlu3blXXrl0d7eHh4Tp9+rTKy8udVpNKSkoUHh7u6LNjxw6n45379tu5Pufz9/eXv7+/m0cBAACaE69eSTIMQ6mpqVqzZo02b96s6Ohop/1DhgxR69attWnTJkdbYWGhDh06JKvVKkmyWq3au3evSktLHX0yMzMVFBSkmJiYphkIAABodrx6JSklJUWrVq3S+++/r/bt2zvuIQoODlabNm0UHByspKQkzZgxQx07dlRQUJAefPBBWa1WXXvttZKkMWPGKCYmRr/5zW+0YMEC2Ww2Pf7440pJSWG1CAAA/CyvDknLli2TJI0aNcqp/dVXX9U999wjSXrhhRfk4+OjiRMnqqqqSvHx8XrxxRcdfVu1aqX169fr/vvvl9VqVWBgoCZPnqx58+Y11TAAAEAz5NUhyTCMC/YJCAjQ0qVLtXTp0p/tExUVpQ8//NCdpQEAgBbOq+9JAgAA8BRCEgAAgAmvvtyGi9d99gcX7PPt/IQmqAQAgOaFlSQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAThCQAAAAT/CwJnPAzJgAAnMVKEgAAgAlCEgAAgAlCEgAAgAlCEgAAgAlCEgAAgAlCEgAAgAlCEgAAgAmek4QGq88zlSSeqwQAaJ5YSQIAADBBSAIAADBBSAIAADDBPUloMvwuHACgOWElCQAAwAQhCQAAwASX2+C1uDwHAPAkVpIAAABMsJKEFsHVB1uySgUAuBBWkgAAAEwQkgAAAExwuQ2oBy7PAcClh5AEuJmrgYoABgDeiZAENCPcoA4ATYeQBMCBUAUA/4eQBKBBXF3VAoDmhm+3AQAAmGAlCUCT4FIegOaGkATAK7kSqhrzG4VcVgQuXYQkAHAjbwps3nLsxq4FaCyXVEhaunSpnnvuOdlsNg0cOFBLlizRsGHDPF0WAOAieFNgIwy2LJdMSHrzzTc1Y8YMLV++XLGxsVq0aJHi4+NVWFio0NBQT5cHAMB/1FzDYHN2yXy7beHChZoyZYruvfdexcTEaPny5Wrbtq3++te/ero0AADghS6JlaTTp08rLy9Pc+bMcbT5+PgoLi5O2dnZdfpXVVWpqqrK8b6iokKSZLfbG6W+2qoTF+zz03O70t/Tx6aWpj82tTT9sVtCLZfKOL2plktlnA3p7y7njmkYRsMOYFwCvvvuO0OSsW3bNqf2Rx991Bg2bFid/k888YQhiY2NjY2Nja0FbMXFxQ3KD5fESpKr5syZoxkzZjje19bWqqysTJ06dZLFYmn089vtdkVGRqq4uFhBQUGNfj5PYZwtC+NsWRhny3KpjtMwDB07dkwRERENOt4lEZI6d+6sVq1aqaSkxKm9pKRE4eHhdfr7+/vL39/fqS0kJKQxSzQVFBTUov/HfA7jbFkYZ8vCOFuWS3GcwcHBDT7OJXHjtp+fn4YMGaJNmzY52mpra7Vp0yZZrVYPVgYAALzVJbGSJEkzZszQ5MmTNXToUA0bNkyLFi3S8ePHde+993q6NAAA4IUumZB0xx136Pvvv1daWppsNpsGDRqkDRs2KCwszNOl1eHv768nnniiziW/loZxtiyMs2VhnC0L42wYi2E09HtxAAAALdclcU8SAACAqwhJAAAAJghJAAAAJghJAAAAJghJXmbp0qXq3r27AgICFBsbqx07dni6JLd68sknZbFYnLY+ffp4uiy32Lp1q2655RZFRETIYrFo7dq1TvsNw1BaWpq6dOmiNm3aKC4uTgcOHPBMsRfhQuO855576szx2LFjPVNsA6Wnp+uaa65R+/btFRoaqttuu02FhYVOfU6dOqWUlBR16tRJ7dq108SJ
E+s8sNbb1Weco0aNqjOfU6dO9VDFDbds2TINGDDA8ZBBq9Wqjz76yLG/JcyndOFxtpT5/Kn58+fLYrFo2rRpjjZ3zSchyYu8+eabmjFjhp544gnt2rVLAwcOVHx8vEpLSz1dmltdddVVOnr0qGP75z//6emS3OL48eMaOHCgli5darp/wYIFysjI0PLly5WTk6PAwEDFx8fr1KlTTVzpxbnQOCVp7NixTnP8xhtvNGGFFy8rK0spKSnavn27MjMzVV1drTFjxuj48eOOPtOnT9e6dev09ttvKysrS0eOHNGECRM8WLXr6jNOSZoyZYrTfC5YsMBDFTdc165dNX/+fOXl5Wnnzp266aabNH78eO3bt09Sy5hP6cLjlFrGfJ6Tm5urP//5zxowYIBTu9vms0G/+IZGMWzYMCMlJcXxvqamxoiIiDDS09M9WJV7PfHEE8bAgQM9XUajk2SsWbPG8b62ttYIDw83nnvuOUdbeXm54e/vb7zxxhseqNA9zh+nYRjG5MmTjfHjx3uknsZSWlpqSDKysrIMwzg7d61btzbefvttR5+CggJDkpGdne2pMi/a+eM0DMO44YYbjIcffthzRTWiDh06GC+//HKLnc9zzo3TMFrWfB47dszo1auXkZmZ6TQud84nK0le4vTp08rLy1NcXJyjzcfHR3FxccrOzvZgZe534MABRURE6IorrlBiYqIOHTrk6ZIaXVFRkWw2m9P8BgcHKzY2tsXNryRt2bJFoaGh6t27t+6//3798MMPni7polRUVEiSOnbsKEnKy8tTdXW103z26dNH3bp1a9bzef44z1m5cqU6d+6sfv36ac6cOTpx4oQnynObmpoarV69WsePH5fVam2x83n+OM9pKfOZkpKihIQEp3mT3Pvf5yXzxG1v9+9//1s1NTV1ngAeFhamL7/80kNVuV9sbKxWrFih3r176+jRo3rqqac0YsQIffHFF2rfvr2ny2s0NptNkkzn99y+lmLs2LGaMGGCoqOj9c033+h//ud/dPPNNys7O1utWrXydHkuq62t1bRp0zR8+HD169dP0tn59PPzq/PD1815Ps3GKUl33nmnoqKiFBERoT179mjWrFkqLCzUe++958FqG2bv3r2yWq06deqU2rVrpzVr1igmJkb5+fktaj5/bpxSy5nP1atXa9euXcrNza2zz53/fRKS0KRuvvlmx+sBAwYoNjZWUVFReuutt5SUlOTByuAukyZNcrzu37+/BgwYoB49emjLli0aPXq0BytrmJSUFH3xxRct5t65n/Nz40xOTna87t+/v7p06aLRo0frm2++UY8ePZq6zIvSu3dv5efnq6KiQu+8844mT56srKwsT5fldj83zpiYmBYxn8XFxXr44YeVmZmpgICARj0Xl9u8ROfOndWqVas6d9+XlJQoPDzcQ1U1vpCQEF155ZX6+uuvPV1Kozo3h5fa/ErSFVdcoc6dOzfLOU5NTdX69ev16aefqmvXro728PBwnT59WuXl5U79m+t8/tw4zcTGxkpSs5xPPz8/9ezZU0OGDFF6eroGDhyoxYsXt7j5/LlxmmmO85mXl6fS0lJdffXV8vX1la+vr7KyspSRkSFfX1+FhYW5bT4JSV7Cz89PQ4YM0aZNmxxttbW12rRpk9O15JamsrJS33zzjbp06eLpUhpVdHS0wsPDnebXbrcrJyenRc+vJB0+fFg//PBDs5pjwzCUmpqqNWvWaPPmzYqOjnbaP2TIELVu3dppPgsLC3Xo0KFmNZ8XGqeZ/Px8SWpW8/lzamtrVVVV1WLm8+ecG6eZ5jifo0eP1t69e5Wfn+/Yhg4dqsTERMdrt82n++4zx8VavXq14e/vb6xYscLYv3+/kZycbISEhBg2m83TpbnNI488YmzZssUoKioyPvvsMyMuLs7o3LmzUVpa6unSLtqxY8eM3bt3G7t37zYkGQsXLjR2795tHDx40DAMw5g/f74REhJivP/++8aePXuM8ePH
G9HR0cbJkyc9XLlr/tM4jx07ZsycOdPIzs42ioqKjE8++cS4+uqrjV69ehmnTp3ydOn1dv/99xvBwcHGli1bjKNHjzq2EydOOPpMnTrV6Natm7F582Zj586dhtVqNaxWqwerdt2Fxvn1118b8+bNM3bu3GkUFRUZ77//vnHFFVcYI0eO9HDlrps9e7aRlZVlFBUVGXv27DFmz55tWCwWY+PGjYZhtIz5NIz/PM6WNJ/nO/9be+6aT0KSl1myZInRrVs3w8/Pzxg2bJixfft2T5fkVnfccYfRpUsXw8/Pz7j88suNO+64w/j66689XZZbfPrpp4akOtvkyZMNwzj7GIC5c+caYWFhhr+/vzF69GijsLDQs0U3wH8a54kTJ4wxY8YYl112mdG6dWsjKirKmDJlSrML+mbjk2S8+uqrjj4nT540HnjgAaNDhw5G27ZtjV/+8pfG0aNHPVd0A1xonIcOHTJGjhxpdOzY0fD39zd69uxpPProo0ZFRYVnC2+A++67z4iKijL8/PyMyy67zBg9erQjIBlGy5hPw/jP42xJ83m+80OSu+bTYhiG0cAVLwAAgBaLe5IAAABMEJIAAABMEJIAAABMEJIAAABMEJIAAABMEJIAAABMEJIAAABMEJIAAABMEJIAXNK6d++uRYsWeboMAF6IkATA640aNUrTpk1rlGPn5uYqOTnZLcd6++23dd1110mStm3bpiuuuMItxwXgGb6eLgAAPOmyyy5z27Gys7M1fPhwSdI//vEPx2sAzRMrSQC82j333KOsrCwtXrxYFotFFotF3377rSQpKytLw4YNk7+/v7p06aLZs2frzJkzjs+OGjVKqampSk1NVXBwsDp37qy5c+fqpz9Zef7ltvLycv32t79VWFiYAgIC1K9fP61fv75etW7bts0RjP75z38SkoBmjpAEwKstXrxYVqtVU6ZM0dGjR3X06FFFRkbqu+++07hx43TNNdfo888/17Jly/TKK6/omWeecfr8a6+9Jl9fX+3YsUOLFy/WwoUL9fLLL5ueq7a2VjfffLM+++wz/e1vf9P+/fs1f/58tWrV6mfrW7VqlUJCQhQSEqIdO3boN7/5jUJCQvThhx9q5syZCgkJ0apVq9z6bwKgaXC5DYBXCw4Olp+fn9q2bavw8HBH+4svvqjIyEj96U9/ksViUZ8+fXTkyBHNmjVLaWlp8vE5+/8BIyMj9cILL8hisah3797au3evXnjhBU2ZMqXOuT755BPt2LFDBQUFuvLKKyXpgvcV3Xrrrbruuuv0ySefaNGiRVq/fr327NmjqVOnatu2bZKkzp07u+ufA0ATYiUJQLNUUFAgq9Uqi8XiaBs+fLgqKyt1+PBhR9u1117r1MdqterAgQOqqampc8z8/Hx17drVEZDqo127durevbt27dql8ePHq3v37tq7d6/GjRun7t27q3v37mrXrl0DRwnAk1hJAoD/r02bNi71P3TokGJiYiRJp06dkq+vrxYvXqyqqir5+Pho9erVuuuuu7R8+fLGKBdAIyMkAfB6fn5+dVZ++vbtq3fffVeGYThWij777DO1b99eXbt2dfTLyclx+tz27dvVq1cv0/uMBgwYoMOHD+urr76q12pSRESE8vPzZbPZFBcXp/z8fNXU1GjQoEH6xz/+oY4dOyooKKghQwbgBbjcBsDrde/eXTk5Ofr222/173//W7W1tXrggQdUXFysBx98UF9++aXef/99PfHEE5oxY4bjfiTp7GrPjBkzVFhYqDfeeENLlizRww8/bHqeG264QSNHjtTEiROVmZmpoqIiffTRR9qwYYNpf19fX/Xs2VOHDx9WbGys+vTpox9++EFXXHGFhg0bpp49eyo0NLRR/k0AND5CEgCvN3PmTLVq1UoxMTG67LLLdOjQIV1++eX68MMPtWPHDg0cOFBTp05VUlKSHn/8cafP3n333Tp58qSGDRumlJQUPfzww//x4ZHvvvuurrnmGv36179W
TEyMHnvsMdP7l35qy5YtGjlypKSzjyU49xpA82YxfvrAEABoQUaNGqVBgwbxsyMAGoSVJAAAABOEJAAAABNcbgMAADDBShIAAIAJQhIAAIAJQhIAAIAJQhIAAIAJQhIAAIAJQhIAAIAJQhIAAIAJQhIAAICJ/we+/WtIzzW52AAAAABJRU5ErkJggg==\n"},"metadata":{}}]},{"cell_type":"code","source":["# extract measures\n","NMI = nmi_fn(Pwc)\n","Q = modularity_fn(Pcc)\n","Ncut = ncut_fn(Pcc)\n","rd = infomap_rank_fn(in_data.Pdd) # we need the PageRank vector first\n","Infomap = infomap_fn(C,in_data.Pdd,rd)\n","if (pc.shape[1]==1):\n"," com = 0\n","else:\n"," com = _infomap_fn(pc)/np.log(pc.shape[1])"],"metadata":{"id":"Wo4Ytt-DkOFT","executionInfo":{"status":"ok","timestamp":1765745033493,"user_tz":-60,"elapsed":3046,"user":{"displayName":"Tomaso Erseghe","userId":"15955126948488574654"}}},"execution_count":41,"outputs":[]},{"cell_type":"code","source":["# collect them in dataframe\n","pd.DataFrame(data = {'topics': C.shape[1], 'com': com,\n"," 'NMI': NMI, 'Q': Q, 'Ncut': Ncut, 'Infomap': Infomap}, index=[0])"],"metadata":{"id":"PY7lCxvqrJW1","colab":{"base_uri":"https://localhost:8080/","height":81},"executionInfo":{"status":"ok","timestamp":1765745034534,"user_tz":-60,"elapsed":16,"user":{"displayName":"Tomaso Erseghe","userId":"15955126948488574654"}},"outputId":"ee39d3b3-0263-4973-89ee-6a683e7c29fc"},"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" topics com NMI Q Ncut Infomap\n","0 39 0.664808 0.29315 0.096722 0.857763 0.085053"],"text/html":["\n","