{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import statistics \n", "from collections import Counter\n", "from sklearn.impute import SimpleImputer\n", "import sklearn.preprocessing as sk\n", "from scipy.stats import zscore\n", "import jenkspy\n", "import seaborn as sns; sns.set()\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.tree import DecisionTreeClassifier,export_graphviz,plot_tree\n", "from sklearn import tree\n", "from sklearn.naive_bayes import MultinomialNB\n", "import statsmodels.tools.tools as stattools\n", "from sklearn.metrics import accuracy_score,confusion_matrix,classification_report\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.cluster import KMeans\n", "from mpl_toolkits.mplot3d.axes3d import Axes3D\n", "from sklearn.metrics import silhouette_samples, silhouette_score\n", "from scipy.cluster.hierarchy import dendrogram, linkage,fcluster" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# clustering on Banknote Dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 3.6216 | \n", "8.6661 | \n", "-2.8073 | \n", "-0.44699 | \n", "0 | \n", "
---|---|---|---|---|---|
0 | \n", "4.54590 | \n", "8.16740 | \n", "-2.4586 | \n", "-1.46210 | \n", "0 | \n", "
1 | \n", "3.86600 | \n", "-2.63830 | \n", "1.9242 | \n", "0.10645 | \n", "0 | \n", "
2 | \n", "3.45660 | \n", "9.52280 | \n", "-4.0112 | \n", "-3.59440 | \n", "0 | \n", "
3 | \n", "0.32924 | \n", "-4.45520 | \n", "4.5718 | \n", "-0.98880 | \n", "0 | \n", "
4 | \n", "4.36840 | \n", "9.67180 | \n", "-3.9606 | \n", "-3.16250 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1366 | \n", "0.40614 | \n", "1.34920 | \n", "-1.4501 | \n", "-0.55949 | \n", "1 | \n", "
1367 | \n", "-1.38870 | \n", "-4.87730 | \n", "6.4774 | \n", "0.34179 | \n", "1 | \n", "
1368 | \n", "-3.75030 | \n", "-13.45860 | \n", "17.5932 | \n", "-2.77710 | \n", "1 | \n", "
1369 | \n", "-3.56370 | \n", "-8.38270 | \n", "12.3930 | \n", "-1.28230 | \n", "1 | \n", "
1370 | \n", "-2.54190 | \n", "-0.65804 | \n", "2.6842 | \n", "1.19520 | \n", "1 | \n", "
1371 rows × 5 columns
\n", "\n", " | a | \n", "b | \n", "c | \n", "d | \n", "e | \n", "
---|---|---|---|---|---|
0 | \n", "4.54590 | \n", "8.16740 | \n", "-2.4586 | \n", "-1.46210 | \n", "0 | \n", "
1 | \n", "3.86600 | \n", "-2.63830 | \n", "1.9242 | \n", "0.10645 | \n", "0 | \n", "
2 | \n", "3.45660 | \n", "9.52280 | \n", "-4.0112 | \n", "-3.59440 | \n", "0 | \n", "
3 | \n", "0.32924 | \n", "-4.45520 | \n", "4.5718 | \n", "-0.98880 | \n", "0 | \n", "
4 | \n", "4.36840 | \n", "9.67180 | \n", "-3.9606 | \n", "-3.16250 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1366 | \n", "0.40614 | \n", "1.34920 | \n", "-1.4501 | \n", "-0.55949 | \n", "1 | \n", "
1367 | \n", "-1.38870 | \n", "-4.87730 | \n", "6.4774 | \n", "0.34179 | \n", "1 | \n", "
1368 | \n", "-3.75030 | \n", "-13.45860 | \n", "17.5932 | \n", "-2.77710 | \n", "1 | \n", "
1369 | \n", "-3.56370 | \n", "-8.38270 | \n", "12.3930 | \n", "-1.28230 | \n", "1 | \n", "
1370 | \n", "-2.54190 | \n", "-0.65804 | \n", "2.6842 | \n", "1.19520 | \n", "1 | \n", "
1371 rows × 5 columns
\n", "\n", " | a | \n", "b | \n", "c | \n", "d | \n", "e | \n", "
---|---|---|---|---|---|
447 | \n", "3.93640 | \n", "10.588500 | \n", "-3.72500 | \n", "-4.313300 | \n", "0 | \n", "
681 | \n", "3.73210 | \n", "-3.884000 | \n", "3.35770 | \n", "-0.006049 | \n", "0 | \n", "
1297 | \n", "-0.96070 | \n", "2.696300 | \n", "-3.12260 | \n", "-1.312100 | \n", "1 | \n", "
96 | \n", "2.16160 | \n", "-6.880400 | \n", "8.15170 | \n", "-0.081048 | \n", "0 | \n", "
980 | \n", "-6.05980 | \n", "9.295200 | \n", "-0.43642 | \n", "-6.369400 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
211 | \n", "2.61400 | \n", "8.008100 | \n", "-3.72580 | \n", "-1.306900 | \n", "0 | \n", "
502 | \n", "4.92940 | \n", "0.277270 | \n", "0.20792 | \n", "0.336620 | \n", "0 | \n", "
537 | \n", "0.72252 | \n", "-0.053811 | \n", "5.67030 | \n", "-1.350900 | \n", "0 | \n", "
1220 | \n", "-1.83560 | \n", "-6.756200 | \n", "5.05850 | \n", "-0.550440 | \n", "1 | \n", "
175 | \n", "0.19081 | \n", "9.129700 | \n", "-3.72500 | \n", "-5.822400 | \n", "0 | \n", "
1096 rows × 5 columns
\n", "\n", " | a | \n", "b | \n", "c | \n", "d | \n", "
---|---|---|---|---|
447 | \n", "3.936400 | \n", "10.58850 | \n", "-3.72500 | \n", "-4.31330 | \n", "
1297 | \n", "-0.960700 | \n", "2.69630 | \n", "-3.12260 | \n", "-1.31210 | \n", "
980 | \n", "-6.059800 | \n", "9.29520 | \n", "-0.43642 | \n", "-6.36940 | \n", "
1249 | \n", "-2.436500 | \n", "3.60260 | \n", "-1.41660 | \n", "-2.89480 | \n", "
673 | \n", "1.375400 | \n", "8.87930 | \n", "-1.91360 | \n", "-0.53751 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1209 | \n", "0.025013 | \n", "3.39980 | \n", "-4.43270 | \n", "-4.26550 | \n", "
919 | \n", "-5.480800 | \n", "8.18190 | \n", "0.27818 | \n", "-5.03230 | \n", "
211 | \n", "2.614000 | \n", "8.00810 | \n", "-3.72580 | \n", "-1.30690 | \n", "
502 | \n", "4.929400 | \n", "0.27727 | \n", "0.20792 | \n", "0.33662 | \n", "
175 | \n", "0.190810 | \n", "9.12970 | \n", "-3.72500 | \n", "-5.82240 | \n", "
721 rows × 4 columns
\n", "