{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a40c3060", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.datasets import fetch_openml\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.preprocessing import MinMaxScaler\n", "from sklearn.preprocessing import StandardScaler" ] }, { "cell_type": "code", "execution_count": 2, "id": "6ca0451b", "metadata": {}, "outputs": [], "source": [ "df_titanic = fetch_openml(\"titanic\", version = 1, as_frame = True)[\"data\"]" ] }, { "cell_type": "code", "execution_count": 3, "id": "108b699a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(1309, 13)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.shape" ] }, { "cell_type": "code", "execution_count": 4, "id": "0f68c698", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['pclass', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare',\n", " 'cabin', 'embarked', 'boat', 'body', 'home.dest'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.columns" ] }, { "cell_type": "code", "execution_count": 5, "id": "678f1623", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassnamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
01.0Allen, Miss. Elisabeth Waltonfemale29.00000.00.024160211.3375B5S2NaNSt Louis, MO
11.0Allison, Master. Hudson Trevormale0.91671.02.0113781151.5500C22 C26S11NaNMontreal, PQ / Chesterville, ON
21.0Allison, Miss. Helen Lorainefemale2.00001.02.0113781151.5500C22 C26SNoneNaNMontreal, PQ / Chesterville, ON
31.0Allison, Mr. Hudson Joshua Creightonmale30.00001.02.0113781151.5500C22 C26SNone135.0Montreal, PQ / Chesterville, ON
41.0Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25.00001.02.0113781151.5500C22 C26SNoneNaNMontreal, PQ / Chesterville, ON
\n", "
" ], "text/plain": [ " pclass name sex age \\\n", "0 1.0 Allen, Miss. Elisabeth Walton female 29.0000 \n", "1 1.0 Allison, Master. Hudson Trevor male 0.9167 \n", "2 1.0 Allison, Miss. Helen Loraine female 2.0000 \n", "3 1.0 Allison, Mr. Hudson Joshua Creighton male 30.0000 \n", "4 1.0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 \n", "\n", " sibsp parch ticket fare cabin embarked boat body \\\n", "0 0.0 0.0 24160 211.3375 B5 S 2 NaN \n", "1 1.0 2.0 113781 151.5500 C22 C26 S 11 NaN \n", "2 1.0 2.0 113781 151.5500 C22 C26 S None NaN \n", "3 1.0 2.0 113781 151.5500 C22 C26 S None 135.0 \n", "4 1.0 2.0 113781 151.5500 C22 C26 S None NaN \n", "\n", " home.dest \n", "0 St Louis, MO \n", "1 Montreal, PQ / Chesterville, ON \n", "2 Montreal, PQ / Chesterville, ON \n", "3 Montreal, PQ / Chesterville, ON \n", "4 Montreal, PQ / Chesterville, ON " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.head()" ] }, { "cell_type": "code", "execution_count": 6, "id": "2f31b7e0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassnamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
13043.0Zabour, Miss. Hilenifemale14.51.00.0266514.4542NoneCNone328.0None
13053.0Zabour, Miss. ThaminefemaleNaN1.00.0266514.4542NoneCNoneNaNNone
13063.0Zakarian, Mr. Mapriededermale26.50.00.026567.2250NoneCNone304.0None
13073.0Zakarian, Mr. Ortinmale27.00.00.026707.2250NoneCNoneNaNNone
13083.0Zimmerman, Mr. Leomale29.00.00.03150827.8750NoneSNoneNaNNone
\n", "
" ], "text/plain": [ " pclass name sex age sibsp parch ticket \\\n", "1304 3.0 Zabour, Miss. Hileni female 14.5 1.0 0.0 2665 \n", "1305 3.0 Zabour, Miss. Thamine female NaN 1.0 0.0 2665 \n", "1306 3.0 Zakarian, Mr. Mapriededer male 26.5 0.0 0.0 2656 \n", "1307 3.0 Zakarian, Mr. Ortin male 27.0 0.0 0.0 2670 \n", "1308 3.0 Zimmerman, Mr. Leo male 29.0 0.0 0.0 315082 \n", "\n", " fare cabin embarked boat body home.dest \n", "1304 14.4542 None C None 328.0 None \n", "1305 14.4542 None C None NaN None \n", "1306 7.2250 None C None 304.0 None \n", "1307 7.2250 None C None NaN None \n", "1308 7.8750 None S None NaN None " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.tail()" ] }, { "cell_type": "code", "execution_count": 7, "id": "c523519b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1309 entries, 0 to 1308\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 pclass 1309 non-null float64 \n", " 1 name 1309 non-null object \n", " 2 sex 1309 non-null category\n", " 3 age 1046 non-null float64 \n", " 4 sibsp 1309 non-null float64 \n", " 5 parch 1309 non-null float64 \n", " 6 ticket 1309 non-null object \n", " 7 fare 1308 non-null float64 \n", " 8 cabin 295 non-null object \n", " 9 embarked 1307 non-null category\n", " 10 boat 486 non-null object \n", " 11 body 121 non-null float64 \n", " 12 home.dest 745 non-null object \n", "dtypes: category(2), float64(6), object(5)\n", "memory usage: 115.4+ KB\n" ] } ], "source": [ "df_titanic.info()" ] }, { "cell_type": "code", "execution_count": 8, "id": "ad1c6271", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassagesibspparchfarebody
count1309.0000001046.0000001309.0000001309.0000001308.000000121.000000
mean2.29488229.8811350.4988540.38502733.295479160.809917
std0.83783614.4135001.0416580.86556051.75866897.696922
min1.0000000.1667000.0000000.0000000.0000001.000000
25%2.00000021.0000000.0000000.0000007.89580072.000000
50%3.00000028.0000000.0000000.00000014.454200155.000000
75%3.00000039.0000001.0000000.00000031.275000256.000000
max3.00000080.0000008.0000009.000000512.329200328.000000
\n", "
" ], "text/plain": [ " pclass age sibsp parch fare \\\n", "count 1309.000000 1046.000000 1309.000000 1309.000000 1308.000000 \n", "mean 2.294882 29.881135 0.498854 0.385027 33.295479 \n", "std 0.837836 14.413500 1.041658 0.865560 51.758668 \n", "min 1.000000 0.166700 0.000000 0.000000 0.000000 \n", "25% 2.000000 21.000000 0.000000 0.000000 7.895800 \n", "50% 3.000000 28.000000 0.000000 0.000000 14.454200 \n", "75% 3.000000 39.000000 1.000000 0.000000 31.275000 \n", "max 3.000000 80.000000 8.000000 9.000000 512.329200 \n", "\n", " body \n", "count 121.000000 \n", "mean 160.809917 \n", "std 97.696922 \n", "min 1.000000 \n", "25% 72.000000 \n", "50% 155.000000 \n", "75% 256.000000 \n", "max 328.000000 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.describe()" ] }, { "cell_type": "code", "execution_count": 11, "id": "61b8168d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Correlate the numeric columns only: DataFrame.corr() raises on the text columns\n", "# (name, ticket, cabin, ...) in pandas >= 2.0, where numeric_only defaults to False.\n", "numeric_corr = df_titanic.select_dtypes(include = \"number\").corr()\n", "plt.figure(figsize = (8, 6))\n", "sns.heatmap(numeric_corr, cmap = \"BuPu\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "7a74e552", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "male 843\n", "female 466\n", "Name: sex, dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic[\"sex\"].value_counts()" ] }, { "cell_type": "code", "execution_count": 14, "id": "e4b9b64c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# value_counts() orders the categories by frequency (male first here), so the labels and\n", "# colours are taken from the counted index instead of being hard-coded in a fixed order.\n", "sex_counts = df_titanic[\"sex\"].value_counts()\n", "sex_colors = {\"female\": \"#FF82AB\", \"male\": \"#8E388E\"}\n", "plt.figure(figsize = (8, 6))\n", "plt.pie(sex_counts,\n", "        explode = [0, 0.05],\n", "        labels = [str(sex).capitalize() for sex in sex_counts.index],\n", "        colors = [sex_colors[str(sex)] for sex in sex_counts.index])\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": 15, "id": "f0e4d736", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "35.6" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.round((sum(df_titanic[\"sex\"] == \"female\") / (df_titanic.shape[0]) * 100), 2)" ] }, { "cell_type": "code", "execution_count": 16, "id": "9187c02b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "64.4" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.round((sum(df_titanic[\"sex\"] == \"male\") / (df_titanic.shape[0]) * 100), 2)" ] }, { "cell_type": "code", "execution_count": 17, "id": "ccbfa540", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "30.0" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.round(df_titanic[\"age\"].mean(), 0)" ] }, { "cell_type": "code", "execution_count": 18, "id": "b68deced", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pclass 0\n", "name 0\n", "sex 0\n", "age 263\n", "sibsp 0\n", "parch 0\n", "ticket 0\n", "fare 1\n", "cabin 1014\n", "embarked 2\n", "boat 823\n", "body 1188\n", "home.dest 564\n", "dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 31, "id": "10093c65", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.set()\n", "missing_values = pd.DataFrame(df_titanic.isnull().sum()/len(df_titanic) * 100)\n", "missing_values.plot(kind = \"bar\",\n", " title = \"Percentage of Missing Values\",\n", " ylabel = \"Percentage\",\n", " color = \"#00CD66\")" ] },
{ "cell_type": "code", "execution_count": 32, "id": "7ee19288", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Datatypes of Missing Values:\n", "age : nan\n", "fare : nan\n", "cabin : None\n", "embarked : nan\n", "boat : None\n", "body : nan\n", "home.dest : None\n" ] } ], "source": [ "#Imputing Missing Values:\n", "print(\"Datatypes of Missing Values:\")\n", "for col in df_titanic.columns[df_titanic.isnull().any()]:\n", " print(col, \":\", df_titanic[col][df_titanic[col].isnull()].values[0])" ] },
{ "cell_type": "code", "execution_count": 35, "id": "aee16690", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'age': {'missing_values': nan, 'strategy': 'mean'},\n", " 'fare': {'missing_values': nan, 'strategy': 'mean'},\n", " 'cabin': {'missing_values': None, 'strategy': 'most_frequent'},\n", " 'embarked': {'missing_values': nan, 'strategy': 'most_frequent'},\n", " 'boat': {'missing_values': None, 'strategy': 'most_frequent'},\n", " 'body': {'missing_values': nan, 'strategy': 'mean'},\n", " 'home.dest': {'missing_values': None, 'strategy': 'most_frequent'}}" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def get_parameters(df_titanic):\n", "    \"\"\"Build per-column SimpleImputer settings for every column that has missing values.\n", "\n", "    Numeric (non-boolean) columns are assigned the \"mean\" strategy, every other column\n", "    \"most_frequent\". \"missing_values\" is the column's own missing-value marker\n", "    (NaN or None), read from its first null entry.\n", "\n", "    Returns a dict of the form {column: {\"missing_values\": marker, \"strategy\": name}}.\n", "    \"\"\"\n", "    parameters = {}\n", "    for col in df_titanic.columns[df_titanic.isnull().any()]:\n", "        column = df_titanic[col]\n", "        # Ask pandas whether the dtype is numeric instead of comparing against three dtype\n", "        # names, so float32, int16, ... columns are mean-imputed as well.\n", "        is_numeric = pd.api.types.is_numeric_dtype(column) and not pd.api.types.is_bool_dtype(column)\n", "        strategy = \"mean\" if is_numeric else \"most_frequent\"\n", "        missing_values = column[column.isnull()].values[0]\n", "        parameters[col] = {\"missing_values\" : missing_values,\n", "                           \"strategy\" : strategy}\n", "    return parameters\n", "get_parameters(df_titanic)" ] },
{ "cell_type": "code", "execution_count": 43, "id": "70ce99ac", "metadata": {}, "outputs": [], "source": [ "parameters = get_parameters(df_titanic)\n", "for col, param in parameters.items():\n", "    IMP = SimpleImputer(missing_values = param[\"missing_values\"], strategy = param[\"strategy\"])\n", "    # fit_transform returns an (n_rows, 1) array; flatten it before storing it as a column.\n", "    df_titanic[col] = IMP.fit_transform(df_titanic[[col]]).ravel()" ] },
{ "cell_type": "code", "execution_count": 44, "id": "2312e508", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pclass 0\n", "name 0\n", "sex 0\n", "age 0\n", "sibsp 0\n", "parch 0\n", "ticket 0\n", "fare 0\n", "cabin 0\n", "embarked 0\n", "boat 0\n", "body 0\n", "home.dest 0\n", "dtype: int64" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_titanic.isnull().sum()" ] },
{ "cell_type": "code", "execution_count": 45, "id": "fa570c0a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of Null Values (after) : 0\n" ] } ], "source": [ "print(f'Number of Null Values (after) : {df_titanic.age.isnull().sum()}')" ] },
{ "cell_type": "code", "execution_count": 47, "id": "0e24e195", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['pclass', 'age', 'sibsp', 'parch', 'fare', 'body'], dtype='object')" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Normalizing the Data - MinMaxScaler:\n", "# Select every numeric dtype rather than a fixed list of dtype names.\n", "n_cols = df_titanic.select_dtypes(include = \"number\").columns\n", "n_cols" ] },
{ "cell_type": "code", "execution_count": 51, "id": "b5d7be2b", "metadata": {}, "outputs": [], "source": [ "# Assign the filled columns back: fillna(inplace = True) on df_titanic[col] acts on a temporary\n", "# Series, which is deprecated chained assignment in pandas 2.x and has no effect under copy-on-write.\n", "df_titanic[n_cols] = df_titanic[n_cols].fillna(df_titanic[n_cols].mean())" ] },
{ "cell_type": "code", "execution_count": 52, "id": "6956e3bd", "metadata": {}, "outputs": [ {
"data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassagesibspparchfarebody
00.00.3611690.0000.0000000.4125030.488715
10.00.0093950.1250.2222220.2958060.488715
20.00.0229640.1250.2222220.2958060.488715
30.00.3736950.1250.2222220.2958060.409786
40.00.3110640.1250.2222220.2958060.488715
.....................
13041.00.1795400.1250.0000000.0282131.000000
13051.00.3722060.1250.0000000.0282130.488715
13061.00.3298540.0000.0000000.0141020.926606
13071.00.3361170.0000.0000000.0141020.488715
13081.00.3611690.0000.0000000.0153710.488715
\n", "

1309 rows × 6 columns

\n", "
" ], "text/plain": [ " pclass age sibsp parch fare body\n", "0 0.0 0.361169 0.000 0.000000 0.412503 0.488715\n", "1 0.0 0.009395 0.125 0.222222 0.295806 0.488715\n", "2 0.0 0.022964 0.125 0.222222 0.295806 0.488715\n", "3 0.0 0.373695 0.125 0.222222 0.295806 0.409786\n", "4 0.0 0.311064 0.125 0.222222 0.295806 0.488715\n", "... ... ... ... ... ... ...\n", "1304 1.0 0.179540 0.125 0.000000 0.028213 1.000000\n", "1305 1.0 0.372206 0.125 0.000000 0.028213 0.488715\n", "1306 1.0 0.329854 0.000 0.000000 0.014102 0.926606\n", "1307 1.0 0.336117 0.000 0.000000 0.014102 0.488715\n", "1308 1.0 0.361169 0.000 0.000000 0.015371 0.488715\n", "\n", "[1309 rows x 6 columns]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "minmax = MinMaxScaler()\n", "df_titanic[n_cols] = minmax.fit_transform(df_titanic[n_cols])\n", "df_titanic[n_cols]" ] }, { "cell_type": "code", "execution_count": 55, "id": "28746928", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassagesibspparchfarebody
count1.309000e+031.309000e+031.309000e+031.309000e+031.309000e+031.309000e+03
mean5.000329e-151.832313e-16-1.028801e-153.833620e-178.410215e-16-2.912731e-17
std1.000382e+001.000382e+001.000382e+001.000382e+001.000382e+001.000382e+00
min-1.546098e+00-2.307330e+00-4.790868e-01-4.449995e-01-6.437751e-01-5.402590e+00
25%-3.520907e-01-6.119712e-01-4.790868e-01-4.449995e-01-4.911082e-013.201135e-17
50%8.419164e-011.302752e-16-4.790868e-01-4.449995e-01-3.643001e-013.201135e-17
75%8.419164e-013.974806e-014.812878e-01-4.449995e-01-3.906640e-023.201135e-17
max8.419164e-013.891737e+007.203909e+009.956864e+009.262219e+005.652087e+00
\n", "
" ], "text/plain": [ " pclass age sibsp parch fare \\\n", "count 1.309000e+03 1.309000e+03 1.309000e+03 1.309000e+03 1.309000e+03 \n", "mean 5.000329e-15 1.832313e-16 -1.028801e-15 3.833620e-17 8.410215e-16 \n", "std 1.000382e+00 1.000382e+00 1.000382e+00 1.000382e+00 1.000382e+00 \n", "min -1.546098e+00 -2.307330e+00 -4.790868e-01 -4.449995e-01 -6.437751e-01 \n", "25% -3.520907e-01 -6.119712e-01 -4.790868e-01 -4.449995e-01 -4.911082e-01 \n", "50% 8.419164e-01 1.302752e-16 -4.790868e-01 -4.449995e-01 -3.643001e-01 \n", "75% 8.419164e-01 3.974806e-01 4.812878e-01 -4.449995e-01 -3.906640e-02 \n", "max 8.419164e-01 3.891737e+00 7.203909e+00 9.956864e+00 9.262219e+00 \n", "\n", " body \n", "count 1.309000e+03 \n", "mean -2.912731e-17 \n", "std 1.000382e+00 \n", "min -5.402590e+00 \n", "25% 3.201135e-17 \n", "50% 3.201135e-17 \n", "75% 3.201135e-17 \n", "max 5.652087e+00 " ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Normalizing the Data - StandardScaler:\n", "stsc = StandardScaler()\n", "df_titanic[n_cols] = stsc.fit_transform(df_titanic[n_cols])\n", "df_titanic[n_cols].describe()" ] }, { "cell_type": "code", "execution_count": 56, "id": "201e4ce9", "metadata": {}, "outputs": [], "source": [ "#so:\n", "#the mean of every scaled column (e.g. 5.000329e-15 for pclass) is approximately zero.\n", "#the standard deviation (std) of every scaled column is approximately one; describe() shows 1.000382\n", "#because pandas reports the sample std (ddof = 1) while StandardScaler divides by the population std (ddof = 0)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }