{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "titanic-bagging-overview",
   "metadata": {},
   "source": [
    "# Titanic survival prediction with bagging\n",
    "\n",
    "Trains a `BaggingClassifier` on the Titanic passenger data to predict the `Survived` label.\n",
    "\n",
    "1. Load `titanic_data.csv` and drop the columns that are not used as features.\n",
    "2. Fill missing `Age` values with the rounded mean age and missing `Embarked` values with the most frequent port.\n",
    "3. Encode the gender and the embarkation port as integers.\n",
    "4. Hold out 25 % of the rows for evaluation and report the accuracy on them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a5b9ff56-9501-464c-8be0-c9ca203d0f5e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age average: 30\n"
     ]
    }
   ],
   "source": [
    "# Bagging\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import BaggingClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "import sklearn.metrics as metrics\n",
    "\n",
    "# Configuration\n",
    "DATA_PATH = \"titanic_data.csv\"\n",
    "RANDOM_STATE = 0  # Shared seed so that Restart & Run All reproduces the split and the model\n",
    "EMBARKED_PORTS = (\"S\", \"C\", \"Q\")  # Known ports; the order is also the tie-break priority\n",
    "\n",
    "# Import data of the titanic passengers\n",
    "data_titanic = pd.read_csv(DATA_PATH)\n",
    "\n",
    "# Preprocessing\n",
    "# Not all data is important for the training. The PassengerId, the name, the number of siblings, the parch,\n",
    "# the ticket number and the cabin number are not important and therefore they will be dropped.\n",
    "# NOTE(review): the columns are selected by position, which assumes the column order of the CSV\n",
    "# (not visible from this notebook) - confirm against the file header.\n",
    "data_titanic = data_titanic.drop(columns=data_titanic.columns[[0, 3, 6, 7, 8, 10]])\n",
    "\n",
    "# Fill the gaps:\n",
    "# For the age: compute the average over all known ages and use it for the missing entries\n",
    "age_average = round(data_titanic[\"Age\"].mean(skipna=True))\n",
    "print('Age average: ', age_average)\n",
    "data_titanic[\"Age\"] = data_titanic[\"Age\"].fillna(age_average)\n",
    "\n",
    "# For the embarked: take the most occurring value, available options: S, C and Q.\n",
    "# max() returns the first maximal element, so a tie is always resolved in the order of EMBARKED_PORTS\n",
    "# instead of depending on which of several overlapping comparisons happens to run last.\n",
    "embarked_counts = data_titanic[\"Embarked\"].value_counts()\n",
    "common_embarked = max(EMBARKED_PORTS, key=lambda port: embarked_counts.get(port, 0))\n",
    "data_titanic[\"Embarked\"] = data_titanic[\"Embarked\"].fillna(common_embarked)\n",
    "\n",
    "# Gender and Embarked have to be replaced by values: Set male = 0, female = 1 and S = 0, C = 1, Q = 2\n",
    "gender_column = data_titanic.columns[2]\n",
    "embarked_column = data_titanic.columns[5]\n",
    "data_titanic = data_titanic.replace(\n",
    "    {\n",
    "        gender_column: {\"male\": 0, \"female\": 1},\n",
    "        embarked_column: {\"S\": 0, \"C\": 1, \"Q\": 2},\n",
    "    }\n",
    ")\n",
    "\n",
    "# Every gap must be closed before the data is handed to the classifier\n",
    "assert data_titanic[[\"Age\", \"Embarked\"]].notna().all().all(), \"Age/Embarked still contain missing values\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2a63a9f4-b658-4556-8cb2-6fa3eba5ce98",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split data into train and evaluation subsets: 25 % of the rows are held out for evaluation (1:3)\n",
    "# The target is dropped by name so that the features can never contain the label that is predicted\n",
    "input_data = data_titanic.drop(columns=[\"Survived\"])  # Input has to be without the \"Survived\" statement\n",
    "target_data = data_titanic[\"Survived\"]  # The bagging classifier has to predict the survived statement\n",
    "input_train, input_evaluate, target_train, target_evaluate = train_test_split(\n",
    "    input_data, target_data, test_size=0.25, random_state=RANDOM_STATE\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3575857-7cb9-4a23-a9b3-89ae47e896ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Perform Bagging\n",
    "classifier = BaggingClassifier(n_estimators=20, random_state=RANDOM_STATE)\n",
    "classifier.fit(input_train, target_train)\n",
    "target_pred = classifier.predict(input_evaluate)\n",
    "# accuracy_score is documented as (y_true, y_pred); the value is the same either way for accuracy,\n",
    "# but the documented order keeps the call correct if the metric is ever swapped for an asymmetric one\n",
    "print(\"Accuracy: \", metrics.accuracy_score(target_evaluate, target_pred))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}