{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7c1d0e2",
   "metadata": {},
   "source": [
    "# Predicting humidity with a bagging ensemble\n",
    "\n",
    "Loads weather observations from `weather_data.csv`, keeps the numeric measurements as features, and fits a bagged ensemble of regression trees to predict the continuous `Humidity` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e42a4e45",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.ensemble import BaggingClassifier, BaggingRegressor"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c3a9f514",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43155674",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the dataset and preview the first rows (rich display) rather than\n",
    "# dumping the whole frame as plain text.\n",
    "df = pd.read_csv(\"weather_data.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d48e21a7",
   "metadata": {},
   "source": [
    "## Features, target and train/test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6710c472",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target: the continuous Humidity column.\n",
    "y = df[\"Humidity\"]\n",
    "\n",
    "# Features: every other *numeric* column. The text columns (timestamps,\n",
    "# summaries, precipitation type) cannot be scaled or fed to the trees as-is.\n",
    "x = df.drop(columns=\"Humidity\").select_dtypes(include=\"number\")\n",
    "\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    x, y, test_size=0.2, random_state=42\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5b7730c",
   "metadata": {},
   "source": [
    "## Scale features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2bb7d76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learn the scaling statistics on the training split only, then reuse them on\n",
    "# the test split. Refitting on the test data would leak information and put\n",
    "# the two splits on different scales.\n",
    "scaler = StandardScaler()\n",
    "x_train_scaled = scaler.fit_transform(x_train)\n",
    "x_test_scaled = scaler.transform(x_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f61c08d9",
   "metadata": {},
   "source": [
    "## Bagging model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9698b59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Humidity is continuous, so this is a regression task; a classifier rejects\n",
    "# continuous targets. No base estimator is passed, so the ensemble uses its\n",
    "# default base learner (a decision tree regressor).\n",
    "bagging_model = BaggingRegressor(n_estimators=500, max_samples=0.7, random_state=0)\n",
    "\n",
    "# Train and predict on the scaled features so the preprocessing is actually used.\n",
    "bagging_model.fit(x_train_scaled, y_train)\n",
    "\n",
    "y_predict_bagging = bagging_model.predict(x_test_scaled)\n",
    "\n",
    "# R^2 on the held-out split.\n",
    "bagging_model.score(x_test_scaled, y_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}