{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "e42a4e45",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.ensemble import BaggingClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "43155674",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                      Formatted Date        Summary Precip Type  \\\n",
      "0      2006-04-01 00:00:00.000 +0200  Partly Cloudy        rain   \n",
      "1      2006-04-01 01:00:00.000 +0200  Partly Cloudy        rain   \n",
      "2      2006-04-01 02:00:00.000 +0200  Mostly Cloudy        rain   \n",
      "3      2006-04-01 03:00:00.000 +0200  Partly Cloudy        rain   \n",
      "4      2006-04-01 04:00:00.000 +0200  Mostly Cloudy        rain   \n",
      "...                              ...            ...         ...   \n",
      "96448  2016-09-09 19:00:00.000 +0200  Partly Cloudy        rain   \n",
      "96449  2016-09-09 20:00:00.000 +0200  Partly Cloudy        rain   \n",
      "96450  2016-09-09 21:00:00.000 +0200  Partly Cloudy        rain   \n",
      "96451  2016-09-09 22:00:00.000 +0200  Partly Cloudy        rain   \n",
      "96452  2016-09-09 23:00:00.000 +0200  Partly Cloudy        rain   \n",
      "\n",
      "       Temperature (C)  Apparent Temperature (C)  Humidity  Wind Speed (km/h)  \\\n",
      "0             9.472222                  7.388889      0.89            14.1197   \n",
      "1             9.355556                  7.227778      0.86            14.2646   \n",
      "2             9.377778                  9.377778      0.89             3.9284   \n",
      "3             8.288889                  5.944444      0.83            14.1036   \n",
      "4             8.755556                  6.977778      0.83            11.0446   \n",
      "...                ...                       ...       ...                ...   \n",
      "96448        26.016667                 26.016667      0.43            10.9963   \n",
      "96449        24.583333                 24.583333      0.48            10.0947   \n",
      "96450        22.038889                 22.038889      0.56             8.9838   \n",
      "96451        21.522222                 21.522222      0.60            10.5294   \n",
      "96452        20.438889                 20.438889      0.61             5.8765   \n",
      "\n",
      "       Wind Bearing (degrees)  Visibility (km)  Loud Cover  \\\n",
      "0                       251.0          15.8263         0.0   \n",
      "1                       259.0          15.8263         0.0   \n",
      "2                       204.0          14.9569         0.0   \n",
      "3                       269.0          15.8263         0.0   \n",
      "4                       259.0          15.8263         0.0   \n",
      "...                       ...              ...         ...   \n",
      "96448                    31.0          16.1000         0.0   \n",
      "96449                    20.0          15.5526         0.0   \n",
      "96450                    30.0          16.1000         0.0   \n",
      "96451                    20.0          16.1000         0.0   \n",
      "96452                    39.0          15.5204         0.0   \n",
      "\n",
      "       Pressure (millibars)                           Daily Summary  \n",
      "0                   1015.13       Partly cloudy throughout the day.  \n",
      "1                   1015.63       Partly cloudy throughout the day.  \n",
      "2                   1015.94       Partly cloudy throughout the day.  \n",
      "3                   1016.41       Partly cloudy throughout the day.  \n",
      "4                   1016.51       Partly cloudy throughout the day.  \n",
      "...                     ...                                     ...  \n",
      "96448               1014.36  Partly cloudy starting in the morning.  \n",
      "96449               1015.16  Partly cloudy starting in the morning.  \n",
      "96450               1015.66  Partly cloudy starting in the morning.  \n",
      "96451               1015.95  Partly cloudy starting in the morning.  \n",
      "96452               1016.16  Partly cloudy starting in the morning.  \n",
      "\n",
      "[96453 rows x 12 columns]\n"
     ]
    }
   ],
   "source": [
    "# Read the dataset\n",
    "df = pd.read_csv(\"weather_data.csv\")\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "6710c472",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set some training and result data\n",
    "x = df.drop(\"Humidity\", axis=1)\n",
    "y = df[\"Humidity\"]\n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f2bb7d76",
   "metadata": {},
   "outputs": [],
   "source": [
    "# preprocessing\n",
    "\n",
    "scaler = StandardScaler()\n",
    "X_train = scaler.fit_transform(x_train)\n",
    "X_test = scaler.fit_transform(x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "a9698b59",
   "metadata": {},
   "outputs": [],
   "source": [
    "bagging_model = BaggingClassifier(base_estimator=3, n_estimators=500, max_samples=0.7, random_state=0)\n",
    "bagging_model.fit(x_train, y_train)\n",
    "\n",
    "y_predict_bagging = bagging_model.predict(x_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}