In [2]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import train_test_split

We start with default model, without using a scaler and without setting any value for 'alpha'

In [4]:
dataset = datasets.load_iris()
print(dataset)
X = dataset["data"]
y = dataset["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y)
clf = RidgeCV()

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     

Let's check for the Ridge params.

In [25]:
clf.get_params()

{'alpha_per_target': False,
 'alphas': array([ 0.1,  1. , 10. ]),
 'cv': None,
 'fit_intercept': True,
 'gcv_mode': None,
 'normalize': 'deprecated',
 'scoring': None,
 'store_cv_values': False}

Try to train the default model and check its score.

In [32]:
clf.fit(X_train, y_train)
print(f"Default score: {clf.score(X_test, y_test)}")

Default score: 0.9478191158579921


Now try to tune the 'alpha' parameter.

In [33]:
values = [0.1, 0.5, 1, 5, 10]
for val in values:
  alphas = (val)
  clf2 = RidgeCV(alphas=alphas)
  clf2.fit(X_train, y_train)
  print(f"Score with alpha = {val}: {clf2.score(X_test, y_test)}")

Score with alpha = 0.1: 0.9489746257095962
Score with alpha = 0.5: 0.9484706431053234
Score with alpha = 1: 0.9478191158579921
Score with alpha = 5: 0.9427253648684416
Score with alpha = 10: 0.9370711176741738


We can see that the best score is obtained by using '0.1' value for alpha parameter.

Now, we try to use a scaler for optimize more the performance.

In [70]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [75]:
values = [0.001, 0.5, 1, 5, 10]
for val in values:
  alphas = (val)
  clf3 = RidgeCV(alphas=alphas)
  clf3.fit(X_train, y_train)
  print(f"Score with scaler and alpha = {val}: {clf3.score(X_test, y_test)}")


Score with scaler and alpha = 0.001: 0.9252381668650799
Score with scaler and alpha = 0.5: 0.9252143587826578
Score with scaler and alpha = 1: 0.9251102118122352
Score with scaler and alpha = 5: 0.9228145629306598
Score with scaler and alpha = 10: 0.9186746644414144
