Closed thomasjpfan closed 4 years ago
With scikit-learn 0.22.1:
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
import numpy as np
# Load the digits data and hold out a stratified test split with a fixed seed.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    stratify=digits.target,
    random_state=0,
)

# Compare 10-fold CV accuracy of an SVC on the raw features against an SVC
# that is preceded by per-feature standardization.
scaled_svc = make_pipeline(StandardScaler(), SVC())
for estimator in (SVC(), scaled_svc):
    fold_scores = cross_val_score(estimator, X_train, y_train, cv=10)
    print(np.mean(fold_scores))
# Output reported in the original post (raw SVC first, then scaled pipeline):
# 0.9873742399115534
# 0.9769872857932558

# X_train.std() is also good for global scaling - if the features were on the same scale.
# this dataset is very atypical.
# Set gamma by hand from the feature count and the global variance of X_train.
# NOTE(review): this presumably mirrors SVC's gamma="scale" formula - confirm
# against the SVC documentation for the installed version.
n_features = X_train.shape[1]
manual_gamma = 1. / (n_features * X_train.var())
manual_scores = cross_val_score(
    SVC(gamma=manual_gamma), X_train, y_train, cv=10
)
print(np.mean(manual_scores))
# Output reported in the original post:
# 0.9873742399115534
Thanks — yeah, we fixed it, but it wasn't released yet when I last lectured on this :)
It is in the docs for SVC
and in the code:
https://github.com/scikit-learn/scikit-learn/blob/c79a5b4194de6fe4b7b64396999352e38170cf57/sklearn/svm/_base.py#L191