Nested cross-validation for unbiased predictions. Can be used with Scikit-Learn, XGBoost, Keras and LightGBM, or any other estimator that implements the scikit-learn interface.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans,MiniBatchKMeans
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.decomposition import TruncatedSVD
<ipython-input> in <module>()
39 nested_CV_search = NestedCV(model=models_to_run, params_grid=param_grid[i] , outer_kfolds=4, inner_kfolds=4,
40 cv_options={'sqrt_of_score':True, 'randomized_search_iter':30})
---> 41 nested_CV_search.fit(X=X,y=Train_Y)
42 grid_nested_cv.score_vs_variance_plot()
43 print('\nCumulated best parameter grid was:\n{0}'.format(nested_CV_search.best_params))
8 frames
/usr/local/lib/python3.7/dist-packages/nested_cv/nested_cv.py in _parallel_fitting(X_train_inner, X_test_inner, y_train_inner, y_test_inner, param_dict)
233 '\n\tFitting these parameters:\n\t{0}'.format(param_dict))
234 # Set hyperparameters, train model on inner split, predict results.
--> 235 self.model.set_params(**param_dict)
236
237 # Fit model with current hyperparameters and score it
AttributeError: 'list' object has no attribute 'set_params'
from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.cluster import KMeans,MiniBatchKMeans from sklearn.metrics import adjusted_rand_score import matplotlib.pyplot as plt import matplotlib.cm as cm from sklearn.decomposition import TruncatedSVD
# Nested cross-validation over several classifiers on TF-IDF keyword features.
#
# NOTE(review): `data`, `np`, `NestedCV` and the estimator classes used below
# (MultinomialNB, LinearSVC, LogisticRegression, RandomForestClassifier,
# MLPClassifier) are not imported in the visible part of this file; they are
# assumed to be defined/imported elsewhere - confirm.

# Turn the keyword text into a TF-IDF matrix and densify it.
vectorizer = TfidfVectorizer(
    min_df=0.3,
)
X = vectorizer.fit_transform(data.keywords)
X = X.toarray()

from sklearn.preprocessing import LabelEncoder

# Encode the string class labels as integers.
Encoder = LabelEncoder()
Train_Y = Encoder.fit_transform(data['keys'])

# One estimator per entry; param_grid must hold one grid per estimator,
# in the same order.
models_to_run = [
    MultinomialNB(),
    LinearSVC(),
    LogisticRegression(),
    RandomForestClassifier(),
    MLPClassifier(),
]
param_grid = [
    {'alpha': (1e-2, 1e-3)},
    {'C': (np.logspace(-5, 1, 5))},
    # NOTE(review): the grids for LogisticRegression, RandomForestClassifier
    # and MLPClassifier were cut off in the original snippet - add one dict
    # per remaining model here.
]

# Fail loudly instead of letting zip() silently skip models without a grid.
if len(param_grid) != len(models_to_run):
    raise ValueError(
        'param_grid has {0} entries but models_to_run has {1}; '
        'provide exactly one parameter grid per model'.format(
            len(param_grid), len(models_to_run)))

# NestedCV calls model.set_params(...), so it needs a single estimator, not
# the whole list. Passing the list is what raised
# "AttributeError: 'list' object has no attribute 'set_params'".
for model, grid in zip(models_to_run, param_grid):
    nested_CV_search = NestedCV(
        model=model,
        params_grid=grid,
        outer_kfolds=4,
        inner_kfolds=4,
        cv_options={'sqrt_of_score': True, 'randomized_search_iter': 30},
    )
    nested_CV_search.fit(X=X, y=Train_Y)
    # The original called this on the undefined name `grid_nested_cv`.
    nested_CV_search.score_vs_variance_plot()
    print('\nCumulated best parameter grid was:\n{0}'.format(nested_CV_search.best_params))
AttributeError Traceback (most recent call last)