Closed glebmikha closed 6 years ago
Could you please provide me with the steps necessary to reproduce this issue?
Could you please check with the latest version of auto-sklearn (0.2.0) whether this is still an issue?
Hi there,
I get almost the same error using auto-sklearn in version 0.2.0, except this time it involves gradient_boosting.py
:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-12-adfd28801c18> in <module>()
34
35
---> 36 cls.refit(Xk, yk)
37
38
/usr/local/lib/python3.4/dist-packages/autosklearn/estimators.py in refit(self, X, y)
48
49 """
---> 50 return self._automl.refit(X, y)
51
52 def fit_ensemble(self, y, task=None, metric=None, precision='32',
/usr/local/lib/python3.4/dist-packages/autosklearn/estimators.py in refit(self, X, y)
48
49 """
---> 50 return self._automl.refit(X, y)
51
52 def fit_ensemble(self, y, task=None, metric=None, precision='32',
/usr/local/lib/python3.4/dist-packages/autosklearn/automl.py in refit(self, X, y)
495 with warnings.catch_warnings():
496 warnings.showwarning = send_warnings_to_log
--> 497 model.fit(X.copy(), y.copy())
498 break
499 except ValueError as e:
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/base.py in fit(self, X, y, fit_params)
87 """
88 X, fit_params = self.fit_transformer(X, y, fit_params=fit_params)
---> 89 self.fit_estimator(X, y, **fit_params)
90 return self
91
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/base.py in fit_estimator(self, X, y, **fit_params)
102 if fit_params is None:
103 fit_params = {}
--> 104 self.steps[-1][-1].fit(X, y, **fit_params)
105 return self
106
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/components/base.py in fit(self, X, y, **kwargs)
377 if kwargs is None:
378 kwargs = {}
--> 379 return self.choice.fit(X, y, **kwargs)
380
381 def predict(self, X):
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/components/classification/gradient_boosting.py in fit(self, X, y, sample_weight, refit)
34 def fit(self, X, y, sample_weight=None, refit=False):
35 self.iterative_fit(X, y, n_iter=1, sample_weight=sample_weight,
---> 36 refit=True)
37 while not self.configuration_fully_fitted():
38 self.iterative_fit(X, y, n_iter=1, sample_weight=sample_weight)
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/components/classification/gradient_boosting.py in iterative_fit(self, X, y, sample_weight, n_iter, refit)
66 self.max_leaf_nodes = None
67 else:
---> 68 self.max_leaf_nodes = int(self.max_leaf_nodes)
69 self.verbose = int(self.verbose)
70
TypeError: int() argument must be a string or a number, not 'NoneType'
What seems weird is that in the list of models (via cls.show_models()
) I cannot find any gradient_boosting :
[(0.300000, SimpleClassificationPipeline({'classifier:adaboost:algorithm': 'SAMME', 'preprocessor:fast_ica:fun': 'logcosh', 'preprocessor:fast_ica:algorithm': 'deflation', 'preprocessor:__choice__': 'fast_ica', 'rescaling:__choice__': 'standardize', 'classifier:__choice__': 'adaboost', 'classifier:adaboost:n_estimators': 491, 'classifier:adaboost:max_depth': 6, 'balancing:strategy': 'weighting', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:fast_ica:n_components': 1995, 'preprocessor:fast_ica:whiten': 'True', 'classifier:adaboost:learning_rate': 1.4736698696730515, 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.140000, SimpleClassificationPipeline({'classifier:adaboost:algorithm': 'SAMME.R', 'preprocessor:liblinear_svc_preprocessor:tol': 0.01097852951252465, 'preprocessor:liblinear_svc_preprocessor:loss': 'squared_hinge', 'preprocessor:liblinear_svc_preprocessor:penalty': 'l1', 'preprocessor:__choice__': 'liblinear_svc_preprocessor', 'rescaling:__choice__': 'standardize', 'classifier:__choice__': 'adaboost', 'classifier:adaboost:n_estimators': 232, 'classifier:adaboost:max_depth': 1, 'preprocessor:liblinear_svc_preprocessor:dual': 'False', 'preprocessor:liblinear_svc_preprocessor:multi_class': 'ovr', 'preprocessor:liblinear_svc_preprocessor:intercept_scaling': 1, 'balancing:strategy': 'none', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:liblinear_svc_preprocessor:C': 18857.462123060137, 'preprocessor:liblinear_svc_preprocessor:fit_intercept': 'True', 'classifier:adaboost:learning_rate': 0.10000000000000002, 'imputation:strategy': 'most_frequent'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.100000, SimpleClassificationPipeline({'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 'preprocessor:feature_agglomeration:pooling_func': 'mean', 'classifier:extra_trees:max_features': 1.0, 'classifier:extra_trees:criterion': 'gini', 'classifier:extra_trees:min_samples_split': 2, 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'none', 'classifier:extra_trees:n_estimators': 100, 'classifier:extra_trees:min_samples_leaf': 1, 'classifier:extra_trees:max_depth': 'None', 'classifier:__choice__': 'extra_trees', 'preprocessor:feature_agglomeration:n_clusters': 25, 'balancing:strategy': 'weighting', 'classifier:extra_trees:bootstrap': 'False', 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:feature_agglomeration:linkage': 'ward', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.080000, SimpleClassificationPipeline({'classifier:libsvm_svc:kernel': 'rbf', 'classifier:libsvm_svc:max_iter': -1, 'classifier:libsvm_svc:tol': 0.07228314195704957, 'classifier:libsvm_svc:C': 133.619004912714, 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'minmax', 'classifier:__choice__': 'libsvm_svc', 'classifier:libsvm_svc:shrinking': 'False', 'balancing:strategy': 'none', 'classifier:libsvm_svc:gamma': 1.421889512788389, 'one_hot_encoding:use_minimum_fraction': 'False', 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.080000, SimpleClassificationPipeline({'classifier:adaboost:algorithm': 'SAMME', 'classifier:adaboost:max_depth': 6, 'balancing:strategy': 'none', 'classifier:adaboost:learning_rate': 1.4736698696730515, 'classifier:adaboost:n_estimators': 491, 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'none', 'one_hot_encoding:use_minimum_fraction': 'False', 'classifier:__choice__': 'adaboost', 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.060000, SimpleClassificationPipeline({'classifier:decision_tree:criterion': 'entropy', 'classifier:decision_tree:splitter': 'best', 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'preprocessor:feature_agglomeration:pooling_func': 'median', 'classifier:decision_tree:max_depth': 1.2028353286235083, 'preprocessor:__choice__': 'feature_agglomeration', 'rescaling:__choice__': 'none', 'classifier:__choice__': 'decision_tree', 'classifier:decision_tree:min_samples_split': 3, 'preprocessor:feature_agglomeration:n_clusters': 303, 'balancing:strategy': 'none', 'preprocessor:feature_agglomeration:linkage': 'complete', 'classifier:decision_tree:min_weight_fraction_leaf': 0.0, 'classifier:decision_tree:max_leaf_nodes': 'None', 'classifier:decision_tree:max_features': 1.0, 'one_hot_encoding:minimum_fraction': 0.29966862109739095, 'one_hot_encoding:use_minimum_fraction': 'True', 'classifier:decision_tree:min_samples_leaf': 14, 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.040000, SimpleClassificationPipeline({'preprocessor:feature_agglomeration:pooling_func': 'mean', 'preprocessor:feature_agglomeration:affinity': 'manhattan', 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:criterion': 'entropy', 'rescaling:__choice__': 'none', 'classifier:random_forest:n_estimators': 100, 'classifier:__choice__': 'random_forest', 'one_hot_encoding:use_minimum_fraction': 'False', 'classifier:random_forest:max_features': 3.517874457919551, 'preprocessor:feature_agglomeration:n_clusters': 338, 'balancing:strategy': 'none', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 8, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'feature_agglomeration', 'preprocessor:feature_agglomeration:linkage': 'complete', 'imputation:strategy': 'most_frequent'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.040000, SimpleClassificationPipeline({'classifier:adaboost:algorithm': 'SAMME.R', 'classifier:adaboost:max_depth': 8, 'one_hot_encoding:minimum_fraction': 0.0010608268861776148, 'balancing:strategy': 'none', 'classifier:adaboost:learning_rate': 0.1389245240873958, 'classifier:adaboost:n_estimators': 361, 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'none', 'one_hot_encoding:use_minimum_fraction': 'True', 'classifier:__choice__': 'adaboost', 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.040000, SimpleClassificationPipeline({'classifier:adaboost:algorithm': 'SAMME', 'preprocessor:fast_ica:fun': 'exp', 'preprocessor:fast_ica:algorithm': 'deflation', 'preprocessor:__choice__': 'fast_ica', 'rescaling:__choice__': 'minmax', 'classifier:__choice__': 'adaboost', 'classifier:adaboost:n_estimators': 432, 'classifier:adaboost:max_depth': 10, 'balancing:strategy': 'none', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:fast_ica:n_components': 226, 'preprocessor:fast_ica:whiten': 'True', 'classifier:adaboost:learning_rate': 0.1335260976284136, 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'one_hot_encoding:minimum_fraction': 0.01, 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:criterion': 'gini', 'rescaling:__choice__': 'standardize', 'classifier:random_forest:n_estimators': 100, 'classifier:__choice__': 'random_forest', 'one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:max_features': 1.0, 'balancing:strategy': 'none', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'no_preprocessing', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'one_hot_encoding:minimum_fraction': 0.010000000000000004, 'preprocessor:feature_agglomeration:affinity': 'euclidean', 'classifier:random_forest:max_depth': 'None', 'preprocessor:feature_agglomeration:pooling_func': 'mean', 'classifier:random_forest:criterion': 'gini', 'rescaling:__choice__': 'normalize', 'classifier:random_forest:n_estimators': 100, 'classifier:__choice__': 'random_forest', 'one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:max_features': 1.0, 'preprocessor:feature_agglomeration:n_clusters': 374, 'balancing:strategy': 'none', 'classifier:random_forest:bootstrap': 'True', 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'feature_agglomeration', 'preprocessor:feature_agglomeration:linkage': 'ward', 'imputation:strategy': 'median'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'preprocessor:fast_ica:fun': 'logcosh', 'preprocessor:fast_ica:algorithm': 'deflation', 'classifier:k_nearest_neighbors:n_neighbors': 6, 'preprocessor:__choice__': 'fast_ica', 'rescaling:__choice__': 'minmax', 'classifier:__choice__': 'k_nearest_neighbors', 'classifier:k_nearest_neighbors:weights': 'distance', 'balancing:strategy': 'none', 'classifier:k_nearest_neighbors:p': 2, 'preprocessor:fast_ica:whiten': 'True', 'preprocessor:fast_ica:n_components': 1951, 'one_hot_encoding:use_minimum_fraction': 'False', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'classifier:extra_trees:min_weight_fraction_leaf': 0.0, 'classifier:extra_trees:max_features': 1.0, 'classifier:extra_trees:criterion': 'gini', 'preprocessor:select_rates:score_func': 'chi2', 'classifier:extra_trees:min_samples_split': 2, 'preprocessor:__choice__': 'select_rates', 'rescaling:__choice__': 'none', 'classifier:extra_trees:n_estimators': 100, 'classifier:extra_trees:min_samples_leaf': 1, 'classifier:extra_trees:max_depth': 'None', 'classifier:__choice__': 'extra_trees', 'balancing:strategy': 'none', 'classifier:extra_trees:bootstrap': 'False', 'preprocessor:select_rates:mode': 'fpr', 'preprocessor:select_rates:alpha': 0.1, 'one_hot_encoding:use_minimum_fraction': 'False', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'one_hot_encoding:minimum_fraction': 0.34110222241136745, 'classifier:random_forest:max_depth': 'None', 'classifier:random_forest:criterion': 'gini', 'rescaling:__choice__': 'none', 'classifier:random_forest:n_estimators': 100, 'classifier:__choice__': 'random_forest', 'one_hot_encoding:use_minimum_fraction': 'True', 'classifier:random_forest:max_features': 0.9213917629406125, 'balancing:strategy': 'none', 'classifier:random_forest:bootstrap': 'False', 'classifier:random_forest:min_samples_leaf': 5, 'classifier:random_forest:min_samples_split': 7, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'no_preprocessing', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
(0.020000, SimpleClassificationPipeline({'preprocessor:polynomial:degree': 2, 'classifier:random_forest:max_depth': 'None', 'preprocessor:polynomial:include_bias': 'False', 'classifier:random_forest:criterion': 'entropy', 'rescaling:__choice__': 'none', 'classifier:random_forest:n_estimators': 100, 'classifier:__choice__': 'random_forest', 'one_hot_encoding:use_minimum_fraction': 'False', 'classifier:random_forest:max_features': 1.0, 'balancing:strategy': 'none', 'classifier:random_forest:bootstrap': 'True', 'preprocessor:polynomial:interaction_only': 'False', 'classifier:random_forest:min_samples_leaf': 1, 'classifier:random_forest:min_samples_split': 2, 'classifier:random_forest:min_weight_fraction_leaf': 0.0, 'classifier:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'polynomial', 'imputation:strategy': 'mean'},
dataset_properties={
'target_type': 'classification',
'multiclass': False,
'signed': False,
'sparse': False,
'task': 1,
'multilabel': False})),
]
Thanks
That is indeed a weird problem, and off the top of my head I don't know where it could come from. @christophe-rannou could you try to provide me with a minimal working example which allows me to reproduce the error? Also, could you please post the output of either pip list
or conda list
? This is potentially related to #326.
Sadly I cannot provide you with the dataset, and the examples run without problems. This is the conda list
:
argparse 1.4.0 <pip>
asn1crypto 0.22.0 py36_0
auto-sklearn 0.2.0 <pip>
cffi 1.10.0 py36_0
cloog 0.18.0 0
conda 4.3.22 py36_0
conda-env 2.6.0 0
ConfigSpace 0.3.9 <pip>
cryptography 1.8.1 py36_0
Cython 0.25.2 <pip>
decorator 4.1.1 <pip>
docutils 0.13.1 <pip>
future 0.16.0 <pip>
gcc 4.8.5 7
gmp 6.1.0 0
idna 2.5 py36_0
isl 0.12.2 0
joblib 0.11 <pip>
liac-arff 2.1.1 <pip>
libffi 3.2.1 1
lockfile 0.12.2 <pip>
mpc 1.0.3 0
mpfr 3.1.5 0
networkx 1.11 <pip>
nose 1.3.7 <pip>
numpy 1.13.1 <pip>
openssl 1.0.2l 0
packaging 16.8 py36_0
pandas 0.20.3 <pip>
pcre 8.39 1
pip 9.0.1 py36_1
protobuf 3.3.0 <pip>
psutil 5.2.2 <pip>
pycosat 0.6.2 py36_0
pycparser 2.17 py36_0
pynisher 0.4.2 <pip>
pyopenssl 17.0.0 py36_0
pyparsing 2.1.4 py36_0
pyPhoenix 0.10.0 <pip>
pyrfr 0.4.0 <pip>
python 3.6.1 2
python-dateutil 2.6.1 <pip>
pytz 2017.2 <pip>
PyYAML 3.12 <pip>
readline 6.2 2
requests 2.14.2 py36_0
ruamel_yaml 0.11.14 py36_1
scikit-learn 0.18.1 <pip>
scipy 0.19.1 <pip>
setuptools 27.2.0 py36_0
six 1.10.0 py36_0
sklearn 0.0 <pip>
smac 0.5.0 <pip>
SQLAlchemy 1.1.11 <pip>
sqlite 3.13.0 0
swig 3.0.10 0
tk 8.5.18 0
typing 3.6.1 <pip>
wheel 0.29.0 py36_0
xz 5.2.2 1
yaml 0.1.6 0
zlib 1.2.8 3
I tried making a prediction without making a refit
and I get the following error :
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-22-59975d76b47f> in <module>()
2 # pos_proba = cls.predict_proba(X_crossval[val_set])[:,list(rfc.classes_).index(1)]
3 # roc_auc_score(y_crossval[val_set], pos_proba)
----> 4 predictions = cls.predict(X_test)
5 accuracy_score(y_test, predictions)
/usr/local/lib/python3.4/dist-packages/autosklearn/estimators.py in predict(self, X, batch_size, n_jobs)
419 """
420 return super(AutoSklearnClassifier, self).predict(
--> 421 X, batch_size=batch_size, n_jobs=n_jobs)
422
423 def predict_proba(self, X, batch_size=None, n_jobs=1):
/usr/local/lib/python3.4/dist-packages/autosklearn/estimators.py in predict(self, X, batch_size, n_jobs)
59
60 def predict(self, X, batch_size=None, n_jobs=1):
---> 61 return self._automl.predict(X, batch_size=batch_size, n_jobs=n_jobs)
62
63 def score(self, X, y):
/usr/local/lib/python3.4/dist-packages/autosklearn/estimators.py in predict(self, X, batch_size, n_jobs)
587 def predict(self, X, batch_size=None, n_jobs=1):
588 predicted_probabilities = self._automl.predict(
--> 589 X, batch_size=batch_size, n_jobs=n_jobs)
590
591 if self._n_outputs == 1:
/usr/local/lib/python3.4/dist-packages/autosklearn/automl.py in predict(self, X, batch_size, n_jobs)
543 all_predictions = joblib.Parallel(n_jobs=n_jobs)(
544 joblib.delayed(_model_predict)(self, X, batch_size, identifier)
--> 545 for identifier in self.ensemble_.get_model_identifiers())
546
547 if len(all_predictions) == 0:
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
756 # was dispatched. In particular this covers the edge
757 # case of Parallel used with an exhausted iterator.
--> 758 while self.dispatch_one_batch(iterator):
759 self._iterating = True
760 else:
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
606 return False
607 else:
--> 608 self._dispatch(tasks)
609 return True
610
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
569 dispatch_timestamp = time.time()
570 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 571 job = self._backend.apply_async(batch, callback=cb)
572 self._jobs.append(job)
573
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
107 def apply_async(self, func, callback=None):
108 """Schedule a func to be run"""
--> 109 result = ImmediateResult(func)
110 if callback:
111 callback(result)
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
324 # Don't delay the application, to avoid keeping the input
325 # arguments in memory
--> 326 self.results = batch()
327
328 def get(self):
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
/usr/local/lib/python3.4/dist-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
/usr/local/lib/python3.4/dist-packages/autosklearn/automl.py in _model_predict(self, X, batch_size, identifier)
43 prediction = model.predict(X_, batch_size=batch_size)
44 else:
---> 45 prediction = model.predict_proba(X_, batch_size=batch_size)
46 if len(prediction.shape) < 1 or len(X_.shape) < 1 or \
47 X_.shape[0] < 1 or prediction.shape[0] != X_.shape[0]:
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/classification.py in predict_proba(self, X, batch_size)
125 Xt = transform.transform(Xt)
126
--> 127 return self.steps[-1][-1].predict_proba(Xt)
128
129 else:
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/components/classification/__init__.py in predict_proba(self, X)
124
125 def predict_proba(self, X):
--> 126 return self.choice.predict_proba(X)
127
128 def estimator_supports_iterative_fit(self):
/usr/local/lib/python3.4/dist-packages/autosklearn/pipeline/components/classification/gradient_boosting.py in predict_proba(self, X)
112 def predict_proba(self, X):
113 if self.estimator is None:
--> 114 raise NotImplementedError()
115 return self.estimator.predict_proba(X)
116
NotImplementedError:
Once again it is the gradient_boosting
that raises an error. It definitely seems related to #326.
@christophe-rannou could you please check if the cause of #326 is the cause of your issue here?
Well I just tried again and executed the following code
import autosklearn.classification
import sklearn.model_selection
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
cls = autosklearn.classification.AutoSklearnClassifier()
cls.fit(X_train, y_train)
cls.refit(X, y)
I ended up with a similar error except this time it was not GradientBoosting
/root/miniconda3/envs/py24/lib/python3.4/site-packages/autosklearn/pipeline/components/feature_preprocessing/random_trees_embedding.py in fit(self, X, Y)
32 self.max_leaf_nodes = None
33 else:
---> 34 self.max_leaf_nodes = int(self.max_leaf_nodes)
35
36 self.preprocessor = sklearn.ensemble.RandomTreesEmbedding(
TypeError: int() argument must be a string, a bytes-like object or a number, not 'NoneType'
Thanks for bringing this up again. Could you do me a favor and execute the following piece of code on your machine to see if it triggers the error (as it doesn't on mine, and it should be triggering the error according to your description of the problem)?
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.classification
def main():
X, y = sklearn.datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = \
sklearn.model_selection.train_test_split(X, y, random_state=1)
automl = autosklearn.classification.AutoSklearnClassifier(
time_left_for_this_task=30, per_run_time_limit=10,
tmp_folder='/tmp/autoslearn_holdout_example_tmp',
output_folder='/tmp/autosklearn_holdout_example_out',
disable_evaluator_output=False,
include_preprocessors=["random_trees_embedding"],
include_estimators=["sgd"],
delete_tmp_folder_after_terminate=False,
resampling_strategy='cv')
automl.fit(X_train, y_train, dataset_name='digits')
automl.refit(X, y)
# Print the final ensemble constructed by auto-sklearn.
print(automl.show_models())
predictions = automl.predict(X_test)
# Print statistics about the auto-sklearn run such as number of
# iterations, number of models failed with a time out.
print(automl.sprint_statistics())
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
print(automl.cv_results_)
if __name__ == '__main__':
main()
A quick fix would be to change line 31 in your file to check whether that parameter is None
. However, this still doesn't explain the reasons of this failure.
This is a duplicate of #326. I posted the actual error there and will fix the issue with the next, upcoming release.
I get the following error when running refit( X , y ):