Hi, I am currently using GPBoostClassifier inside sklearn's Pipeline.
I noticed in the documentation that GPBoostClassifier's predict method still has a default value of -1 for the num_neighbors_pred parameter instead of None, even though this parameter has been discontinued. Because Booster.predict raises a GPBoostError whenever num_neighbors_pred is not None, calling model.predict(X) with the default arguments fails.
The error is avoided only when GPBoostClassifier.predict(X, num_neighbors_pred = None) is explicitly specified.
Would it be possible to change the default value of num_neighbors_pred to None to avoid this error?
GPBoost Version
1.2.1.1
GPBoost Documentation (latest version)
def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None,
pred_leaf=False, pred_contrib=False,
group_data_pred=None, group_rand_coef_data_pred=None,
gp_coords_pred=None, gp_rand_coef_data_pred=None,
cluster_ids_pred=None, vecchia_pred_type=None,
num_neighbors_pred=-1, predict_cov_mat=False, predict_var=False, **kwargs):
"""Docstring is inherited from the GPBoostModel."""
result = self.predict_proba(X=X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
pred_leaf=pred_leaf, pred_contrib=pred_contrib,
group_data_pred=group_data_pred, group_rand_coef_data_pred=group_rand_coef_data_pred,
gp_coords_pred=gp_coords_pred, gp_rand_coef_data_pred=gp_rand_coef_data_pred,
cluster_ids_pred=cluster_ids_pred, vecchia_pred_type=vecchia_pred_type,
num_neighbors_pred=num_neighbors_pred, predict_cov_mat=predict_cov_mat, predict_var=predict_var,
**kwargs)
if callable(self._objective) or raw_score or pred_leaf or pred_contrib:
return result
else:
if self._Booster.has_gp_model:
if self._n_classes > 2:
result = result['response_mean']
else:
result = np.vstack((1. - result['response_mean'], result['response_mean'])).transpose()
class_index = np.argmax(result, axis=1)
return self._le.inverse_transform(class_index)
Error Message Received
GPBoostError Traceback (most recent call last)
File <command-4206059573559381>:1
----> 1 model.predict(x)
File /databricks/python/lib/python3.9/site-packages/sklearn/utils/metaestimators.py:113, in _AvailableIfDescriptor.__get__.<locals>.<lambda>(*args, **kwargs)
110 raise attr_err
112 # lambda, but not partial, allows help() to work with update_wrapper
--> 113 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs) # noqa
114 else:
116 def fn(*args, **kwargs):
File /databricks/python/lib/python3.9/site-packages/sklearn/pipeline.py:470, in Pipeline.predict(self, X, **predict_params)
468 for _, name, transform in self._iter(with_final=False):
469 Xt = transform.transform(Xt)
--> 470 return self.steps[-1][1].predict(Xt, **predict_params)
File <command-1775996920262159>:21, in predict(self, X, y)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3330d11d-731b-439d-b8b9-5adea6365f78/lib/python3.9/site-packages/gpboost/sklearn.py:941, in GPBoostClassifier.predict(self, X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, group_data_pred, group_rand_coef_data_pred, gp_coords_pred, gp_rand_coef_data_pred, cluster_ids_pred, vecchia_pred_type, num_neighbors_pred, predict_cov_mat, predict_var, **kwargs)
934 def predict(self, X, raw_score=False, start_iteration=0, num_iteration=None,
935 pred_leaf=False, pred_contrib=False,
936 group_data_pred=None, group_rand_coef_data_pred=None,
937 gp_coords_pred=None, gp_rand_coef_data_pred=None,
938 cluster_ids_pred=None, vecchia_pred_type=None,
939 num_neighbors_pred=-1, predict_cov_mat=False, predict_var=False, **kwargs):
940 """Docstring is inherited from the GPBoostModel."""
--> 941 result = self.predict_proba(X=X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
942 pred_leaf=pred_leaf, pred_contrib=pred_contrib,
943 group_data_pred=group_data_pred, group_rand_coef_data_pred=group_rand_coef_data_pred,
944 gp_coords_pred=gp_coords_pred, gp_rand_coef_data_pred=gp_rand_coef_data_pred,
945 cluster_ids_pred=cluster_ids_pred, vecchia_pred_type=vecchia_pred_type,
946 num_neighbors_pred=num_neighbors_pred, predict_cov_mat=predict_cov_mat, predict_var=predict_var,
947 **kwargs)
948 if callable(self._objective) or raw_score or pred_leaf or pred_contrib:
949 return result
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3330d11d-731b-439d-b8b9-5adea6365f78/lib/python3.9/site-packages/gpboost/sklearn.py:1033, in GPBoostClassifier.predict_proba(self, X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, group_data_pred, group_rand_coef_data_pred, gp_coords_pred, gp_rand_coef_data_pred, cluster_ids_pred, vecchia_pred_type, num_neighbors_pred, predict_cov_mat, predict_var, **kwargs)
961 def predict_proba(self, X, raw_score=False, start_iteration=0, num_iteration=None,
962 pred_leaf=False, pred_contrib=False,
963 group_data_pred=None, group_rand_coef_data_pred=None,
964 gp_coords_pred=None, gp_rand_coef_data_pred=None,
965 cluster_ids_pred=None, vecchia_pred_type=None,
966 num_neighbors_pred=None, predict_cov_mat=False, predict_var=False, **kwargs):
967 """Return the predicted probability for each class for each sample.
968
969 Parameters
(...)
1031 If ``pred_contrib=True``, the feature contributions for each sample.
1032 """
-> 1033 result = super().predict(X=X, raw_score=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
1034 pred_leaf=pred_leaf, pred_contrib=pred_contrib,
1035 group_data_pred=group_data_pred, group_rand_coef_data_pred=group_rand_coef_data_pred,
1036 gp_coords_pred=gp_coords_pred, gp_rand_coef_data_pred=gp_rand_coef_data_pred,
1037 cluster_ids_pred=cluster_ids_pred, vecchia_pred_type=vecchia_pred_type,
1038 num_neighbors_pred=num_neighbors_pred, predict_cov_mat=predict_cov_mat,
1039 predict_var=predict_var, **kwargs)
1040 if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib):
1041 _log_warning("Cannot compute class probabilities or labels "
1042 "due to the usage of customized objective function.\n"
1043 "Returning raw scores instead.")
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3330d11d-731b-439d-b8b9-5adea6365f78/lib/python3.9/site-packages/gpboost/sklearn.py:752, in GPBoostModel.predict(self, X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, group_data_pred, group_rand_coef_data_pred, gp_coords_pred, gp_rand_coef_data_pred, cluster_ids_pred, vecchia_pred_type, num_neighbors_pred, predict_cov_mat, predict_var, **kwargs)
747 if self._n_features != n_features:
748 raise ValueError("Number of features of the model must "
749 "match the input. Model n_features_ is %s and "
750 "input n_features is %s "
751 % (self._n_features, n_features))
--> 752 return self._Booster.predict(X, pred_latent=raw_score, start_iteration=start_iteration, num_iteration=num_iteration,
753 pred_leaf=pred_leaf, pred_contrib=pred_contrib,
754 group_data_pred=group_data_pred, group_rand_coef_data_pred=group_rand_coef_data_pred,
755 gp_coords_pred=gp_coords_pred, gp_rand_coef_data_pred=gp_rand_coef_data_pred,
756 cluster_ids_pred=cluster_ids_pred, vecchia_pred_type=vecchia_pred_type,
757 num_neighbors_pred=num_neighbors_pred, predict_cov_mat=predict_cov_mat,
758 predict_var=predict_var, **kwargs)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3330d11d-731b-439d-b8b9-5adea6365f78/lib/python3.9/site-packages/gpboost/basic.py:3516, in Booster.predict(self, data, start_iteration, num_iteration, pred_latent, pred_leaf, pred_contrib, data_has_header, is_reshape, group_data_pred, group_rand_coef_data_pred, gp_coords_pred, gp_rand_coef_data_pred, cluster_ids_pred, predict_cov_mat, predict_var, cov_pars, ignore_gp_model, raw_score, vecchia_pred_type, num_neighbors_pred, **kwargs)
3513 raise GPBoostError("The argument 'vecchia_pred_type' is discontinued. "
3514 "Use the function 'set_prediction_data' to specify this")
3515 if num_neighbors_pred is not None:
-> 3516 raise GPBoostError("The argument 'num_neighbors_pred' is discontinued. "
3517 "Use the function 'set_prediction_data' to specify this")
3518 predictor = self._to_predictor(deepcopy(kwargs))
3519 if num_iteration is None:
GPBoostError: The argument 'num_neighbors_pred' is discontinued. Use the function 'set_prediction_data' to specify this
Hi, I am currently using GPBoostClassifier inside sklearn's Pipeline.
I noticed in the documentation that GPBoostClassifier's predict method still has a default value of -1 for num_neighbors_pred parameter instead of None even though it is discontinued. This raises an error when calling model.predict(X).
Error is avoided only when GPBoostClassifier.predict(X, num_neighbors_pred = None) is explicitly specified.
Is it possible to rewrite the default value of num_neighbors_pred to None to avoid this error?
GPBoost Version
GPBoost Documentation (latest version)
Error Message Received