CatBoostError Traceback (most recent call last)
Input In [120], in <cell line: 1>()
----> 1 db = ExplainerDashboard(explainer, title="Titanic Explainer",cats=True, hide_cats=True, hide_pdp=True, hide_whatifpdp=True, shap_interaction=False)
2 db.run(port=8051)
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboards.py:590, in ExplainerDashboard.__init__(self, explainer, tabs, title, name, description, simple, hide_header, header_hide_title, header_hide_selector, header_hide_download, hide_poweredby, block_selector_callbacks, pos_label, fluid, mode, width, height, bootstrap, external_stylesheets, server, url_base_pathname, responsive, logins, port, importances, model_summary, contributions, whatif, shap_dependence, shap_interaction, decision_trees, **kwargs)
588 if isinstance(tabs, list):
589 tabs = [self._convert_str_tabs(tab) for tab in tabs]
--> 590 self.explainer_layout = ExplainerTabsLayout(explainer, tabs, title,
591 description=self.description,
592 **update_kwargs(kwargs,
593 header_hide_title=self.header_hide_title,
594 header_hide_selector=self.header_hide_selector,
595 header_hide_download=self.header_hide_download,
596 hide_poweredby=self.hide_poweredby,
597 block_selector_callbacks=self.block_selector_callbacks,
598 pos_label=self.pos_label,
599 fluid=fluid))
600 else:
601 tabs = self._convert_str_tabs(tabs)
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboards.py:109, in ExplainerTabsLayout.__init__(self, explainer, tabs, title, name, description, header_hide_title, header_hide_selector, header_hide_download, hide_poweredby, block_selector_callbacks, pos_label, fluid, **kwargs)
105 assert len(self.tabs) > 0, 'When passing a list to tabs, need to pass at least one valid tab!'
107 self.register_components(*self.tabs)
--> 109 self.downloadable_tabs = [tab for tab in self.tabs if tab.to_html(add_header=False) != "<div></div>"]
110 if not self.downloadable_tabs:
111 self.header_hide_download = True
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboards.py:109, in <listcomp>(.0)
105 assert len(self.tabs) > 0, 'When passing a list to tabs, need to pass at least one valid tab!'
107 self.register_components(*self.tabs)
--> 109 self.downloadable_tabs = [tab for tab in self.tabs if tab.to_html(add_header=False) != "<div></div>"]
110 if not self.downloadable_tabs:
111 self.header_hide_download = True
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboard_components/composites.py:197, in ClassifierModelStatsComposite.to_html(self, state_dict, add_header)
194 def to_html(self, state_dict=None, add_header=True):
195 html = to_html.hide(to_html.title(self.title), hide=self.hide_title)
196 html += to_html.card_rows(
--> 197 [to_html.hide(self.summary.to_html(state_dict, add_header=False), hide=self.hide_modelsummary),
198 to_html.hide(self.confusionmatrix.to_html(state_dict, add_header=False), hide=self.hide_confusionmatrix)],
199 [to_html.hide(self.precision.to_html(state_dict, add_header=False), hide=self.hide_precision),
200 to_html.hide(self.classification.to_html(state_dict, add_header=False), hide=self.hide_classification)],
201 [to_html.hide(self.rocauc.to_html(state_dict, add_header=False), hide=self.hide_rocauc),
202 to_html.hide(self.prauc.to_html(state_dict, add_header=False), hide=self.hide_prauc)],
203 [to_html.hide(self.liftcurve.to_html(state_dict, add_header=False), hide=self.hide_liftcurve),
204 to_html.hide(self.cumulative_precision.to_html(state_dict, add_header=False), hide=self.hide_cumprecision)]
205 )
206 if add_header:
207 return to_html.add_header(html)
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboard_components/classifier_components.py:1633, in ClassifierModelSummaryComponent.to_html(self, state_dict, add_header)
1631 def to_html(self, state_dict=None, add_header=True):
1632 args = self.get_state_args(state_dict)
-> 1633 metrics_df = self._get_metrics_df(args['cutoff'], args['pos_label'])
1634 html = to_html.table_from_df(metrics_df)
1635 html = to_html.card(html, title=self.title)
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/dashboard_components/classifier_components.py:1642, in ClassifierModelSummaryComponent._get_metrics_df(self, cutoff, pos_label)
1640 def _get_metrics_df(self, cutoff, pos_label):
1641 metrics_df = (pd.DataFrame(
-> 1642 self.explainer.metrics(cutoff=cutoff, pos_label=pos_label,
1643 show_metrics=self.show_metrics),
1644 index=["Score"])
1645 .T.rename_axis(index="metric").reset_index()
1646 .round(self.round))
1647 return metrics_df
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/explainers.py:60, in insert_pos_label.<locals>.inner(self, *args, **kwargs)
57 else:
58 # insert self.pos_label
59 kwargs.update(dict(pos_label=self.pos_label))
---> 60 return func(self, *args, **kwargs)
61 kwargs.update(dict(zip(inspect.getfullargspec(func).args[1:1+len(args)], args)))
62 if 'pos_label' in kwargs:
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/explainers.py:2679, in ClassifierExplainer.metrics(self, cutoff, show_metrics, pos_label)
2676 self._metrics[label][cut] = \
2677 get_metrics(0.01*cut, label)
2678 else:
-> 2679 self._metrics = get_cv_metrics(self.cv)
2682 if int(cutoff*100) in self._metrics[pos_label]:
2683 metrics_dict = self._metrics[pos_label][int(cutoff*100)]
File ~/opt/anaconda3/lib/python3.9/site-packages/explainerdashboard/explainers.py:2651, in ClassifierExplainer.metrics.<locals>.get_cv_metrics(n_splits)
2649 X_train, X_test = self.X.iloc[train_index], self.X.iloc[test_index]
2650 y_train, y_test = self.y.iloc[train_index], self.y.iloc[test_index]
-> 2651 preds = clone(self.model).fit(X_train, y_train).predict_proba(X_test)
2652 for label in range(len(self.labels)):
2653 for cut in np.linspace(1, 99, 99, dtype=int):
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:5007, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5004 if 'loss_function' in params:
5005 CatBoostClassifier._check_is_compatible_loss(params['loss_function'])
-> 5007 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
5008 eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period,
5009 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
5010 return self
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:2262, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
2259 if y is None and not isinstance(X, PATH_TYPES + (Pool,)):
2260 raise CatBoostError("y may be None only when X is an instance of catboost.Pool or string")
-> 2262 train_params = self._prepare_train_params(
2263 X=X, y=y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features,
2264 pairs=pairs, sample_weight=sample_weight, group_id=group_id, group_weight=group_weight,
2265 subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline, use_best_model=use_best_model,
2266 eval_set=eval_set, verbose=verbose, logging_level=logging_level, plot=plot,
2267 column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
2268 silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
2269 snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model,
2270 callbacks=callbacks
2271 )
2272 params = train_params["params"]
2273 train_pool = train_params["train_pool"]
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:2148, in CatBoost._prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks)
2145 text_features = _process_feature_indices(text_features, X, params, 'text_features')
2146 embedding_features = _process_feature_indices(embedding_features, X, params, 'embedding_features')
-> 2148 train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs,
2149 sample_weight, group_id, group_weight, subgroup_id, pairs_weight,
2150 baseline, column_description)
2151 if train_pool.is_empty_:
2152 raise CatBoostError("X is empty.")
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:1430, in _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description)
1428 if y is None:
1429 raise CatBoostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
-> 1430 train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, weight=sample_weight, group_id=group_id,
1431 group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
1432 return train_pool
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:790, in Pool.__init__(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, column_description, pairs, delimiter, has_header, ignore_csv_quoting, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count, log_cout, log_cerr)
784 if isinstance(feature_names, PATH_TYPES):
785 raise CatBoostError(
786 "feature_names must be None or have non-string type when the pool is created from "
787 "python objects."
788 )
--> 790 self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
791 group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
792 super(Pool, self).__init__()
File ~/opt/anaconda3/lib/python3.9/site-packages/catboost/core.py:1411, in Pool._init(self, data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
1409 if feature_tags is not None:
1410 feature_tags = self._check_transform_tags(feature_tags, feature_names)
-> 1411 self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
1412 group_id, group_weight, subgroup_id, pairs_weight, baseline, timestamp, feature_names, feature_tags, thread_count)
File _catboost.pyx:3941, in _catboost._PoolBase._init_pool()
File _catboost.pyx:3991, in _catboost._PoolBase._init_pool()
File _catboost.pyx:3807, in _catboost._PoolBase._init_features_order_layout_pool()
File _catboost.pyx:2731, in _catboost._set_features_order_data_pd_data_frame()
CatBoostError: features data: pandas.DataFrame column 'XYZ' has dtype 'category' but is not in cat_features list
I don't understand why this is happening. Also, if I don't use explainerdashboard and instead create the model, fit it, and calculate SHAP values directly with shap and catboost, I don't get this error, so I think something is wrong with explainerdashboard.
I am stuck at this point and can't figure out whether the problem is in the catboost library, in explainerdashboard, or in my code.
I am creating a CatBoost model using the following:
When I run this, I get the following error:
I don't understand why this is happening. Also, if I don't use explainerdashboard and instead create the model, fit it, and calculate SHAP values directly with shap and catboost, I don't get this error, so I think something is wrong with explainerdashboard.
I am stuck at this point and can't figure out whether the problem is in the catboost library, in explainerdashboard, or in my code.