After setting up an experiment with the GroupKFold strategy and training a model, evaluating the model with the Learning Curve plot raises an error (`ValueError: The 'groups' parameter should not be None.`). Other plots may also be affected and should be checked.
(This was checked after the latest PR, #3057.)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/ipywidgets/widgets/interaction.py:239, in interactive.update(self, *args)
237 value = widget.get_interact_value()
238 self.kwargs[widget._kwarg] = value
--> 239 self.result = self.f(**self.kwargs)
240 show_inline_matplotlib_plots()
241 if self.auto_display and self.result is not None:
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/pycaret/internal/pycaret_experiment/tabular_experiment.py:1960, in _TabularExperiment._plot_model(self, estimator, plot, scale, save, fold, fit_kwargs, plot_kwargs, groups, feature_name, label, use_train_data, verbose, system, display, display_format)
1958 # execute the plot method
1959 with redirect_output(self.logger):
-> 1960 ret = locals()[plot]()
1961 if ret:
1962 plot_filename = ret
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/pycaret/internal/pycaret_experiment/tabular_experiment.py:1379, in _TabularExperiment._plot_model.<locals>.learning()
1371 sizes = np.linspace(0.3, 1.0, 10)
1372 visualizer = LearningCurve(
1373 estimator,
1374 cv=cv,
(...)
1377 random_state=self.seed,
1378 )
-> 1379 return show_yellowbrick_plot(
1380 visualizer=visualizer,
1381 X_train=self.X_train_transformed,
1382 y_train=self.y_train_transformed,
1383 X_test=self.X_test_transformed,
1384 y_test=self.y_test_transformed,
1385 handle_test="",
1386 name=plot_name,
1387 scale=scale,
1388 save=save,
1389 fit_kwargs=fit_kwargs,
1390 groups=groups,
1391 display_format=display_format,
1392 )
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/pycaret/internal/plots/yellowbrick.py:88, in show_yellowbrick_plot(visualizer, X_train, y_train, X_test, y_test, name, handle_train, handle_test, scale, save, fit_kwargs, groups, display_format, **kwargs)
86 elif handle_train == "fit":
87 logger.info("Fitting Model")
---> 88 visualizer.fit(X_train, y_train, **fit_kwargs_and_kwargs)
89 elif handle_train == "fit_transform":
90 logger.info("Fitting & Transforming Model")
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/yellowbrick/model_selection/learning_curve.py:249, in LearningCurve.fit(self, X, y)
233 sklc_kwargs = {
234 key: self.get_params()[key]
235 for key in (
(...)
245 )
246 }
248 # compute the learning curve and store the scores on the estimator
--> 249 curve = sk_learning_curve(self.estimator, X, y, **sklc_kwargs)
250 self.train_sizes_, self.train_scores_, self.test_scores_ = curve
252 # compute the mean and standard deviation of the training data
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:1513, in learning_curve(estimator, X, y, groups, train_sizes, cv, scoring, exploit_incremental_learning, n_jobs, pre_dispatch, verbose, shuffle, random_state, error_score, return_times, fit_params)
1511 cv = check_cv(cv, y, classifier=is_classifier(estimator))
1512 # Store it as list as we will be iterating over the list multiple times
-> 1513 cv_iter = list(cv.split(X, y, groups))
1515 scorer = check_scoring(estimator, scoring=scoring)
1517 n_max_training_samples = len(cv_iter[0][0])
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/sklearn/model_selection/_split.py:340, in _BaseKFold.split(self, X, y, groups)
332 if self.n_splits > n_samples:
333 raise ValueError(
334 (
335 "Cannot have number of splits n_splits={0} greater"
336 " than the number of samples: n_samples={1}."
337 ).format(self.n_splits, n_samples)
338 )
--> 340 for train, test in super().split(X, y, groups):
341 yield train, test
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/sklearn/model_selection/_split.py:86, in BaseCrossValidator.split(self, X, y, groups)
84 X, y, groups = indexable(X, y, groups)
85 indices = np.arange(_num_samples(X))
---> 86 for test_index in self._iter_test_masks(X, y, groups):
87 train_index = indices[np.logical_not(test_index)]
88 test_index = indices[test_index]
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/sklearn/model_selection/_split.py:98, in BaseCrossValidator._iter_test_masks(self, X, y, groups)
93 def _iter_test_masks(self, X=None, y=None, groups=None):
94 """Generates boolean masks corresponding to test sets.
95
96 By default, delegates to _iter_test_indices(X, y, groups)
97 """
---> 98 for test_index in self._iter_test_indices(X, y, groups):
99 test_mask = np.zeros(_num_samples(X), dtype=bool)
100 test_mask[test_index] = True
File /opt/miniconda3/envs/pycaret_env_dev/lib/python3.8/site-packages/sklearn/model_selection/_split.py:518, in GroupKFold._iter_test_indices(self, X, y, groups)
516 def _iter_test_indices(self, X, y, groups):
517 if groups is None:
--> 518 raise ValueError("The 'groups' parameter should not be None.")
519 groups = check_array(groups, input_name="groups", ensure_2d=False, dtype=None)
521 unique_groups, groups = np.unique(groups, return_inverse=True)
ValueError: The 'groups' parameter should not be None.
Installed Versions
System: python: 3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:18) [GCC 10.3.0] executable: /opt/miniconda3/envs/pycaret_env_dev/bin/python3.8 machine: Linux-6.0.2-arch1-1-x86_64-with-glibc2.10
PyCaret required dependencies:
pip: 22.3.1
setuptools: 65.5.1
pycaret: 3.0.0rc4
IPython: 8.6.0
ipywidgets: 8.0.2
tqdm: 4.64.1
numpy: 1.22.4
pandas: 1.4.4
jinja2: 3.1.2
scipy: 1.8.1
joblib: 1.2.0
sklearn: 1.1.3
pyod: 1.0.6
imblearn: 0.9.1
category_encoders: 2.5.1.post0
lightgbm: 3.3.3
numba: 0.55.2
requests: 2.28.1
matplotlib: 3.6.2
scikitplot: 0.3.7
yellowbrick: 1.5
plotly: 5.11.0
kaleido: 0.2.1
statsmodels: 0.13.5
sktime: 0.13.2
tbats: 1.1.1
pmdarima: 1.8.5
psutil: 5.9.3
PyCaret optional dependencies:
shap: Not installed
interpret: Not installed
umap: Not installed
pandas_profiling: Not installed
explainerdashboard: Not installed
autoviz: Not installed
fairlearn: Not installed
xgboost: Not installed
catboost: Not installed
kmodes: Not installed
mlxtend: Not installed
statsforecast: Not installed
tune_sklearn: 0.4.4
ray: 2.0.1
hyperopt: 0.2.7
optuna: 3.0.3
skopt: 0.9.0
mlflow: 1.30.0
gradio: 3.9
fastapi: 0.86.0
uvicorn: 0.19.0
m2cgen: 0.10.0
evidently: 0.1.59.dev3
nltk: Not installed
pyLDAvis: Not installed
gensim: Not installed
spacy: Not installed
wordcloud: Not installed
textblob: Not installed
fugue: Not installed
streamlit: Not installed
prophet: Not installed
pycaret version checks
[X] I have checked that this issue has not already been reported here.
[X] I have confirmed this bug exists on the latest version of pycaret.
[X] I have confirmed this bug exists on the master branch of pycaret (pip install -U git+https://github.com/pycaret/pycaret.git@master).
Issue Description
After setting up an experiment with the GroupKFold strategy and training a model, evaluating the model with the Learning Curve plot raises an error (`ValueError: The 'groups' parameter should not be None.`). Other plots may also be affected and should be checked. (This was checked after the latest PR, #3057.)
Reproducible Example
Expected Behavior
The Learning Curve plot is displayed without raising an error when the GroupKFold strategy is used.
Actual Results
Installed Versions