pycaret / pycaret

An open-source, low-code machine learning library in Python
https://www.pycaret.org
MIT License
8.84k stars 1.76k forks source link

errors while running example 'PyCaret 2 Classification.ipynb' #1154

Closed Jacques2101 closed 3 years ago

Jacques2101 commented 3 years ago

Hi, I am new to PyCaret, and to learn more I tried to run the simple example you put on your site, but I get some errors that I do not understand.

from pycaret.classification import *
clf1 = setup(data, target = 'Purchase', session_id=123, log_experiment=True, experiment_name='juice1')
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-5-33f83842c97a> in <module>
      1 from pycaret.classification import *
----> 2 clf1 = setup(data, target = 'Purchase', session_id=123, log_experiment=True, experiment_name='juice1')

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/classification.py in setup(data, target, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs)
    578         log_plots = ["auc", "confusion_matrix", "feature"]
    579 
--> 580     return pycaret.internal.tabular.setup(
    581         ml_usecase="classification",
    582         available_plots=available_plots,

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/tabular.py in setup(data, target, ml_usecase, available_plots, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs, display)
   1733                 "SubProcess save_model() called =================================="
   1734             )
-> 1735             save_model(prep_pipe, "Transformation Pipeline", verbose=False)
   1736             logger.info(
   1737                 "SubProcess save_model() end =================================="

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/tabular.py in save_model(model, model_name, model_only, verbose)
   8956     import pycaret.internal.persistence
   8957 
-> 8958     return pycaret.internal.persistence.save_model(
   8959         model, model_name, None if model_only else prep_pipe, verbose
   8960     )

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/persistence.py in save_model(model, model_name, prep_pipe_, verbose)
    261     """
    262 
--> 263     function_params_str = ", ".join([f"{k}={v}" for k, v in locals().items()])
    264 
    265     logger = get_logger()

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/persistence.py in <listcomp>(.0)
    261     """
    262 
--> 263     function_params_str = ", ".join([f"{k}={v}" for k, v in locals().items()])
    264 
    265     logger = get_logger()

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/base.py in __repr__(self, N_CHAR_MAX)
    258             n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
    259 
--> 260         repr_ = pp.pformat(self)
    261 
    262         # Use bruteforce ellipsis when there are a lot of non-blank characters

~/opt/anaconda3/envs/ml/lib/python3.8/pprint.py in pformat(self, object)
    151     def pformat(self, object):
    152         sio = _StringIO()
--> 153         self._format(object, sio, 0, 0, {}, 0)
    154         return sio.getvalue()
    155 

~/opt/anaconda3/envs/ml/lib/python3.8/pprint.py in _format(self, object, stream, indent, allowance, context, level)
    168             self._readable = False
    169             return
--> 170         rep = self._repr(object, context, level)
    171         max_width = self._width - indent - allowance
    172         if len(rep) > max_width:

~/opt/anaconda3/envs/ml/lib/python3.8/pprint.py in _repr(self, object, context, level)
    402 
    403     def _repr(self, object, context, level):
--> 404         repr, readable, recursive = self.format(object, context.copy(),
    405                                                 self._depth, level)
    406         if not readable:

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/_pprint.py in format(self, object, context, maxlevels, level)
    178 
    179     def format(self, object, context, maxlevels, level):
--> 180         return _safe_repr(object, context, maxlevels, level,
    181                           changed_only=self._changed_only)
    182 

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    434             krepr, kreadable, krecur = saferepr(
    435                 k, context, maxlevels, level, changed_only=changed_only)
--> 436             vrepr, vreadable, vrecur = saferepr(
    437                 v, context, maxlevels, level, changed_only=changed_only)
    438             append("%s=%s" % (krepr.strip("'"), vrepr))

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    403         level += 1
    404         for o in object:
--> 405             orepr, oreadable, orecur = _safe_repr(
    406                 o, context, maxlevels, level, changed_only=changed_only)
    407             append(orepr)

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    403         level += 1
    404         for o in object:
--> 405             orepr, oreadable, orecur = _safe_repr(
    406                 o, context, maxlevels, level, changed_only=changed_only)
    407             append(orepr)

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/utils/_pprint.py in _safe_repr(object, context, maxlevels, level, changed_only)
    425             params = _changed_params(object)
    426         else:
--> 427             params = object.get_params(deep=False)
    428         components = []
    429         append = components.append

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/sklearn/base.py in get_params(self, deep)
    193         out = dict()
    194         for key in self._get_param_names():
--> 195             value = getattr(self, key)
    196             if deep and hasattr(value, 'get_params'):
    197                 deep_items = value.get_params().items()

AttributeError: 'Simple_Imputer' object has no attribute 'fill_value_categorical'

then: ensembled_models = compare_models(whitelist = models(type='ensemble').index.tolist(), fold = 3)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-f9bc6f4ee243> in <module>
----> 1 ensembled_models = compare_models(whitelist = models(type='ensemble').index.tolist(), fold = 3)

TypeError: compare_models() got an unexpected keyword argument 'whitelist'

then: catboost = create_model('catboost', cross_validation=False)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-26-6c85599396a1> in <module>
----> 1 catboost = create_model('catboost', cross_validation=False)

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/classification.py in create_model(estimator, fold, round, cross_validation, fit_kwargs, groups, verbose, **kwargs)
    887     """
    888 
--> 889     return pycaret.internal.tabular.create_model_supervised(
    890         estimator=estimator,
    891         fold=fold,

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/tabular.py in create_model_supervised(estimator, fold, round, cross_validation, predict, fit_kwargs, groups, refit, verbose, system, X_train_data, y_train_data, metrics, display, **kwargs)
   2896     if isinstance(estimator, str):
   2897         if estimator not in available_estimators:
-> 2898             raise ValueError(
   2899                 f"Estimator {estimator} not available. Please see docstring for list of available estimators."
   2900             )

ValueError: Estimator catboost not available. Please see docstring for list of available estimators.

thx

Yard1 commented 3 years ago

@moezali1 Can you update the tutorial to install `pycaret[full]` and change the `whitelist` argument to `include`? Thanks.

Jacques2101 commented 3 years ago

I ran pip install "pycaret[full]", but now when I run the example file again, I get a new error message that I did not get previously:

from pycaret.classification import *
clf1 = setup(data, target = 'Purchase', session_id=123, log_experiment=True, experiment_name='juice1')
---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
<ipython-input-4-33f83842c97a> in <module>
      1 from pycaret.classification import *
----> 2 clf1 = setup(data, target = 'Purchase', session_id=123, log_experiment=True, experiment_name='juice1')

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/classification.py in setup(data, target, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs)
    578         log_plots = ["auc", "confusion_matrix", "feature"]
    579 
--> 580     return pycaret.internal.tabular.setup(
    581         ml_usecase="classification",
    582         available_plots=available_plots,

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/tabular.py in setup(data, target, ml_usecase, available_plots, train_size, test_data, preprocess, imputation_type, iterative_imputation_iters, categorical_features, categorical_imputation, categorical_iterative_imputer, ordinal_features, high_cardinality_features, high_cardinality_method, numeric_features, numeric_imputation, numeric_iterative_imputer, date_features, ignore_features, normalize, normalize_method, transformation, transformation_method, handle_unknown_categorical, unknown_categorical_method, pca, pca_method, pca_components, ignore_low_variance, combine_rare_levels, rare_level_threshold, bin_numeric_features, remove_outliers, outliers_threshold, remove_multicollinearity, multicollinearity_threshold, remove_perfect_collinearity, create_clusters, cluster_iter, polynomial_features, polynomial_degree, trigonometry_features, polynomial_threshold, group_features, group_names, feature_selection, feature_selection_threshold, feature_selection_method, feature_interaction, feature_ratio, interaction_threshold, fix_imbalance, fix_imbalance_method, transform_target, transform_target_method, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, custom_pipeline, html, session_id, log_experiment, experiment_name, log_plots, log_profile, log_data, silent, verbose, profile, profile_kwargs, display)
    263 
    264     try:
--> 265         from xgboost import __version__
    266 
    267         logger.info(f"xgboost=={__version__}")

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/xgboost/__init__.py in <module>
      7 import os
      8 
----> 9 from .core import DMatrix, DeviceQuantileDMatrix, Booster
     10 from .training import train, cv
     11 from . import rabit  # noqa

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/xgboost/core.py in <module>
    172 
    173 # load the XGBoost library globally
--> 174 _LIB = _load_lib()
    175 
    176 

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/xgboost/core.py in _load_lib()
    155     if not lib_success:
    156         libname = os.path.basename(lib_paths[0])
--> 157         raise XGBoostError(
    158             'XGBoost Library ({}) could not be loaded.\n'.format(libname) +
    159             'Likely causes:\n' +

XGBoostError: XGBoost Library (libxgboost.dylib) could not be loaded.
Likely causes:
  * OpenMP runtime is not installed (vcomp140.dll or libgomp-1.dll for Windows, libomp.dylib for Mac OSX, libgomp.so for Linux and other UNIX-like OSes). Mac OSX users: Run `brew install libomp` to install OpenMP runtime.
  * You are running 32-bit Python on a 64-bit OS
Error message(s): ['dlopen(/Users/jacques/opt/anaconda3/envs/ml/lib/python3.8/site-packages/xgboost/lib/libxgboost.dylib, 6): Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib\n  Referenced from: /Users/jacques/opt/anaconda3/envs/ml/lib/python3.8/site-packages/xgboost/lib/libxgboost.dylib\n  Reason: image not found']

And for the instruction that worked previously, I now get an error: best_model = compare_models()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-e904bc054f74> in <module>
----> 1 best_model = compare_models()

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/classification.py in compare_models(include, exclude, fold, round, cross_validation, sort, n_select, budget_time, turbo, errors, fit_kwargs, groups, verbose)
    769     """
    770 
--> 771     return pycaret.internal.tabular.compare_models(
    772         include=include,
    773         exclude=exclude,

~/opt/anaconda3/envs/ml/lib/python3.8/site-packages/pycaret/internal/tabular.py in compare_models(include, exclude, fold, round, cross_validation, sort, n_select, budget_time, turbo, errors, fit_kwargs, groups, verbose, display)
   1930 
   1931     # checking error for exclude (string)
-> 1932     available_estimators = _all_models
   1933 
   1934     if exclude != None:

NameError: name '_all_models' is not defined
Jacques2101 commented 3 years ago

I needed to reinstall XGBoost with:

brew install libomp 
pip3 install xgboost 

I found this here: https://xgboost.readthedocs.io/en/latest/build.html#building-on-osx

Then everything is ok.

Thx.