microsoft / FLAML

A fast library for AutoML and tuning. Join our Discord: https://discord.gg/Cppx2vSPVP.
https://microsoft.github.io/FLAML/
MIT License
3.75k stars 495 forks source link

Unable to work with root_mean_squared_log_error #1295

Open GDGauravDutta opened 2 months ago

GDGauravDutta commented 2 months ago

[flaml.automl.logger: 04-06 11:18:08] {1693} INFO - task = regression
[flaml.automl.logger: 04-06 11:18:08] {1700} INFO - Data split method: uniform
[flaml.automl.logger: 04-06 11:18:08] {1703} INFO - Evaluation method: cv
[flaml.automl.logger: 04-06 11:18:08] {1801} INFO - Minimizing error metric: root_mean_squared_log_error
[flaml.automl.logger: 04-06 11:18:08] {1911} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl.logger: 04-06 11:18:08] {2221} INFO - iteration 0, current learner lgbm

ImportError Traceback (most recent call last)
File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\ml.py:202, in metric_loss_score(metric_name, y_processed_predict, y_processed_true, labels, sample_weight, groups)
    201 try:
--> 202     import datasets
    204     datasets_metric_name = huggingface_submetric_to_metric.get(metric_name, metric_name.split(":")[0])

File C:\ProgramData\anaconda3\lib\site-packages\datasets\__init__.py:24
     22 import platform
---> 24 import pyarrow
     25 from packaging import version

File C:\ProgramData\anaconda3\lib\site-packages\pyarrow\__init__.py:65
     64 _gc.disable()
---> 65 import pyarrow.lib as _lib
     66 if _gc_enabled:

ImportError: DLL load failed while importing lib: The specified procedure could not be found.

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last) Cell In[34], line 1 ----> 1 automl.fit(X, y, task="regression",metric='root_mean_squared_log_error',time_budget=3600*3)

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\automl.py:1939, in AutoML.fit(self, X_train, y_train, dataframe, label, metric, task, n_jobs, log_file_name, estimator_list, time_budget, max_iter, sample, ensemble, eval_method, log_type, model_history, split_ratio, n_splits, log_training_metric, mem_thres, pred_time_limit, train_time_limit, X_val, y_val, sample_weight_val, groups_val, groups, verbose, retrain_full, split_type, learner_selector, hpo_method, starting_points, seed, n_concurrent_trials, keep_search_state, preserve_checkpoint, early_stop, force_cancel, append_log, auto_augment, min_sample_size, use_ray, use_spark, free_mem_ratio, metric_constraints, custom_hp, cv_score_agg_func, skip_transform, mlflow_logging, fit_kwargs_by_estimator, **fit_kwargs) 1937 else: 1938 self._training_log = None -> 1939 self._search() 1940 if self._best_estimator: 1941 logger.info("fit succeeded")

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\automl.py:2485, in AutoML._search(self) 2483 state.best_config = state.init_config[0] if state.init_config else {} 2484 elif self._use_ray is False and self._use_spark is False: -> 2485 self._search_sequential() 2486 else: 2487 self._search_parallel()

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\automl.py:2321, in AutoML._search_sequential(self) 2315 search_state.search_alg.searcher.set_search_properties( 2316 metric=None, 2317 mode=None, 2318 metric_target=self._state.best_loss, 2319 ) 2320 start_run_time = time.time() -> 2321 analysis = tune.run( 2322 search_state.training_function, 2323 search_alg=search_state.search_alg, 2324 time_budget_s=time_budget_s, 2325 verbose=max(self.verbose - 3, 0), 2326 use_ray=False, 2327 use_spark=False, 2328 ) 2329 time_used = time.time() - start_run_time 2330 better = False

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\tune\tune.py:776, in run(evaluation_function, config, low_cost_partial_config, cat_hp_cost, metric, mode, time_budget_s, points_to_evaluate, evaluated_rewards, resource_attr, min_resource, max_resource, reduction_factor, scheduler, search_alg, verbose, local_dir, num_samples, resources_per_trial, config_constraints, metric_constraints, max_failure, use_ray, use_spark, use_incumbent_result_in_evaluation, log_file_name, lexico_objectives, force_cancel, n_concurrent_trials, **ray_args) 774 result = None 775 with PySparkOvertimeMonitor(time_start, time_budget_s, force_cancel): --> 776 result = evaluation_function(trial_to_run.config) 777 if result is not None: 778 if isinstance(result, dict):

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\state.py:323, in AutoMLState._compute_with_config_base(config_w_resource, state, estimator, is_report) 308 del config["FLAML_sample_size"] 309 budget = ( 310 None 311 if state.time_budget < 0 (...) 314 else (state.time_budget - state.time_from_start) / 2 * sample_size / state.data_size[0] 315 ) 317 ( 318 trained_estimator, 319 val_loss, 320 metric_for_logging, 321 _, 322 pred_time, --> 323 ) = compute_estimator( 324 sampled_X_train, 325 sampled_y_train, 326 state.X_val, 327 state.y_val, 328 state.weight_val, 329 state.groups_val, 330 state.train_time_limit if budget is None else min(budget, state.train_time_limit or np.inf), 331 state.kf, 332 config, 333 state.task, 334 estimator, 335 state.eval_method, 336 state.metric, 337 state.best_loss, 338 state.n_jobs, 339 state.learner_classes.get(estimator), 340 state.cv_score_agg_func, 341 state.log_training_metric, 342 this_estimator_kwargs, 343 state.free_mem_ratio, 344 ) 345 if state.retrain_final and not state.model_history: 346 trained_estimator.cleanup()

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\ml.py:560, in compute_estimator(X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf, config_dic, task, estimator_name, eval_method, eval_metric, best_val_loss, n_jobs, estimator_class, cv_score_agg_func, log_training_metric, fit_kwargs, free_mem_ratio) 542 val_loss, metric_for_logging, train_time, pred_time = get_val_loss( 543 config_dic, 544 estimator, (...) 557 free_mem_ratio=0, 558 ) 559 else: --> 560 val_loss, metric_for_logging, train_time, pred_time = task.evaluate_model_CV( 561 config_dic, 562 estimator, 563 X_train, 564 y_train, 565 budget, 566 kf, 567 eval_metric, 568 best_val_loss, 569 cv_score_agg_func, 570 log_training_metric=log_training_metric, 571 fit_kwargs=fit_kwargs, 572 free_mem_ratio=0, 573 ) 575 if isinstance(estimator, TransformersEstimator): 576 del fit_kwargs["metric"], fit_kwargs["X_val"], fit_kwargs["y_val"]

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\task\generic_task.py:845, in GenericTask.evaluate_model_CV(self, config, estimator, X_train_all, y_train_all, budget, kf, eval_metric, best_val_loss, cv_score_agg_func, log_training_metric, fit_kwargs, free_mem_ratio) 842 groups_val = None 844 estimator.cleanup() --> 845 val_loss_i, metric_i, train_time_i, pred_time_i = get_val_loss( 846 config, 847 estimator, 848 X_train, 849 y_train, 850 X_val, 851 y_val, 852 weight_val, 853 groups_val, 854 eval_metric, 855 self, 856 labels, 857 budget_per_train, 858 log_training_metric=log_training_metric, 859 fit_kwargs=fit_kwargs, 860 free_mem_ratio=free_mem_ratio, 861 ) 862 if isinstance(metric_i, dict) and "intermediate_results" in metric_i.keys(): 863 del metric_i["intermediate_results"]

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\ml.py:464, in get_val_loss(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, obj, labels, budget, log_training_metric, fit_kwargs, free_mem_ratio) 459 # if groups_val is not None: 460 # fit_kwargs['groups_val'] = groups_val 461 # fit_kwargs['X_val'] = X_val 462 # fit_kwargs['y_val'] = y_val 463 estimator.fit(X_train, y_train, budget, free_mem_ratio, **fit_kwargs) --> 464 val_loss, metric_for_logging, pred_time, _ = _eval_estimator( 465 config, 466 estimator, 467 X_train, 468 y_train, 469 X_val, 470 y_val, 471 weight_val, 472 groups_val, 473 eval_metric, 474 obj, 475 labels, 476 log_training_metric, 477 fit_kwargs, 478 ) 479 if hasattr(estimator, "intermediate_results"): 480 metric_for_logging["intermediate_results"] = estimator.intermediate_results

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\ml.py:400, in _eval_estimator(config, estimator, X_train, y_train, X_val, y_val, weight_val, groups_val, eval_metric, task, labels, log_training_metric, fit_kwargs) 397 val_pred_y = get_y_pred(estimator, X_val, eval_metric, task) 398 pred_time = (time.time() - pred_start) / X_val.shape[0] --> 400 val_loss = metric_loss_score( 401 eval_metric, 402 y_processed_predict=val_pred_y, 403 y_processed_true=y_val, 404 labels=labels, 405 sample_weight=weight_val, 406 groups=groups_val, 407 ) 408 metric_for_logging = {"pred_time": pred_time} 409 if log_training_metric:

File ~\AppData\Roaming\Python\Python310\site-packages\flaml\automl\ml.py:223, in metric_loss_score(metric_name, y_processed_predict, y_processed_true, labels, sample_weight, groups) 221 score = score_dict[metric_name] 222 except ImportError: --> 223 raise ValueError( 224 metric_name + " is not an built-in sklearn metric and [hf] is not installed. " 225 "Currently built-in sklearn metrics are: " 226 "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo," 227 "log_loss, mape, f1, micro_f1, macro_f1, ap. " 228 "If the metric is a huggingface metric, please pip install flaml[hf] ", 229 "or pass a customized metric function to AutoML.fit(metric=func)", 230 ) 231 # If the metric is not found from huggingface dataset metric list (i.e., FileNotFoundError) 232 # ask the user to provide a custom metric 233 except FileNotFoundError:

ValueError: ('root_mean_squared_log_error is not an built-in sklearn metric and [hf] is not installed. Currently built-in sklearn metrics are: r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,log_loss, mape, f1, micro_f1, macro_f1, ap. If the metric is a huggingface metric, please pip install flaml[hf] ', 'or pass a customized metric function to AutoML.fit(metric=func)')

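For reference, the scikit-learn documentation for sklearn.metrics.root_mean_squared_log_error reads: Root mean squared logarithmic error regression loss.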

Read more in the User Guide.

New in version 1.4.

Programmer-RD-AI commented 1 month ago

From the errors:

  1. It is caused by root_mean_squared_log_error not being a built-in sklearn metric
  2. Reinstalling HuggingFace datasets should resolve the issue

The following are the specific errors:

  1. ValueError: ('root_mean_squared_log_error is not an built-in sklearn metric and [hf] is not installed. Currently built-in sklearn metrics are: r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,log_loss, mape, f1, micro_f1, macro_f1, ap. If the metric is a huggingface metric, please pip install flaml[hf] ', 'or pass a customized metric function to AutoML.fit(metric=func)')
  2. ImportError: DLL load failed while importing lib: The specified procedure could not be found.