microsoft / solution-accelerator-many-models

MIT License
192 stars 87 forks source link

Allowed_models parameter does not work correctly #115

Closed ryoma-nagata closed 3 years ago

ryoma-nagata commented 3 years ago

Is there a way to use `allowed_models` in solution-accelerator-many-models?

I specify `automl_settings` as follows:


import logging
from scripts.helper import write_automl_settings_to_file

automl_settings = {
    "task" : 'forecasting',
    "primary_metric" : 'normalized_root_mean_squared_error',
    "iteration_timeout_minutes" : 10, # This needs to be changed based on the dataset. We ask customer to explore how long training is taking before settings this value
    "iterations" : 15,
    "experiment_timeout_hours" : 1,
    "label_column_name" : 'sales_quantity',
    "n_cross_validations" : 3,
    "verbosity" : logging.INFO, 
    "debug_log": 'automl_oj_sales_debug.txt',   
    "track_child_runs": False,
    "time_column_name": 'sales_date',
    "max_horizon" : 6,
    "group_column_names": ['product_code'],
    "grain_column_names": ['product_code'],
    # "target_rolling_window_size" : 12,
    # "forecasting_parameters" : forecasting_parameters,
    "enable_voting_ensemble" : False,
    "allowed_models" : ['AutoArima','Average','Naive','Prophet','SeasonalAverage','SeasonalNaive']

}

write_automl_settings_to_file(automl_settings)

from scripts.helper import get_automl_environment
train_env = get_automl_environment(workspace=ws, automl_settings_dict=automl_settings)

I then get the following error message:

---------------------------------------------------------------------------
ConfigException                           Traceback (most recent call last)
<ipython-input-78-295cd1e5a96f> in <module>
      1 from scripts.helper import get_automl_environment
----> 2 train_env = get_automl_environment(workspace=ws, automl_settings_dict=automl_settings)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/nagata-vm/code/Users/ryoma.nagata/manymodel/solution-accelerator-many-models/Automated_ML/02_AutoML_Training_Pipeline/scripts/helper.py in get_automl_environment(workspace, automl_settings_dict)
     50 def get_automl_environment(workspace: Workspace, automl_settings_dict: dict):
     51     from common.scripts.helper import get_automl_environment as get_env
---> 52     return get_env(workspace, automl_settings_dict)
     53 
     54 

/mnt/batch/tasks/shared/LS_root/mounts/clusters/nagata-vm/code/Users/ryoma.nagata/manymodel/solution-accelerator-many-models/Automated_ML/common/scripts/helper.py in get_automl_environment(workspace, automl_settings_dict)
     22     null_logger.propagate = False
     23     automl_settings_obj = AzureAutoMLSettings.from_string_or_dict(
---> 24         automl_settings_dict)
     25     run_configuration = modify_run_configuration(
     26         automl_settings_obj,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlsettings.py in from_string_or_dict(val, experiment, overrides)
    415             if overrides is not None:
    416                 val.update(overrides)
--> 417             return AzureAutoMLSettings(experiment=experiment, **val)
    418 
    419         if isinstance(val, AzureAutoMLSettings):

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlsettings.py in __init__(self, experiment, path, iterations, data_script, primary_metric, task_type, compute_target, spark_context, validation_size, n_cross_validations, y_min, y_max, num_classes, featurization, max_cores_per_iteration, max_concurrent_iterations, iteration_timeout_minutes, mem_in_mb, enforce_time_on_windows, experiment_timeout_minutes, experiment_exit_score, enable_early_stopping, blacklist_models, whitelist_models, exclude_nan_labels, verbosity, debug_log, debug_flag, enable_voting_ensemble, enable_stack_ensemble, ensemble_iterations, model_explainability, enable_tf, enable_subsampling, subsample_seed, cost_mode, is_timeseries, enable_onnx_compatible_models, scenario, environment_label, show_deprecate_warnings, **kwargs)
    292             scenario=scenario,
    293             environment_label=environment_label,
--> 294             **kwargs)
    295 
    296         # temporary measure to bypass the typecheck in base settings in common core

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/automl_base_settings.py in __init__(self, path, iterations, data_script, primary_metric, task_type, validation_size, n_cross_validations, y_min, y_max, num_classes, featurization, max_cores_per_iteration, max_concurrent_iterations, iteration_timeout_minutes, mem_in_mb, enforce_time_on_windows, experiment_timeout_minutes, experiment_exit_score, blocked_models, blacklist_models, allowed_models, whitelist_models, exclude_nan_labels, verbosity, debug_log, debug_flag, enable_voting_ensemble, enable_stack_ensemble, ensemble_iterations, model_explainability, enable_tf, enable_subsampling, subsample_seed, cost_mode, is_timeseries, enable_early_stopping, early_stopping_n_iters, enable_onnx_compatible_models, enable_feature_sweeping, enable_nimbusml, enable_streaming, force_streaming, label_column_name, weight_column_name, cv_split_column_names, enable_local_managed, vm_type, track_child_runs, show_deprecate_warnings, forecasting_parameters, allowed_private_models, scenario, environment_label, **kwargs)
    549             self.whitelist_models = self.allowed_private_models.copy()
    550 
--> 551         self._verify_settings()
    552 
    553         # Settings that need to be set after verification

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlsettings.py in _verify_settings(self)
    363         # Base settings object will do most of the verification. Only add AzureML-specific checks here.
    364         try:
--> 365             super()._verify_settings()
    366         except ValueError as e:
    367             # todo figure out how this is reachable, and if it's right to raise it as ConfigException

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/automl_base_settings.py in _verify_settings(self)
    877             )
    878 
--> 879         self._validate_model_filter_lists()
    880         self._validate_allowed_private_model_list()
    881 

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/automl_base_settings.py in _validate_model_filter_lists(self)
    943                         InvalidArgumentWithSupportedValues, target="allowed_models",
    944                         reference_code=ReferenceCodes._AUTOML_CONFIG_ALLOWEDMODELS_EMPTY,
--> 945                         arguments="allowed_models", supported_values=self._get_supported_model_names()
    946                     )
    947                 )

ConfigException: ConfigException:
    Message: Invalid argument(s) 'allowed_models' specified. Supported value(s): '['XGBoostRegressor', 'TensorFlowDNN', 'ExtremeRandomTrees', 'DecisionTree', 'SGD', 'OnlineGradientDescentRegressor', 'LightGBM', 'FastLinearRegressor', 'TensorFlowLinearRegressor', 'LassoLars', 'KNN', 'RandomForest', 'ElasticNet', 'GradientBoosting']'.
    InnerException: None
    ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "Invalid argument(s) 'allowed_models' specified. Supported value(s): '['XGBoostRegressor', 'TensorFlowDNN', 'ExtremeRandomTrees', 'DecisionTree', 'SGD', 'OnlineGradientDescentRegressor', 'LightGBM', 'FastLinearRegressor', 'TensorFlowLinearRegressor', 'LassoLars', 'KNN', 'RandomForest', 'ElasticNet', 'GradientBoosting']'.",
        "details_uri": "https://aka.ms/AutoMLConfig",
        "target": "allowed_models",
        "inner_error": {
            "code": "BadArgument",
            "inner_error": {
                "code": "ArgumentInvalid"
            }
        },
        "reference_code": "a95429c1-1592-4730-b8e8-d52b4db80349"
    }
}

It seems that `AzureAutoMLSettings` (imported via `from azureml.train.automl._azureautomlsettings import AzureAutoMLSettings`) does not accept the forecasting models in `allowed_models`.

EricWrightAtWork commented 3 years ago

Hi @ryoma-nagata, looks like the forecasting models are being blocked. Can you provide a run id so we can look further into it? Based on the settings you've included, the models should be recommended.

deeptim123 commented 3 years ago

Please pass `"is_timeseries": True` in `automl_settings`. We will fix the issue in our next SDK release.

ryoma-nagata commented 3 years ago

Hi @EricWrightAtWork, @deeptim123,

After I added `"is_timeseries": True`, it worked as expected. I am looking forward to the next SDK release.

Thank you so much.