automl / Auto-PyTorch

Automatic architecture search and hyperparameter optimization for PyTorch
Apache License 2.0
2.37k stars 287 forks source link

crash when max_budget higher than max_runtime #5

Closed KEggensperger closed 5 years ago

KEggensperger commented 5 years ago

When setting max_budget > max_runtime and using time as the budget type (budget_type='time'), the program crashes (which is logical), but the error message is non-descriptive: KeyError: 'Imputation:strategy'

from autoPyTorch import AutoNetClassification

# data and metric imports
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
X, y = sklearn.datasets.load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

# running Auto-PyTorch
autoPyTorch = AutoNetClassification(log_level='info', max_runtime=30, min_budget=30, max_budget=90, budget_type='time')
autoPyTorch.fit(X_train, y_train, validation_split=0.3)
y_pred = autoPyTorch.predict(X_test)

print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))

results in

KeyError                                  Traceback (most recent call last)
<ipython-input-2-24ae2d25f22b> in <module>
     11 # running Auto-PyTorch
     12 autoPyTorch = AutoNetClassification(log_level='info', max_runtime=30, min_budget=30, max_budget=90, budget_type='time')
---> 13 autoPyTorch.fit(X_train, y_train, validation_split=0.3)
     14 y_pred = autoPyTorch.predict(X_test)
     15 

~/Work/git/Auto-PyTorch/autoPyTorch/core/api.py in fit(self, X_train, Y_train, X_valid, Y_valid, refit, **autonet_config)
    114         self.optimized_hyperparameter_config_budget = output["budget"]
    115         if (refit):
--> 116             self.refit(X_train, Y_train, X_valid, Y_valid, self.optimized_hyperparameter_config, self.autonet_config)
    117         return self.optimized_hyperparameter_config, output['final_metric_score']
    118 

~/Work/git/Auto-PyTorch/autoPyTorch/core/api.py in refit(self, X_train, Y_train, X_valid, Y_valid, hyperparameter_config, autonet_config)
    146 
    147         self.pipeline.fit_pipeline(pipeline_config=autonet_config, refit=refit_data,
--> 148                                     X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid)
    149 
    150     def predict(self, X, return_probabilities=False):

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
     45 
     46     def fit_pipeline(self, **kwargs):
---> 47         return self.root.fit_traverse(**kwargs)
     48 
     49     def predict_pipeline(self, **kwargs):

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
     73                     raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
     74 
---> 75             node.fit_output = node.fit(**required_kwargs)
     76             if (not isinstance(node.fit_output, dict)):
     77                 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py in fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, refit)
     77                                     X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid,
     78                                     budget=refit["budget"], budget_type=self.budget_types[pipeline_config['budget_type']],
---> 79                                     optimize_start_time=time.time())
     80 
     81             return {'final_metric_score': res['loss'],

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
     45 
     46     def fit_pipeline(self, **kwargs):
---> 47         return self.root.fit_traverse(**kwargs)
     48 
     49     def predict_pipeline(self, **kwargs):

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
     73                     raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
     74 
---> 75             node.fit_output = node.fit(**required_kwargs)
     76             if (not isinstance(node.fit_output, dict)):
     77                 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/cross_validation.py in fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, optimize_start_time)
     98                 budget=cur_budget, training_techniques=[budget_type()],
     99                 fit_start_time=time.time(),
--> 100                 categorical_features=categorical_features)
    101 
    102             if result is not None:

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py in fit_pipeline(self, **kwargs)
     45 
     46     def fit_pipeline(self, **kwargs):
---> 47         return self.root.fit_traverse(**kwargs)
     48 
     49     def predict_pipeline(self, **kwargs):

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/base/node.py in fit_traverse(self, **kwargs)
     73                     raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
     74 
---> 75             node.fit_output = node.fit(**required_kwargs)
     76             if (not isinstance(node.fit_output, dict)):
     77                 raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')

~/Work/git/Auto-PyTorch/autoPyTorch/pipeline/nodes/imputation.py in fit(self, hyperparameter_config, X_train, X_valid, categorical_features)
     21         hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config)
     22 
---> 23         strategy = hyperparameter_config['strategy']
     24         fill_value = int(np.nanmax(X_train)) + 1 if not scipy.sparse.issparse(X_train) else 0
     25         numerical_imputer = SimpleImputer(strategy=strategy, copy=False)

~/Work/git/Auto-PyTorch/autoPyTorch/utils/configspace_wrapper.py in __getitem__(self, key)
     21         if ((self.config_prefix + key) not in self.config):
     22             print(self.config)
---> 23         return self.config[self.config_prefix + key]
     24 
     25     def __str__(self):

KeyError: 'Imputation:strategy'
urbanmatthias commented 5 years ago

Hi,

after my last commit, the following error messages will be displayed:

When max_budget < min_budget.

(AutoPyTorchCPU) matthias@matthias-N501JW:~/Schreibtisch/Auto-PyTorch$ python examples/basics/basic_classification.py
Traceback (most recent call last):
  File "examples/basics/basic_classification.py", line 17, in <module>
    res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/core/api.py", line 107, in fit
    self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config))
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py", line 104, in get_pipeline_config
    c(pipeline_config)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/utils/config/config_condition.py", line 15, in __call__
    raise ValueError("Pipeline configuration condition violated: %s" % self.name)
ValueError: Pipeline configuration condition violated: max budget must be greater than or equal to min budget

When max_runtime < max_budget

(AutoPyTorchCPU) matthias@matthias-N501JW:~/Schreibtisch/Auto-PyTorch$ python examples/basics/basic_classification.py
Traceback (most recent call last):
  File "examples/basics/basic_classification.py", line 17, in <module>
    res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/core/api.py", line 107, in fit
    self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config))
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/pipeline/base/pipeline.py", line 104, in get_pipeline_config
    c(pipeline_config)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/utils/config/config_condition.py", line 15, in __call__
    raise ValueError("Pipeline configuration condition violated: %s" % self.name)
ValueError: Pipeline configuration condition violated: When time is used as budget, the max_runtime must be larger than the max_budget

When results.json is empty after fit for some other reason (e.g. the budget_type is epochs, but max_runtime is too small)

(AutoPyTorchCPU) matthias@matthias-N501JW:~/Schreibtisch/Auto-PyTorch$ python examples/basics/basic_classification.py
09:44:29 WORKER: start listening for jobs
09:44:29 [AutoNet] Start bohb
09:44:29 DISPATCHER: started the 'discover_worker' thread
09:44:29 DISPATCHER: started the 'job_runner' thread
09:44:29 DISPATCHER: Pyro daemon running on 10.126.76.153:34499
09:44:29 DISPATCHER: discovered new worker, hpbandster.run_0.worker.matthias-N501JW.16799.-1140147219736384
09:44:29 HBMASTER: starting run at 1548492269.510589
09:44:29 HBMASTER: adjusted queue size to (0, 1)
09:44:29 HBMASTER: Timelimit reached: wait for remaining 0 jobs
09:44:29 DISPATCHER: Dispatcher shutting down
09:44:29 DISPATCHER: shut down complete
Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.
Traceback (most recent call last):
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 207, in parse_results
    res = logged_results_to_HBS_result(result_logger_dir)
  File "/home/matthias/miniconda3/envs/AutoPyTorchCPU/lib/python3.7/site-packages/hpbandster/core/result.py", line 194, in logged_results_to_HBS_result
    'min_budget' : min(budget_set),
ValueError: min() arg is an empty sequence

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 103, in fit
    res = self.parse_results(pipeline_config["result_logger_dir"])
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 211, in parse_results
    raise RuntimeError("Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.")
RuntimeError: Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.
Traceback (most recent call last):
  File "examples/basics/basic_classification.py", line 17, in <module>
    res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/core/api.py", line 116, in fit
    self.refit(X_train, Y_train, X_valid, Y_valid, self.optimized_hyperparameter_config, self.autonet_config)
  File "/home/matthias/Schreibtisch/Auto-PyTorch/autoPyTorch/core/api.py", line 142, in refit
    assert len(hyperparameter_config) > 0, "You have to specify a non-empty hyperparameter config for refit. Probably something went wrong in fit."
AssertionError: You have to specify a non-empty hyperparameter config for refit. Probably something went wrong in fit.

Cheers,

Matthias

KEggensperger commented 5 years ago

The first two messages look good. You could also replace the last message with something like: "No models fit during training, please retry with a larger budget."

urbanmatthias commented 5 years ago

Hi,

the following will now be displayed on develop:

13:00:46 DISPATCHER: job_runner shutting down
13:00:46 DISPATCHER: 'discover_worker' thread exited
13:00:46 DISPATCHER: 'job_runner' thread exited
13:00:46 DISPATCHER: shut down complete
Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.
Traceback (most recent call last):
  File "/home/matthias/Dokumente/Uni/automl_Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 210, in parse_results
    res = logged_results_to_HBS_result(result_logger_dir)
  File "/home/matthias/miniconda3/envs/AutoPyTorchCPU/lib/python3.7/site-packages/hpbandster/core/result.py", line 194, in logged_results_to_HBS_result
    'min_budget' : min(budget_set),
ValueError: min() arg is an empty sequence

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/matthias/Dokumente/Uni/automl_Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 105, in fit
    res = self.parse_results(pipeline_config["result_logger_dir"])
  File "/home/matthias/Dokumente/Uni/automl_Auto-PyTorch/autoPyTorch/pipeline/nodes/optimization_algorithm.py", line 214, in parse_results
    raise RuntimeError("Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.")
RuntimeError: Error parsing results. Check results.json and output for more details. An empty results.json is usually caused by a misconfiguration of AutoNet.
Traceback (most recent call last):
  File "examples/basics/basic_classification.py", line 17, in <module>
    res = autonet.fit(X_train=dm.X, Y_train=dm.Y, cross_validator="k_fold", cross_validator_args={"n_splits": 3})
  File "/home/matthias/Dokumente/Uni/automl_Auto-PyTorch/autoPyTorch/core/api.py", line 113, in fit
    raise RuntimeError("No models fit during training, please retry with a larger max_runtime.")
RuntimeError: No models fit during training, please retry with a larger max_runtime.

Thanks