Closed belzheng closed 1 year ago
Hi, Your code looks fine at first glance. And to answer your question: Yes, you can add a regressor without tuning its HPs.
However, we don't support disabling preprocessing from the include parameter. I assume the one you've passed in is your own preprocessor. Could you please remove that for now and see if the problem resolves?
Please let me know if it did.
Yes, when I remove 'NoPreprocessing' for the data_preprocessor, AbessRegression without tuning HPs can run successfully. The code is as follows:
# Working configuration: AbessRegression with the default data preprocessor
# (the custom "NoPreprocessing" entry is left commented out).
regaallp = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=60,
    per_run_time_limit=10,
    include={
        # "data_preprocessor": ["NoPreprocessing"],
        "regressor": ["AbessRegression"],
        "feature_preprocessor": [
            "no_preprocessing",
            "polynomial",
            "Bspline",
            "kBinsDiscretizer",
        ],
    },
    memory_limit=6144,
)
regaallp.fit(X, y)
However, this is not what I want: I want to disable the data preprocessor, so I added the NoPreprocessing component to auto-sklearn. The code is:
from autosklearn.pipeline.constants import SPARSE, DENSE, UNSIGNED_DATA, INPUT
class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
    """Identity data preprocessor: returns the input data unchanged."""

    def __init__(self, **kwargs):
        # auto-sklearn's internal checks expect every supplied hyperparameter
        # to be stored as an attribute on the instance.
        for name, value in kwargs.items():
            setattr(self, name, value)

    def fit(self, X, Y=None):
        # Nothing to learn; return self to satisfy the estimator protocol.
        return self

    def transform(self, X):
        # Identity transform — hand the data back untouched.
        return X

    @staticmethod
    def get_properties(dataset_properties=None):
        # Advertise support for every task type so the component is never
        # filtered out, and declare an identity ("INPUT") output signature.
        return {
            "shortname": "NoPreprocessing",
            "name": "NoPreprocessing",
            "handles_regression": True,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": True,
            "handles_multioutput": True,
            "is_deterministic": True,
            "input": (SPARSE, DENSE, UNSIGNED_DATA),
            "output": (INPUT,),
        }

    @staticmethod
    def get_hyperparameter_search_space(
        feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        # No hyperparameters to tune, so the search space is empty.
        return ConfigurationSpace()
# Add NoPreprocessing component to auto-sklearn.
# Registers the class so it can be requested by name ("NoPreprocessing")
# via the `include={"data_preprocessor": [...]}` argument of the estimator.
autosklearn.pipeline.components.data_preprocessing.add_preprocessor(NoPreprocessing)
# Sanity check: the component's search space should print as empty.
cs = NoPreprocessing.get_hyperparameter_search_space()
print(cs)
Then, I use it as follows:
# Failing configuration: same regressor, but the data preprocessor is pinned
# to the custom "NoPreprocessing" component registered above.
regaallp = autosklearn.regression.AutoSklearnRegressor(
    time_left_for_this_task=60,
    per_run_time_limit=10,
    include={
        "data_preprocessor": ["NoPreprocessing"],
        "regressor": ["AbessRegression"],
        "feature_preprocessor": [
            "no_preprocessing",
            "polynomial",
            # "Bspline",
            # "kBinsDiscretizer",
        ],
    },
    memory_limit=6144,
)
regaallp.fit(X, y)
TypeError Traceback (most recent call last) Cell In [27], line 16 1 regaallp = autosklearn.regression.AutoSklearnRegressor( 2 time_left_for_this_task=60, 3 per_run_time_limit=10, (...) 14 memory_limit=6144, 15 ) ---> 16 regaallp.fit(X, y)
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/estimators.py:1587, in AutoSklearnRegressor.fit(self, X, y, X_test, y_test, feat_type, dataset_name) 1576 raise ValueError( 1577 "Regression with data of type {} is " 1578 "not supported. Supported types are {}. " (...) 1582 "".format(target_type, supported_types) 1583 ) 1585 # Fit is supposed to be idempotent! 1586 # But not if we use share_mode. -> 1587 super().fit( 1588 X=X, 1589 y=y, 1590 X_test=X_test, 1591 y_test=y_test, 1592 feat_type=feat_type, 1593 dataset_name=dataset_name, 1594 ) 1596 return self
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/estimators.py:540, in AutoSklearnEstimator.fit(self, kwargs) 538 if self.automl is None: 539 self.automl = self.buildautoml() --> 540 self.automl.fit(load_models=self.load_models, kwargs) 542 return self
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/automl.py:2394, in AutoMLRegressor.fit(self, X, y, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models) 2383 def fit( 2384 self, 2385 X: SUPPORTED_FEAT_TYPES, (...) 2392 load_models: bool = True, 2393 ) -> AutoMLRegressor: -> 2394 return super().fit( 2395 X, 2396 y, 2397 X_test=X_test, 2398 y_test=y_test, 2399 feat_type=feat_type, 2400 dataset_name=dataset_name, 2401 only_return_configuration_space=only_return_configuration_space, 2402 load_models=load_models, 2403 is_classification=False, 2404 )
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/automl.py:962, in AutoML.fit(self, X, y, task, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models, is_classification) 959 except Exception as e: 960 # This will be called before the _fit_cleanup 961 self._logger.exception(e) --> 962 raise e 963 finally: 964 self._fit_cleanup()
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/automl.py:899, in AutoML.fit(self, X, y, task, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models, is_classification) 863 resamp_args = self._resampling_strategy_arguments 864 _proc_smac = AutoMLSMBO( 865 config_space=self.configuration_space, 866 dataset_name=self._dataset_name, (...) 892 trials_callback=self._get_trialscallback, 893 ) 895 ( 896 self.runhistory, 897 self.trajectory_, 898 self._budget_type, --> 899 ) = _proc_smac.run_smbo() 901 trajectory_filename = os.path.join( 902 self._backend.get_smac_output_directory_for_run(self._seed), 903 "trajectory.json", 904 ) 905 saveable_trajectory = [ 906 list(entry[:2]) 907 + [entry[2].getdictionary()] 908 + list(entry[3:]) 909 for entry in self.trajectory 910 ]
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/smbo.py:552, in AutoMLSMBO.run_smbo(self) 549 if self.trials_callback is not None: 550 smac.register_callback(self.trials_callback) --> 552 smac.optimize() 554 self.runhistory = smac.solver.runhistory 555 self.trajectory = smac.solver.intensifier.traj_logger.trajectory
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/facade/smac_ac_facade.py:720, in SMAC4AC.optimize(self) 718 incumbent = None 719 try: --> 720 incumbent = self.solver.run() 721 finally: 722 self.solver.save()
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/optimizer/smbo.py:273, in SMBO.run(self) 266 # Skip the run if there was a request to do so. 267 # For example, during intensifier intensification, we 268 # don't want to rerun a config that was previously ran 269 if intent == RunInfoIntent.RUN: 270 # Track the fact that a run was launched in the run 271 # history. It's status is tagged as RUNNING, and once 272 # completed and processed, it will be updated accordingly --> 273 self.runhistory.add( 274 config=run_info.config, 275 cost=float(MAXINT) 276 if num_obj == 1 277 else np.full(num_obj, float(MAXINT)), 278 time=0.0, 279 status=StatusType.RUNNING, 280 instance_id=run_info.instance, 281 seed=run_info.seed, 282 budget=run_info.budget, 283 ) 285 run_info.config.config_id = self.runhistory.config_ids[run_info.config] 287 self.tae_runner.submit_run(run_info=run_info)
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/runhistory/runhistory.py:257, in RunHistory.add(self, config, cost, time, status, instance_id, seed, budget, starttime, endtime, additional_info, origin, force_update) 223 """Adds a data of a new target algorithm (TA) run; 224 it will update data if the same key values are used 225 (config, instance_id, seed) (...) 253 Forces the addition of a config to the history 254 """ 256 if config is None: --> 257 raise TypeError("Configuration to add to the runhistory must not be None") 258 elif not isinstance(config, Configuration): 259 raise TypeError( 260 "Configuration to add to the runhistory is not of type Configuration, but %s" 261 % type(config) 262 )
TypeError: Configuration to add to the runhistory must not be None.
I have no idea about to solve it, and I will be very grateful for your help!
I will close this issue, since I've already answered your question in the title, and the rest is related to your other opened issue, which I will keep open for now.
For your questions, kindly refer to my answer at #1661.
The error: TypeError Traceback (most recent call last) Cell In [25], line 16 1 regaallp = autosklearn.regression.AutoSklearnRegressor( 2 time_left_for_this_task=60, 3 per_run_time_limit=10, (...) 14 #ensemble_size=1, 15 ) ---> 16 regaallp.fit(X_train.values, y_train) 17 yaallp_pred = regaallp.predict(X_test.values)
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/estimators.py:1191, in AutoSklearnRegressor.fit(self, X, y, X_test, y_test, feat_type, dataset_name) 1178 raise ValueError("Regression with data of type {} is " 1179 "not supported. Supported types are {}. " 1180 "You can find more information about scikit-learn " (...) 1186 ) 1187 ) 1189 # Fit is supposed to be idempotent! 1190 # But not if we use share_mode. -> 1191 super().fit( 1192 X=X, 1193 y=y, 1194 X_test=X_test, 1195 y_test=y_test, 1196 feat_type=feat_type, 1197 dataset_name=dataset_name, 1198 ) 1200 return self
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/estimators.py:375, in AutoSklearnEstimator.fit(self, kwargs) 373 if self.automl is None: 374 self.automl = self.buildautoml() --> 375 self.automl.fit(load_models=self.load_models, kwargs) 377 return self
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/automl.py:2133, in AutoMLRegressor.fit(self, X, y, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models) 2122 def fit( 2123 self, 2124 X: SUPPORTED_FEAT_TYPES, (...) 2131 load_models: bool = True, 2132 ): -> 2133 return super().fit( 2134 X, y, 2135 X_test=X_test, 2136 y_test=y_test, 2137 feat_type=feat_type, 2138 dataset_name=dataset_name, 2139 only_return_configuration_space=only_return_configuration_space, 2140 load_models=load_models, 2141 is_classification=False, 2142 )
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/automl.py:931, in AutoML.fit(self, X, y, task, X_test, y_test, feat_type, dataset_name, only_return_configuration_space, load_models, is_classification) 898 _proc_smac = AutoMLSMBO( 899 config_space=self.configuration_space, 900 dataset_name=self._dataset_name, (...) 926 trials_callback=self._get_trialscallback 927 ) 929 try: 930 self.runhistory, self.trajectory_, self._budget_type = \ --> 931 _proc_smac.run_smbo() 932 trajectory_filename = os.path.join( 933 self._backend.get_smac_output_directory_for_run(self._seed), 934 'trajectory.json') 935 saveable_trajectory = \ 936 [list(entry[:2]) + [entry[2].getdictionary()] + list(entry[3:]) 937 for entry in self.trajectory]
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/autosklearn/smbo.py:498, in AutoMLSMBO.run_smbo(self) 495 if self.trials_callback is not None: 496 smac.register_callback(self.trials_callback) --> 498 smac.optimize() 500 self.runhistory = smac.solver.runhistory 501 self.trajectory = smac.solver.intensifier.traj_logger.trajectory
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/facade/smac_ac_facade.py:720, in SMAC4AC.optimize(self) 718 incumbent = None 719 try: --> 720 incumbent = self.solver.run() 721 finally: 722 self.solver.save()
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/optimizer/smbo.py:273, in SMBO.run(self) 266 # Skip the run if there was a request to do so. 267 # For example, during intensifier intensification, we 268 # don't want to rerun a config that was previously ran 269 if intent == RunInfoIntent.RUN: 270 # Track the fact that a run was launched in the run 271 # history. It's status is tagged as RUNNING, and once 272 # completed and processed, it will be updated accordingly --> 273 self.runhistory.add( 274 config=run_info.config, 275 cost=float(MAXINT) 276 if num_obj == 1 277 else np.full(num_obj, float(MAXINT)), 278 time=0.0, 279 status=StatusType.RUNNING, 280 instance_id=run_info.instance, 281 seed=run_info.seed, 282 budget=run_info.budget, 283 ) 285 run_info.config.config_id = self.runhistory.config_ids[run_info.config] 287 self.tae_runner.submit_run(run_info=run_info)
File ~/miniconda3/envs/p38/lib/python3.8/site-packages/smac/runhistory/runhistory.py:257, in RunHistory.add(self, config, cost, time, status, instance_id, seed, budget, starttime, endtime, additional_info, origin, force_update) 223 """Adds a data of a new target algorithm (TA) run; 224 it will update data if the same key values are used 225 (config, instance_id, seed) (...) 253 Forces the addition of a config to the history 254 """ 256 if config is None: --> 257 raise TypeError("Configuration to add to the runhistory must not be None") 258 elif not isinstance(config, Configuration): 259 raise TypeError( 260 "Configuration to add to the runhistory is not of type Configuration, but %s" 261 % type(config) 262 )
TypeError: Configuration to add to the runhistory must not be None