SheffieldML / GPyOpt

Gaussian Process Optimization using GPy
BSD 3-Clause "New" or "Revised" License
928 stars 261 forks source link

IndexError when using with scikit learn's cross validation #76

Closed RonLuhtanen closed 7 years ago

RonLuhtanen commented 7 years ago

Hello,

Could someone give me some pointers on why I might be getting this error? I'm trying to optimize XGBoost parameters using scikit-learn's cross-validation. However, after running the CV for all the initialization points, it produces the error below.

Thanks.

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.cross_validation import cross_val_score
%pylab inline  
import GPy
import GPyOpt

def xgboostcv(max_depth = 10,
              learning_rate = 0.1,
              n_estimators = 50,
              gamma = 0.5,
              min_child_weight = 5,
              max_delta_step = 0.05,
              subsample = 1,
              colsample_bytree = 1,
              silent=True,
              nthread=-1):
    """Return the mean 5-fold cross-validated ROC AUC of an XGBClassifier.

    Every argument is forwarded to ``xgb.XGBClassifier`` under the same
    name; ``max_depth`` and ``n_estimators`` are coerced with ``int()``
    first. The training data is read from the module-level names ``X``
    and ``Y``, which are not defined in this snippet (presumably set up
    earlier in the notebook -- confirm before running).
    """
    classifier = xgb.XGBClassifier(
        max_depth=int(max_depth),
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        silent=silent,
        nthread=nthread,
        gamma=gamma,
        min_child_weight=min_child_weight,
        max_delta_step=max_delta_step,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
    )
    # The scorer is named explicitly; it binds to the same `scoring`
    # parameter the fourth positional argument reaches.
    fold_scores = cross_val_score(classifier, X, Y, scoring="roc_auc", cv=5)
    return fold_scores.mean()

def wrapper(X):
    """Objective handed to the optimizer: score one candidate with xgboostcv.

    ``X`` is indexed as a 2-D array: column 0 is read as ``max_depth`` and
    column 1 as ``learning_rate``. Each column slice is converted with
    ``int()`` / ``float()``, which only works when the slice holds a single
    element, i.e. one candidate row per call. The evaluated point and its
    score are printed, and the mean AUC from ``xgboostcv`` is returned.

    Note: the parameter name shadows the module-level ``X`` only inside
    this function; ``xgboostcv`` still reads the global one.
    """
    # Slice both columns before converting, so indexing errors surface first.
    depth_column, rate_column = X[:, 0], X[:, 1]
    depth = int(depth_column)
    rate = float(rate_column)

    score = xgboostcv(depth, rate)

    print("max depth: ", depth)
    print("learning rate: ", rate)
    print("mean test AUC: ", score)
    return score

# Search space handed to GPyOpt, one dict per variable, in the same column
# order that `wrapper` reads them (column 0 = max_depth, column 1 =
# learning_rate). max_depth takes the integer values 1 through 19.
bounds = [{'name':'max_depth', 'type':'discrete', 'domain':range(1, 20)},
         {'name':'learning_rate', 'type':'continuous', 'domain':(0.01, 0.3)}]

# Seed NumPy's global RNG through the explicit `np` import rather than the
# bare `seed` name that only exists after `%pylab inline`. Presumably this
# is what makes the initial design reproducible -- confirm against GPyOpt.
np.random.seed(123)

# `wrapper` returns a mean AUC, where larger is better, so the optimizer
# must be told to maximize; without `maximize` it minimizes the objective.
# Constructing the object already evaluates the initial design points.
myBopt = GPyOpt.methods.BayesianOptimization(f=wrapper,
                                            domain=bounds,
                                            acquisition_type='LCB',
                                            initial_design_numdata = 4,
                                            exact_feval = True,
                                            maximize = True)
Populating the interactive namespace from numpy and matplotlib
max depth:  16
learning rate:  0.21864600123781328
mean test AUC:  0.828290572493
max depth:  10
learning rate:  0.13270087343609366
mean test AUC:  0.8143772926
max depth:  1
learning rate:  0.2944216175315385
mean test AUC:  0.823469451312
max depth:  15
learning rate:  0.20860062418961034
mean test AUC:  0.825270108225
The set cost function is ignored! LBC acquisition does not make sense with cost.
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-58-ef0d82ecb5ae> in <module>()
     51                                             acquisition_type='LCB',
     52                                             initial_design_numdata = 4,
---> 53                                             exact_feval = True)

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/methods/bayesian_optimization.py in __init__(self, f, domain, constrains, cost_withGradients, model_type, X, Y, initial_design_numdata, initial_design_type, acquisition_type, normalize_Y, exact_feval, acquisition_optimizer_type, model_update_interval, evaluator_type, batch_size, num_cores, verbosity, verbosity_model, bounds, maximize, **kwargs)
    242 
    243         # --- Initialize everything
--> 244         self.run_optimization(max_iter=0,verbosity=self.verbosity)
    245 
    246     def _model_chooser(self):

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/methods/bayesian_optimization.py in run_optimization(self, max_iter, max_time, eps, verbosity, save_models_parameters, report_file, evaluations_file, models_file, **kwargs)
    456                 self.acquisition_optimizer.optimizer ='CMA'
    457             print('WARNING: "acqu_optimize_method" will be deprecated in the next version!')
--> 458         super(BayesianOptimization, self).run_optimization(max_iter = max_iter, max_time = max_time,  eps = eps, verbosity=verbosity, save_models_parameters = save_models_parameters, report_file = report_file, evaluations_file= evaluations_file, models_file=models_file)
    459 
    460     def _sign(self,f):

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/core/bo.py in run_optimization(self, max_iter, max_time, eps, verbosity, save_models_parameters, report_file, evaluations_file, models_file)
    106 
    107             # --- Update and optimize acquisition and compute the exploration level in the next evaluation
--> 108             self.suggested_sample = self._compute_next_evaluations()
    109 
    110             if not ((self.num_acquisitions < self.max_iter) and (self._distance_last_evaluations() > self.eps)):

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/core/bo.py in _compute_next_evaluations(self)
    184         Computes the location of the new evaluation (optimizes the acquisition in the standard case).
    185         """
--> 186         return self.evaluator.compute_batch()
    187 
    188     def _update_model(self):

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/core/evaluators/sequential.py in compute_batch(self)
     19         Selects the new location to evaluate the objective.
     20         """
---> 21         return self.acquisition.optimize()
     22 
     23 

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/acquisitions/base.py in optimize(self)
     57             out = self.optimizer.optimize(f=self.acquisition_function)[0]
     58         else:
---> 59             out = self.optimizer.optimize(f=self.acquisition_function, f_df=self.acquisition_function_withGradients)[0]
     60         return out
     61 

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/optimization/acquisition_optimizer.py in optimize(self, f, df, f_df)
    280 
    281         for i in range(num_discrete):
--> 282             self.mixed_optimizer.fix_dimensions(dims=self.discrete_dims, values=self.discrete_values[i,:])
    283             partial_x_min[i,:] , partial_f_min[i,:] = self.mixed_optimizer.optimize(f, df, f_df)
    284 

/home/ron/software/anaconda3/lib/python3.6/site-packages/GPyOpt/optimization/acquisition_optimizer.py in fix_dimensions(self, dims, values)
    118 
    119         # -- take only the fixed components of the random samples
--> 120         self.samples = self.samples[:,np.array(self.free_dims)] # take only the component of active dims
    121         self.subspace = self.space.get_subspace(self.free_dims)
    122         self.optimizer = select_optimizer(self.optimizer_name)(Design_space(self.subspace), **self.kwargs)

IndexError: index 1 is out of bounds for axis 1 with size 1
yxchng commented 7 years ago

@RonLuhtanen Have you solved this?

RonLuhtanen commented 7 years ago

@yxchng Yes. The error was caused by a bug where the package only accepted continuous and discrete variables in a certain order; this has already been fixed in the development branch.