kjappelbaum / pyepal

Multiobjective active learning with tunable accuracy/efficiency tradeoff and clear stopping criterion.
Apache License 2.0
37 stars 5 forks source link

`PALSklearn` pickling error #265

Open kjappelbaum opened 1 year ago

kjappelbaum commented 1 year ago

Steps to reproduce the problem

RuntimeError: Pickling of "rdkit.rdBase._vectclass std::vector<int, ...>" instances is not enabled

Reported by user via email (code below copied from screenshot).

import sklearn.gaussian_process.kernels as kernels
from pyepal import PALSklearn
#from pyepal import PALGPyReclassify
from pyepal.pal.schedules import linear
# NOTE(review): this fragment was transcribed from a screenshot; `np`,
# `sklearn`, `y_red` and `y_wav` are assumed to be defined earlier in the
# user's script (not shown) -- confirm with the reporter.

# Flip the sign of the first objective.
y_red = -y_red
# Replace each value by its negated absolute distance from 375, so values
# closest to 375 become the largest.
y_wav = [-np.abs(i - 375) for i in y_wav]

# Build GPR models: one regressor per objective, all configured the same way.
kernel = kernels.WhiteKernel() + kernels.Matern()
gpr_red = sklearn.gaussian_process.GaussianProcessRegressor(
    kernel=kernel, n_restarts_optimizer=10
)
gpr_sol = sklearn.gaussian_process.GaussianProcessRegressor(
    kernel=kernel, n_restarts_optimizer=10
)
gpr_wav = sklearn.gaussian_process.GaussianProcessRegressor(
    kernel=kernel, n_restarts_optimizer=10
)
# Active Learning
def multiple_bayesian_optimization(n_initial=10):
    """Run the PAL active-learning loop and return the Pareto-optimal points.

    Relies on module-level names defined elsewhere in the script: ``smiles``,
    ``X``, ``y`` (indexed with ``.iloc``), ``np``, ``PALSklearn`` and the three
    regressors ``gpr_red``, ``gpr_sol`` and ``gpr_wav``.

    Args:
        n_initial: Number of distinct points used to seed the training set.
            Capped at ``len(smiles)``.

    Returns:
        A tuple ``(smiles[opt_ind], y.iloc[opt_ind])`` where ``opt_ind`` is
        ``pal.pareto_optimal_indices`` after the loop finishes.
    """
    n_data = len(smiles)
    # Valid positions are 0 .. n_data - 1; including n_data would index past
    # the end of the data.
    indices = np.arange(n_data)
    # Draw without replacement so the initial training set has no duplicates.
    init_train_indices = np.random.choice(
        indices, size=min(n_initial, n_data), replace=False
    )
    init_y_train = y.iloc[init_train_indices]

    models = [gpr_red, gpr_sol, gpr_wav]
    pal = PALSklearn(X, models, 3)

    # Set the hyperparameters.
    pal.epsilon = 0.05
    # pal.delta is left at its default.
    pal.beta_scale = 0.05

    pal.update_train_set(init_train_indices, init_y_train)

    while pal.number_unclassified_points > 0:
        next_index = pal.run_one_step()
        # NOTE(review): the pyepal documentation describes run_one_step()
        # returning None when nothing is left to sample; stop instead of
        # indexing `y` with None. Confirm against the installed version.
        if next_index is None:
            break
        pal.update_train_set(next_index, y.iloc[next_index])

    opt_ind = pal.pareto_optimal_indices
    return smiles[opt_ind], y.iloc[opt_ind]

Specifications