Closed kzielnicki closed 4 years ago
@kzielnicki, thank you so much for bringing this to our attention! Will get back to you with the short-term fix for the absent status quo issue shortly.
Hi @kzielnicki, sorry for the delay here! I have a patch ready to fix this; it should land by tomorrow.
Thanks @sdsingh! I can verify that I no longer get the status quo error when running the tutorial, but I now get a failure on the same step with `RuntimeError: size is inconsistent with indices: for dim 1, size is 1 but found index 1`. Full output and traceback:
Running rep 0
Online-only batch 0 0.005214214324951172
Online-only batch 1 95.17236709594727
Online-only batch 2 209.57900094985962
Multi-task batch 0 0.0068891048431396484
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-2128f1c3564e> in <module>
3 print('Running rep', rep)
4 for k, r in runners.items():
----> 5 obj, con = r()
6 iteration_objectives[k].append(obj)
7 iteration_constraints[k].append(con)
<ipython-input-10-5a7fb63abcac> in run_mtbo()
52 experiment=exp_multitask,
53 data=exp_multitask.fetch_data(),
---> 54 search_space=exp_multitask.search_space,
55 )
56
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/factory.py in get_MTGP(experiment, data, search_space, trial_index)
223 torch_dtype=torch.double,
224 torch_device=DEFAULT_TORCH_DEVICE,
--> 225 status_quo_features=status_quo_features,
226 )
227
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/torch.py in __init__(self, experiment, search_space, data, model, transforms, transform_configs, torch_dtype, torch_device, status_quo_name, status_quo_features, optimization_config)
70 status_quo_name=status_quo_name,
71 status_quo_features=status_quo_features,
---> 72 optimization_config=optimization_config,
73 )
74
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/base.py in __init__(self, search_space, model, transforms, experiment, data, transform_configs, status_quo_name, status_quo_features, optimization_config, fit_out_of_design)
143 search_space=search_space,
144 observation_features=obs_feats,
--> 145 observation_data=obs_data,
146 )
147 self.fit_time = time.time() - t_fit_start
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/torch.py in _fit(self, model, search_space, observation_features, observation_data)
84 search_space=search_space,
85 observation_features=observation_features,
---> 86 observation_data=observation_data,
87 )
88
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/array.py in _fit(self, model, search_space, observation_features, observation_data)
82 task_features=task_features,
83 feature_names=self.parameters,
---> 84 fidelity_features=fidelity_features,
85 )
86
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/modelbridge/torch.py in _model_fit(self, model, Xs, Ys, Yvars, bounds, task_features, feature_names, fidelity_features)
110 task_features=task_features,
111 feature_names=feature_names,
--> 112 fidelity_features=fidelity_features,
113 )
114
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/models/torch/botorch.py in fit(self, Xs, Ys, Yvars, bounds, task_features, feature_names, fidelity_features)
232 task_features=self.task_features,
233 fidelity_features=self.fidelity_features,
--> 234 fidelity_model_id=self.fidelity_model_id,
235 )
236
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/ax/models/torch/botorch_defaults.py in get_and_fit_model(Xs, Ys, Yvars, task_features, fidelity_features, refit_model, state_dict, fidelity_model_id, **kwargs)
121 # pyre-ignore: [16]
122 mll = ExactMarginalLogLikelihood(model.likelihood, model)
--> 123 mll = fit_gpytorch_model(mll, bounds=bounds)
124 return model
125
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/fit.py in fit_gpytorch_model(mll, optimizer, **kwargs)
60 for mll_ in mll.mlls:
61 fit_gpytorch_model(
---> 62 mll=mll_, optimizer=optimizer, max_retries=max_retries, **kwargs
63 )
64 return mll
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/fit.py in fit_gpytorch_model(mll, optimizer, **kwargs)
96 mll.model.load_state_dict(original_state_dict)
97 sample_all_priors(mll.model)
---> 98 mll, _ = optimizer(mll, track_iterations=False, **kwargs)
99 if not any(issubclass(w.category, OptimizationWarning) for w in ws):
100 mll.eval()
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/optim/fit.py in fit_gpytorch_scipy(mll, bounds, method, options, track_iterations)
208 jac=True,
209 options=options,
--> 210 callback=cb,
211 )
212 iterations = []
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
599 elif meth == 'l-bfgs-b':
600 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
--> 601 callback=callback, **options)
602 elif meth == 'tnc':
603 return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, **unknown_options)
333 # until the completion of the current minimization iteration.
334 # Overwrite f and g:
--> 335 f, g = func_and_grad(x)
336 elif task_str.startswith(b'NEW_X'):
337 # new iteration
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/scipy/optimize/lbfgsb.py in func_and_grad(x)
283 else:
284 def func_and_grad(x):
--> 285 f = fun(x, *args)
286 g = jac(x, *args)
287 return f, g
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/scipy/optimize/optimize.py in function_wrapper(*wrapper_args)
298 def function_wrapper(*wrapper_args):
299 ncalls[0] += 1
--> 300 return function(*(wrapper_args + args))
301
302 return ncalls, function_wrapper
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/scipy/optimize/optimize.py in __call__(self, x, *args)
61 def __call__(self, x, *args):
62 self.x = numpy.asarray(x).copy()
---> 63 fg = self.fun(x, *args)
64 self.jac = fg[1]
65 return fg[0]
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/optim/fit.py in _scipy_objective_and_grad(x, mll, property_dict)
266 return float("nan"), np.full_like(x, "nan")
267 else:
--> 268 raise e # pragma: nocover
269 loss.backward()
270 param_dict = OrderedDict(mll.named_parameters())
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/optim/fit.py in _scipy_objective_and_grad(x, mll, property_dict)
259 mll.zero_grad()
260 try: # catch linear algebra errors in gpytorch
--> 261 output = mll.model(*train_inputs)
262 args = [output, train_targets] + _get_extra_mll_args(mll)
263 loss = -mll(*args).sum()
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/models/exact_gp.py in __call__(self, *args, **kwargs)
228 if not all(torch.equal(train_input, input) for train_input, input in zip(train_inputs, inputs)):
229 raise RuntimeError("You must train on the training inputs!")
--> 230 res = super().__call__(*inputs, **kwargs)
231 return res
232
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
20
21 def __call__(self, *inputs, **kwargs):
---> 22 outputs = self.forward(*inputs, **kwargs)
23 if isinstance(outputs, list):
24 return [_validate_module_outputs(output) for output in outputs]
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/botorch/models/multitask.py in forward(self, x)
144 covar_i = self.task_covar_module(task_idcs)
145 # Combine the two in an ICM fashion
--> 146 covar = covar_x.mul(covar_i)
147 return MultivariateNormal(mean_x, covar)
148
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/lazy/lazy_tensor.py in mul(self, other)
1138 return self._mul_constant(other.view(*other.shape[:-2]))
1139
-> 1140 return self._mul_matrix(lazify(other))
1141
1142 def ndimension(self):
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/lazy/lazy_tensor.py in _mul_matrix(self, other)
487 other = other.evaluate_kernel()
488 if isinstance(self, NonLazyTensor) or isinstance(other, NonLazyTensor):
--> 489 return NonLazyTensor(self.evaluate() * other.evaluate())
490 else:
491 left_lazy_tensor = self if self._root_decomposition_size() < other._root_decomposition_size() else other
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/utils/memoize.py in g(self, *args, **kwargs)
32 cache_name = name if name is not None else method
33 if not is_in_cache(self, cache_name):
---> 34 add_to_cache(self, cache_name, method(self, *args, **kwargs))
35 return get_from_cache(self, cache_name)
36
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/lazy/lazy_tensor.py in evaluate(self)
862 eye = torch.eye(num_cols, dtype=self.dtype, device=self.device)
863 eye = eye.expand(*self.batch_shape, num_cols, num_cols)
--> 864 res = self.matmul(eye)
865 return res
866
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/lazy/interpolated_lazy_tensor.py in matmul(self, tensor)
391 # right_interp^T * tensor
392 base_size = self.base_lazy_tensor.size(-1)
--> 393 right_interp_res = left_t_interp(self.right_interp_indices, self.right_interp_values, tensor, base_size)
394
395 # base_lazy_tensor * right_interp^T * tensor
~/.pyenv/versions/3.7.3/envs/p3/lib/python3.7/site-packages/gpytorch/utils/interpolation.py in left_t_interp(interp_indices, interp_values, rhs, output_dim)
214 else:
215 cls = getattr(torch.sparse, type_name)
--> 216 summing_matrix = cls(summing_matrix_indices, summing_matrix_values, size)
217
218 # Sum up the values appropriately by performing sparse matrix multiplication
RuntimeError: size is inconsistent with indices: for dim 1, size is 1 but found index 1
Hi @kzielnicki, apologies -- I was unable to reproduce this when I submitted the patch, but I am able to reproduce it now. It is potentially due to a change in BoTorch/gpytorch; I'm now working with them to debug.
@kzielnicki, it looks like this was caused by an upstream issue in gpytorch (which has since been fixed in https://github.com/cornellius-gp/gpytorch/pull/911).
gpytorch is planning to put out a 0.4 release soon, which will include this fix. In the meantime, the tutorial should run fine if you install gpytorch from master: https://github.com/cornellius-gp/gpytorch#latest-unstable-version
@sdsingh, let's leave this issue open until the fix has made it into a release.
Thanks @Balandat and @sdsingh, I can verify that this runs after installing gpytorch from master 🎉
The tutorial now runs on latest stable versions of Ax + BoTorch!
The Ax Multi-Task GP tutorial (https://ax.dev/tutorials/multi_task.html) runs until step 4c of the Bayesian optimization loop,
at which point it fails with:
ValueError: status_quo is not defined for the selected trial.
Stack trace:
Environment: