dfm / tess-atlas

MIT License
9 stars 8 forks source link

`LinAlgError` Thrown by celerite2 while sampling for TOI178 #44

Closed avivajpeyi closed 3 years ago

avivajpeyi commented 4 years ago

Notes:

Stack trace:

ATM not sure what the relavent error is, pasting fullstack-trace for future convenience

trace = start_model_sampling(planet_transient_model)
Sequential sampling (1 chains in 1 job)
NUTS: [sigma_gp, rho_gp, sigma, u, f0, b, r, d, p, t0]

 Interrupted
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
    902             outputs =\
--> 903                 self.fn() if output_subset is None else\
    904                 self.fn(output_subset=output_subset)

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/gof/op.py in rval(p, i, o, n)
    891             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 892                 r = p(n, [x[0] for x in i], o)
    893                 for o in node.outputs:

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/theano/ops.py in perform(self, node, inputs, outputs)
    115         try:
--> 116             backprop.factor_fwd(a, U, V, P, d, W, S)
    117         except backprop.LinAlgError:

LinAlgError: failed to factorize or solve matrix

During handling of the above exception, another exception occurred:

LinAlgError                               Traceback (most recent call last)
<ipython-input-1-8684bd7d7442> in <module>
----> 1 trace = start_model_sampling(planet_transient_model)

<ipython-input-1-604ea78f527a> in start_model_sampling(model)
     13             chains=CHAINS,
     14             cores=1,
---> 15             step=xo.get_dense_nuts_step(target_accept=0.9),
     16         )
     17         return trace

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
    584             _log.info("Sequential sampling ({} chains in 1 job)".format(chains))
    585             _print_step_hierarchy(step)
--> 586             trace = _sample_many(**sample_args)
    587 
    588     t_sampling = time.time() - t_start

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/sampling.py in _sample_many(draws, chain, chains, start, random_seed, step, callback, **kwargs)
    705             random_seed=random_seed[i],
    706             callback=callback,
--> 707             **kwargs,
    708         )
    709         if trace is None:

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/sampling.py in _sample(chain, progressbar, random_seed, start, draws, step, trace, tune, model, callback, **kwargs)
    844     try:
    845         strace = None
--> 846         for it, (strace, diverging) in enumerate(sampling):
    847             if it >= skip_first and diverging:
    848                 _pbar_data["divergences"] += 1

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/fastprogress/fastprogress.py in __iter__(self)
     45         except Exception as e:
     46             self.on_interrupt()
---> 47             raise e
     48 
     49     def update(self, val):

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/fastprogress/fastprogress.py in __iter__(self)
     39         if self.total != 0: self.update(0)
     40         try:
---> 41             for i,o in enumerate(self.gen):
     42                 if i >= self.total: break
     43                 yield o

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/sampling.py in _iter_sample(draws, step, start, trace, chain, tune, model, random_seed, callback)
   1001                 step = stop_tuning(step)
   1002             if step.generates_stats:
-> 1003                 point, stats = step.step(point)
   1004                 if strace.supports_sampler_stats:
   1005                     strace.record(point, stats)

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py in step(self, point)
    261 
    262         if self.generates_stats:
--> 263             apoint, stats = self.astep(array)
    264             point = self._logp_dlogp_func.array_to_full_dict(apoint)
    265             return point, stats

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py in astep(self, q0)
    174             step_size = self._step_rand(step_size)
    175 
--> 176         hmc_step = self._hamiltonian_step(start, p0, step_size)
    177 
    178         perf_end = time.perf_counter()

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _hamiltonian_step(self, start, p0, step_size)
    182         for _ in range(max_treedepth):
    183             direction = logbern(np.log(0.5)) * 2 - 1
--> 184             divergence_info, turning = tree.extend(direction)
    185 
    186             if divergence_info or turning:

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in extend(self, direction)
    275         if direction > 0:
    276             tree, diverging, turning = self._build_subtree(
--> 277                 self.right, self.depth, floatX(np.asarray(self.step_size))
    278             )
    279             leftmost_begin, leftmost_end = self.left, self.right

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    376             return tree1, diverging, turning
    377 
--> 378         tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon)
    379 
    380         left, right = tree1.left, tree2.right

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    372             return self._single_step(left, epsilon)
    373 
--> 374         tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon)
    375         if diverging or turning:
    376             return tree1, diverging, turning

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    372             return self._single_step(left, epsilon)
    373 
--> 374         tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon)
    375         if diverging or turning:
    376             return tree1, diverging, turning

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    372             return self._single_step(left, epsilon)
    373 
--> 374         tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon)
    375         if diverging or turning:
    376             return tree1, diverging, turning

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    376             return tree1, diverging, turning
    377 
--> 378         tree2, diverging, turning = self._build_subtree(tree1.right, depth - 1, epsilon)
    379 
    380         left, right = tree1.left, tree2.right

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _build_subtree(self, left, depth, epsilon)
    370     def _build_subtree(self, left, depth, epsilon):
    371         if depth == 0:
--> 372             return self._single_step(left, epsilon)
    373 
    374         tree1, diverging, turning = self._build_subtree(left, depth - 1, epsilon)

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/nuts.py in _single_step(self, left, epsilon)
    328         """Perform a leapfrog step and handle error cases."""
    329         try:
--> 330             right = self.integrator.step(epsilon, left)
    331         except IntegrationError as err:
    332             error_msg = str(err)

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/integration.py in step(self, epsilon, state)
     66         """
     67         try:
---> 68             return self._step(epsilon, state)
     69         except linalg.LinAlgError as err:
     70             msg = "LinAlgError during leapfrog step."

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/step_methods/hmc/integration.py in _step(self, epsilon, state)
     99         axpy(v_new, q_new, a=epsilon)
    100 
--> 101         logp = self._logp_dlogp_func(q_new, q_new_grad)
    102 
    103         # p_new = p_new + dt * q_new_grad

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/pymc3/model.py in __call__(self, array, grad_out, extra_vars)
    688             out = grad_out
    689 
--> 690         logp, dlogp = self._theano_function(array)
    691         if grad_out is None:
    692             return logp, dlogp

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
    915                     node=self.fn.nodes[self.fn.position_of_error],
    916                     thunk=thunk,
--> 917                     storage_map=getattr(self.fn, 'storage_map', None))
    918             else:
    919                 # old-style linkers raise their own exceptions

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
    323         # extra long error message in that case.
    324         pass
--> 325     reraise(exc_type, exc_value, exc_trace)
    326 
    327 

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/six.py in reraise(tp, value, tb)
    700                 value = tp()
    701             if value.__traceback__ is not tb:
--> 702                 raise value.with_traceback(tb)
    703             raise value
    704         finally:

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
    901         try:
    902             outputs =\
--> 903                 self.fn() if output_subset is None else\
    904                 self.fn(output_subset=output_subset)
    905         except Exception:

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/gof/op.py in rval(p, i, o, n)
    890             # default arguments are stored in the closure of `rval`
    891             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 892                 r = p(n, [x[0] for x in i], o)
    893                 for o in node.outputs:
    894                     compute_map[o][0] = True

~/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/theano/ops.py in perform(self, node, inputs, outputs)
    114 
    115         try:
--> 116             backprop.factor_fwd(a, U, V, P, d, W, S)
    117         except backprop.LinAlgError:
    118             if not self.quiet:

LinAlgError: failed to factorize or solve matrix
Apply node that caused the error: FactorOp{quiet=False}(Elemwise{add,no_inplace}.0, Join.0, Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0)
Toposort index: 128
Inputs types: [TensorType(float64, vector), TensorType(float64, matrix), TensorType(float64, matrix), TensorType(float64, matrix)]
Inputs shapes: [(3170,), (3170, 2), (3170, 2), (3169, 2)]
Inputs strides: [(8,), (16, 8), (16, 8), (16, 8)]
Inputs values: ['not shown', 'not shown', 'not shown', 'not shown']
Outputs clients: [[NormOp(Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, Elemwise{Composite{(i0 - ((i1 * i2) + i3))}}[(0, 2)].0), NormRevOp(Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, Elemwise{Composite{(i0 - ((i1 * i2) + i3))}}[(0, 2)].0, NormOp.0, NormOp.1, NormOp.2, TensorConstant{-0.5}), FactorRevOp(Elemwise{add,no_inplace}.0, Join.0, Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, FactorOp{quiet=False}.2, Elemwise{Composite{((i0 / i1) + i2)}}[(0, 2)].0, NormRevOp.3), Elemwise{Log}[(0, 0)](FactorOp{quiet=False}.0), Elemwise{Composite{((i0 / i1) + i2)}}[(0, 2)](TensorConstant{(1,) of -0.5}, FactorOp{quiet=False}.0, NormRevOp.2)], [NormOp(Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, Elemwise{Composite{(i0 - ((i1 * i2) + i3))}}[(0, 2)].0), NormRevOp(Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, Elemwise{Composite{(i0 - ((i1 * i2) + i3))}}[(0, 2)].0, NormOp.0, NormOp.1, NormOp.2, TensorConstant{-0.5}), FactorRevOp(Elemwise{add,no_inplace}.0, Join.0, Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, FactorOp{quiet=False}.2, Elemwise{Composite{((i0 / i1) + i2)}}[(0, 2)].0, NormRevOp.3)], [FactorRevOp(Elemwise{add,no_inplace}.0, Join.0, Join.0, Elemwise{Composite{exp((i0 * i1 * i2))}}.0, FactorOp{quiet=False}.0, FactorOp{quiet=False}.1, FactorOp{quiet=False}.2, Elemwise{Composite{((i0 / i1) + i2)}}[(0, 2)].0, NormRevOp.3)]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-5e7f64502280>", line 1, in <module>
    planet_transient_model, params, gp = build_planet_transient_model(tic_entry)
  File "<ipython-input-1-6d78fd669e2e>", line 42, in build_planet_transient_model
    gp = GaussianProcess(kernel, t=t, diag=yerr ** 2 + sigma ** 2, mean=lightcurve)
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/ext.py", line 136, in __init__
    self.compute(t, **kwargs)
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/ext.py", line 199, in compute
    self.do_compute(quiet)
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/theano/celerite2.py", line 68, in do_compute
    self._a, self._U, self._V, self._P
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/theano/gof/op.py", line 615, in __call__
    node = self.make_node(*inputs, **kwargs)
  File "/Users/avaj0001/anaconda3/envs/phase-marginalisation-test/lib/python3.7/site-packages/celerite2/theano/ops.py", line 83, in make_node
    otypes.append(tt.dvector())

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
dfm commented 4 years ago

This generally happens when the GP hyperparameters blow up to get tiny or huge. This can be caused by bad data (outliers, etc.) or priors that aren't restrictive enough (the length scale shouldn't be allowed to be shorter than the sampling, for example).

Maybe take a look at the map solution and see if any of the noise parameters are blowing up?

dfm commented 4 years ago

We can also add the quiet=True parameter to the GaussianProcess __init__, but that'll normally just hide the problem.

avivajpeyi commented 4 years ago
avivajpeyi commented 3 years ago

The kernel is supposed to be +

There might be overflow/underflow issues at weird parameters

  1. Run with --quiet

  2. if still getting error + lots of divergences, try to restrict bounds + tighten priors? We want to figure out where the problem is happening


  1. Make corner plot of GP parameters.
  2. Jitter --> unit flux, rho --> unit days. do these values make sense? Are these crazy small/big?

Estimate inverse gamma --> helps to set the lower and upper bound on mass?

dfm commented 3 years ago

Dupe of #80