cornellius-gp / gpytorch

A highly efficient implementation of Gaussian Processes in PyTorch

Error in Gaussian Process with many data points [Bug] #2456

Closed Luisa47o closed 6 months ago

Luisa47o commented 6 months ago

πŸ› Bug

When creating a Gaussian process with many data points (820 in the snippet below), a RuntimeError occurs; with fewer data points the code works.

To reproduce

Code snippet to reproduce

import numpy as np
import torch
import gpytorch

class GPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        # Constant mean with a scaled RBF covariance kernel
        self.mean = gpytorch.means.ConstantMean()
        self.cov = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean(x)
        covar_x = self.cov(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train(model, likelihood):
    training_iter = 200

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    losses = []
    for i in range(training_iter):
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model (train_x and train_y come from module scope)
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, train_y)
        losses.append(loss.item())
        loss.backward()
        #print('Iter %d/%d - Loss: %.3f - noise: %.3f' % (i + 1, training_iter, loss.item(), model.likelihood.noise.item()))

        optimizer.step()

# NumPy draws float64 samples; torch.tensor keeps that dtype
data_train_x = np.random.uniform(low=0, high=20, size=(820, 3))
data_train_y = np.random.uniform(low=0, high=1, size=(820,))

train_x = torch.tensor(data_train_x)
train_y = torch.tensor(data_train_y)

# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPModel(train_x, train_y, likelihood)

train(model, likelihood)

Stack trace/error message

RuntimeError                              Traceback (most recent call last)
Cell In[8], line 11
      8 likelihood = gpytorch.likelihoods.GaussianLikelihood()
      9 model = GPModel(train_x, train_y, likelihood)
---> 11 train(model, likelihood)

Cell In[3], line 20, in train(model, likelihood)
     18 output = model(train_x)
     19 # Calc loss and backprop gradients
---> 20 loss = -mll(output, train_y)
     21 losses.append(loss.item())
     22 loss.backward()

File ~/.local/lib/python3.10/site-packages/gpytorch/module.py:31, in Module.__call__(self, *inputs, **kwargs)
     30 def __call__(self, *inputs, **kwargs) -> Union[Tensor, Distribution, LinearOperator]:
---> 31     outputs = self.forward(*inputs, **kwargs)
     32     if isinstance(outputs, list):
     33         return [_validate_module_outputs(output) for output in outputs]

File ~/.local/lib/python3.10/site-packages/gpytorch/mlls/exact_marginal_log_likelihood.py:64, in ExactMarginalLogLikelihood.forward(self, function_dist, target, *params)
     62 # Get the log prob of the marginal distribution
     63 output = self.likelihood(function_dist, *params)
---> 64 res = output.log_prob(target)
     65 res = self._add_other_terms(res, params)
     67 # Scale by the amount of data we have

File ~/.local/lib/python3.10/site-packages/gpytorch/distributions/multivariate_normal.py:193, in MultivariateNormal.log_prob(self, value)
    191 # Get log determininant and first part of quadratic form
    192 covar = covar.evaluate_kernel()
--> 193 inv_quad, logdet = covar.inv_quad_logdet(inv_quad_rhs=diff.unsqueeze(-1), logdet=True)
    195 res = -0.5 * sum([inv_quad, logdet, diff.size(-1) * math.log(2 * math.pi)])
    196 return res

File ~/.local/lib/python3.10/site-packages/linear_operator/operators/_linear_operator.py:1764, in LinearOperator.inv_quad_logdet(self, inv_quad_rhs, logdet, reduce_inv_quad)
   1761 probe_vectors, probe_vector_norms = self._probe_vectors_and_norms()
   1763 func = InvQuadLogdet.apply
-> 1764 inv_quad_term, pinvk_logdet = func(
   1765     self.representation_tree(),
   1766     precond_lt.representation_tree(),
   1767     preconditioner,
   1768     len(precond_args),
   1769     (inv_quad_rhs is not None),
   1770     probe_vectors,
   1771     probe_vector_norms,
   1772     *(list(args) + list(precond_args)),
   1773 )
   1774 logdet_term = pinvk_logdet
   1775 logdet_term = logdet_term + logdet_p

File ~/.local/lib/python3.10/site-packages/linear_operator/functions/_inv_quad_logdet.py:132, in InvQuadLogdet.forward(ctx, representation_tree, precond_representation_tree, preconditioner, num_precond_args, inv_quad, probe_vectors, probe_vector_norms, *args)
    130 # Perform solves (for inv_quad) and tridiagonalization (for estimating logdet)
    131 rhs = torch.cat(rhs_list, -1)
--> 132 solves, t_mat = linear_op._solve(rhs, preconditioner, num_tridiag=num_random_probes)
    134 # Final values to return
    135 logdet_term = torch.zeros(linear_op.batch_shape, dtype=ctx.dtype, device=ctx.device)

File ~/.local/lib/python3.10/site-packages/linear_operator/operators/_linear_operator.py:789, in LinearOperator._solve(self, rhs, preconditioner, num_tridiag)
    774 def _solve(
    775     self: Float[LinearOperator, "... N N"],
    776     rhs: Float[torch.Tensor, "... N C"],
   (...)
    784     ],
    785 ]:
    786     r"""
    787     TODO
    788     """
--> 789     return utils.linear_cg(
    790         self._matmul,
    791         rhs,
    792         n_tridiag=num_tridiag,
    793         max_iter=settings.max_cg_iterations.value(),
    794         max_tridiag_iter=settings.max_lanczos_quadrature_iterations.value(),
    795         preconditioner=preconditioner,
    796     )

File ~/.local/lib/python3.10/site-packages/linear_operator/utils/linear_cg.py:186, in linear_cg(matmul_closure, rhs, n_tridiag, tolerance, eps, stop_updating_after, max_iter, max_tridiag_iter, initial_guess, preconditioner)
    183 initial_guess = initial_guess.div(rhs_norm)
    185 # residual: residual_{0} = b_vec - lhs x_{0}
--> 186 residual = rhs - matmul_closure(initial_guess)
    187 batch_shape = residual.shape[:-2]
    189 # result <- x_{0}

File ~/.local/lib/python3.10/site-packages/linear_operator/operators/added_diag_linear_operator.py:77, in AddedDiagLinearOperator._matmul(self, rhs)
     73 def _matmul(
     74     self: Float[LinearOperator, "*batch M N"],
     75     rhs: Union[Float[torch.Tensor, "*batch2 N C"], Float[torch.Tensor, "*batch2 N"]],
     76 ) -> Union[Float[torch.Tensor, "... M C"], Float[torch.Tensor, "... M"]]:
---> 77     return torch.addcmul(self._linear_op._matmul(rhs), self._diag_tensor._diag.unsqueeze(-1), rhs)

RuntimeError: !(has_different_input_dtypes && !config.promote_inputs_to_common_dtype_ && (has_undefined_outputs || config.enforce_safe_casting_to_output_ || config.cast_common_dtype_to_outputs_))INTERNAL ASSERT FAILED at "../aten/src/ATen/TensorIterator.cpp":399, please report a bug to PyTorch. 


Additional context

The same error occurs when using a one-dimensional input.
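
For what it's worth, the assert message mentions has_different_input_dtypes, which points at mixed precision inside the failing torch.addcmul call: the NumPy-generated training data is float64, while a freshly initialized GPyTorch model and likelihood hold float32 parameters. A minimal sketch of a dtype-alignment workaround, under that assumption (either direction should work):

# Option 1: cast the training data down to float32 to match the
# default parameter dtype of the model and likelihood.
train_x = torch.tensor(data_train_x, dtype=torch.float32)
train_y = torch.tensor(data_train_y, dtype=torch.float32)

# Option 2: promote the model and likelihood to float64 instead,
# matching the NumPy-derived training data.
model = model.double()
likelihood = likelihood.double()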

supersjgk commented 6 months ago

I'm not getting any errors, either with 1000 (> 820) data points or with any number of input dimensions. Your PyTorch version is very old; try updating it.
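
A plausible reason the failure only appears past a few hundred points: by default GPyTorch switches from a direct Cholesky solve to iterative conjugate gradients once the kernel matrix exceeds gpytorch.settings.max_cholesky_size (800, if I recall the default correctly), and the failing addcmul sits on that CG path (note linear_cg in the trace). Assuming that is the trigger, forcing the Cholesky path should sidestep it, at the cost of a dense O(n^3) solve:

import gpytorch

# Raise the Cholesky threshold above the number of training points so
# the dense direct solver is used instead of conjugate gradients.
with gpytorch.settings.max_cholesky_size(2000):
    train(model, likelihood)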

Luisa47o commented 6 months ago

Updating the PyTorch version solves the issue. Thank you!