cornellius-gp / gpytorch

A highly efficient implementation of Gaussian Processes in PyTorch

[Bug] ExactMarginalLogLikelihood requires kwargs when using FixedNoiseGaussianLikelihood #721

Open · stevenstetzler opened this issue 5 years ago

stevenstetzler commented 5 years ago

🐛 Bug

Currently, the log likelihood computed through gpytorch.mlls.ExactMarginalLogLikelihood cannot be computed correctly with fixed Gaussian noise: the MLL's forward method does not accept keyword arguments, so a noise tensor cannot be passed through to the FixedNoiseGaussianLikelihood.

To reproduce

Code snippet to reproduce

```python
import torch
import gpytorch
from matplotlib import pyplot as plt
import numpy as np

noise_level = 1e-1

train_x = torch.tensor([1, 2, 3, 2.5], dtype=torch.float)
train_y = torch.tensor([3, 3.5, 2, 7], dtype=torch.float)
train_y_err = torch.ones(train_x.shape[0]) * noise_level

class RBF(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(RBF, self).__init__(train_x, train_y, likelihood)

        # Subtracted mean already
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

# Compute log likelihood on training data
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
model = RBF(train_x, train_y, likelihood)

model.eval(), likelihood.eval()

log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
# Produces no warning
print(log_likelihood(model(train_x), train_y))

# Compute log likelihood on subset of data
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
model = RBF(train_x, train_y, likelihood)

model.eval(), likelihood.eval()

log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
# Produces a warning (below)
print(log_likelihood(model(train_x[:3]), train_y[:3]))
# Out: tensor(-2.8965, grad_fn=<DivBackward0>)
```

I changed gpytorch/mlls/exact_marginal_log_likelihood.py so that the forward method accepts kwargs and passes them to the likelihood call, just as it already does with *params. The original:

```python
class ExactMarginalLogLikelihood(MarginalLogLikelihood):
    def __init__(self, likelihood, model):
        """
        A special MLL designed for exact inference

        Args:
        - likelihood: (Likelihood) - the likelihood for the model
        - model: (Module) - the exact GP model
        """
        if not isinstance(likelihood, _GaussianLikelihoodBase):
            raise RuntimeError("Likelihood must be Gaussian for exact inference")
        super(ExactMarginalLogLikelihood, self).__init__(likelihood, model)

    def forward(self, output, target, *params):
        if not isinstance(output, MultivariateNormal):
            raise RuntimeError("ExactMarginalLogLikelihood can only operate on Gaussian random variables")

        # Get the log prob of the marginal distribution
        output = self.likelihood(output, *params)
        res = output.log_prob(target)

        # Add terms for SGPR / when inducing points are learned
        added_loss = torch.zeros_like(res)
        for added_loss_term in self.model.added_loss_terms():
            added_loss.add(added_loss_term.loss())

        res = res.add(0.5, added_loss)

        # Add log probs of priors on the (functions of) parameters
        for _, prior, closure, _ in self.named_priors():
            res.add_(prior.log_prob(closure()).sum())

        # Scale by the amount of data we have
        num_data = target.size(-1)
        return res.div_(num_data)
```

was changed to:

```python
class ExactMarginalLogLikelihood(MarginalLogLikelihood):
    def __init__(self, likelihood, model):
        """
        A special MLL designed for exact inference

        Args:
        - likelihood: (Likelihood) - the likelihood for the model
        - model: (Module) - the exact GP model
        """
        if not isinstance(likelihood, _GaussianLikelihoodBase):
            raise RuntimeError("Likelihood must be Gaussian for exact inference")
        super(ExactMarginalLogLikelihood, self).__init__(likelihood, model)

    def forward(self, output, target, *params, **kwargs):
        if not isinstance(output, MultivariateNormal):
            raise RuntimeError("ExactMarginalLogLikelihood can only operate on Gaussian random variables")

        # Get the log prob of the marginal distribution
        output = self.likelihood(output, *params, **kwargs)
        res = output.log_prob(target)

        # Add terms for SGPR / when inducing points are learned
        added_loss = torch.zeros_like(res)
        for added_loss_term in self.model.added_loss_terms():
            added_loss.add(added_loss_term.loss())

        res = res.add(0.5, added_loss)

        # Add log probs of priors on the (functions of) parameters
        for _, prior, closure, _ in self.named_priors():
            res.add_(prior.log_prob(closure()).sum())

        # Scale by the amount of data we have
        num_data = target.size(-1)
        return res.div_(num_data)
```

This results in a different log likelihood calculation (a presumably correct one) with no warning:

```python
# Changing ExactMarginalLogLikelihood.forward to take kwargs successfully
# passes noise to the likelihood and produces no warning and a different
# result for the log likelihood
likelihood = gpytorch.likelihoods.FixedNoiseGaussianLikelihood(noise=train_y_err, learn_additional_noise=False)
model = RBF(train_x, train_y, likelihood)

model.eval(), likelihood.eval()

log_likelihood = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
print(log_likelihood(model(train_x[:3]), train_y[:3], noise=train_y_err[:3]))
# Out: tensor(-1.3459, grad_fn=<DivBackward0>)
```

Stack trace/error message

```
/astro/users/stevengs/.conda/envs/mypy/lib/python3.6/site-packages/gpytorch/likelihoods/gaussian_likelihood.py:193: UserWarning: You have passed data through a FixedNoiseGaussianLikelihood that did not match the size of the fixed noise, *and* you did not specify noise. This is treated as a no-op.
  "You have passed data through a FixedNoiseGaussianLikelihood that did not match the size "
```

Expected Behavior

I expect calls to ExactMarginalLogLikelihood to accommodate fixed-noise models, and I believe the changes above fix that.
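For contrast, the usual training-mode usage works without any extra arguments, since the fixed noise tensor already matches the full training set. A minimal sketch, continuing the reproduction snippet above:

```python
# Minimal sketch (continuing the reproduction snippet above): in train mode on
# the full training set, the fixed noise already matches the data size, so no
# noise kwarg is needed and no warning is raised.
model.train(), likelihood.train()
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
output = model(train_x)
loss = -mll(output, train_y)
```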

jacobrgardner commented 5 years ago

The fix you have seems sensible enough, but do we think it's a good idea to allow ExactMarginalLogLikelihood to be called with the model in eval mode at all? In the past this has led to people accidentally "training" their models in eval mode, and the test log likelihood can be computed directly without going through the MLL, e.g. with:

```python
log_lik = likelihood(model(train_x[:3]), noise=train_y_err[:3]).log_prob(train_y[:3])
normalized_log_lik = log_lik/3  # Keep in mind ExactMarginalLogLikelihood normalizes itself by n...
print(normalized_log_lik)
# Out: tensor(-1.3459, grad_fn=<DivBackward0>)
```

What do you think? Also @Balandat @gpleiss

gpleiss commented 5 years ago

I think it makes sense that ExactMarginalLogLikelihood should only work in training mode. You could always compute the log likelihood of test data using `likelihood(model(test_x)).log_prob(test_y)`.
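For instance, reusing the tensors from the reproduction snippet, a minimal sketch of this in eval mode (passing noise= here because the fixed noise size does not match the subset):

```python
# Minimal sketch of the suggestion above, reusing tensors from the
# reproduction snippet: evaluate the predictive log probability directly.
model.eval(), likelihood.eval()
with torch.no_grad():
    pred = likelihood(model(train_x[:3]), noise=train_y_err[:3])
    print(pred.log_prob(train_y[:3]))
```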

Balandat commented 5 years ago

> I think it makes sense that ExactMarginalLogLikelihood should only work in training mode

agreed

stevenstetzler commented 5 years ago

Right, I saw that I could compute the log likelihood with log_prob on my targets, but I noticed that in the ExactMarginalLogLikelihood class, additional terms, specifically those from priors, are included in the computation after the call to log_prob:

```python
class ExactMarginalLogLikelihood(MarginalLogLikelihood):
    ...
    def forward(self, output, target, *params):
        ...
        res = output.log_prob(target)

        # Add terms for SGPR / when inducing points are learned
        added_loss = torch.zeros_like(res)
        for added_loss_term in self.model.added_loss_terms():
            added_loss.add(added_loss_term.loss())

        res = res.add(0.5, added_loss)

        # Add log probs of priors on the (functions of) parameters
        for _, prior, closure, _ in self.named_priors():
            res.add_(prior.log_prob(closure()).sum())
```

My concern was that if I added priors on my kernel parameters in my model, calls to log_prob wouldn't account for them, and I would have to write an extra layer of code to add the priors on myself. Am I thinking about / using the log probability from the likelihood in the wrong way? What I ultimately have in mind is using MCMC to marginalize over kernel parameters, where gradients matter less than simply evaluating the log likelihood with the priors included.
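For concreteness, the extra layer I mean would look roughly like the following sketch (a hypothetical helper, not an existing gpytorch function, assuming the four-element named_priors() tuples shown in the code above):

```python
# Rough sketch of a hypothetical helper (not an existing gpytorch API) that
# adds the parameter-prior terms on top of the likelihood's log_prob, e.g. for
# evaluating an unnormalized log posterior inside an MCMC loop.
def log_posterior(model, likelihood, x, y, noise):
    output = likelihood(model(x), noise=noise)
    res = output.log_prob(y)
    # Assumes named_priors() yields (name, prior, closure, _) tuples as above
    for module in (model, likelihood):
        for _, prior, closure, _ in module.named_priors():
            res = res + prior.log_prob(closure()).sum()
    return res
```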