cornellius-gp / gpytorch

A highly efficient implementation of Gaussian Processes in PyTorch
MIT License

How to use Batch Independent Multioutput GPs with Inducing Points? #2384

Closed · famura closed this 10 months ago

famura commented 11 months ago

Discussed in https://github.com/cornellius-gp/gpytorch/discussions/2369

Originally posted by **famura**, June 22, 2023

Hi there,

I want to use batch-independent GPs (see this [tutorial](https://docs.gpytorch.ai/en/stable/examples/03_Multitask_Exact_GPs/Batch_Independent_Multioutput_GP.html)) with the inducing points method (see this [tutorial](https://docs.gpytorch.ai/en/stable/examples/04_Variational_and_Approximate_GPs/SVGP_Regression_CUDA.html)). I managed to do so for the fully correlated multi-task setting using the `MultitaskKernel`; however, I fail to do it in the batch-independent case. Here is my minimal example script, which is almost identical to the first tutorial above. Both versions I tried fail with the same (output-dim based?) error. I have the feeling that I just missed one detail.

```python
import math

import torch
import gpytorch
from matplotlib import pyplot as plt

train_x = torch.linspace(0, 1, 200)
train_y = torch.stack([
    torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2,
    torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2,
], -1)


class BatchIndependentMultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([2]))

        # v1 -- RuntimeError: grad can be implicitly created only for scalar outputs
        base_covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([2])),
            batch_shape=torch.Size([2]),
        )
        self.covar_module = gpytorch.kernels.InducingPointKernel(
            base_covar_module,
            inducing_points=train_x[::10].clone(),  # every 10th training input
            likelihood=likelihood,
        )

        # v2 -- RuntimeError: grad can be implicitly created only for scalar outputs
        # base_covar_module = gpytorch.kernels.InducingPointKernel(
        #     gpytorch.kernels.RBFKernel(batch_shape=torch.Size([2])),
        #     inducing_points=train_x[::10].clone(),  # every 10th training input
        #     likelihood=likelihood,
        # )
        # self.covar_module = gpytorch.kernels.ScaleKernel(
        #     base_covar_module,
        #     batch_shape=torch.Size([2]),
        # )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal.from_batch_mvn(
            gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        )


likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
model = BatchIndependentMultitaskGPModel(train_x, train_y, likelihood)

training_iterations = 50

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the Adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)  # Includes GaussianLikelihood parameters

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for i in range(training_iterations):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()

# Set into eval mode
model.eval()
likelihood.eval()

# Initialize plots
f, (y1_ax, y2_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Make predictions
with torch.no_grad(), gpytorch.settings.fast_pred_var():
    test_x = torch.linspace(0, 1, 51)
    predictions = likelihood(model(test_x))
    mean = predictions.mean
    lower, upper = predictions.confidence_region()

# mean, lower, and upper have one column per task
# Plot training data as black stars
y1_ax.plot(train_x.detach().numpy(), train_y[:, 0].detach().numpy(), 'k*')
# Predictive mean as blue line
y1_ax.plot(test_x.numpy(), mean[:, 0].numpy(), 'b')
# Shade in confidence
y1_ax.fill_between(test_x.numpy(), lower[:, 0].numpy(), upper[:, 0].numpy(), alpha=0.5)
y1_ax.set_ylim([-3, 3])
y1_ax.legend(['Observed Data', 'Mean', 'Confidence'])
y1_ax.set_title('Task 1: Observed Values (Likelihood)')

# Plot training data as black stars
y2_ax.plot(train_x.detach().numpy(), train_y[:, 1].detach().numpy(), 'k*')
# Predictive mean as blue line
y2_ax.plot(test_x.numpy(), mean[:, 1].numpy(), 'b')
# Shade in confidence
y2_ax.fill_between(test_x.numpy(), lower[:, 1].numpy(), upper[:, 1].numpy(), alpha=0.5)
y2_ax.set_ylim([-3, 3])
y2_ax.legend(['Observed Data', 'Mean', 'Confidence'])
y2_ax.set_title('Task 2: Observed Values (Likelihood)')

plt.show()
```
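A note on the `RuntimeError` itself: in PyTorch, "grad can be implicitly created only for scalar outputs" just means `.backward()` was called on a non-scalar tensor; with batched models, `ExactMarginalLogLikelihood` can return one value per batch element (here, per task). A minimal sketch of a workaround in the training loop above; it only gets past `backward()` and does not address whether `InducingPointKernel` supports batched kernels at all:

```python
output = model(train_x)
loss = -mll(output, train_y)
if loss.dim() > 0:
    # Batched MLL: one value per task; reduce to a scalar so backward() works
    loss = loss.sum()
loss.backward()
```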
agosztolai commented 10 months ago

I would also like to do the same: multiple-input/multiple-output GPs with SGPR. I have the following minimal working example:

```python
import torch
import gpytorch
import tqdm
from math import floor
from gpytorch.kernels import InducingPointKernel, MultitaskKernel

X, y = torch.randn(1000, 3), torch.randn(1000)

train_n = int(floor(0.8 * len(X)))
train_x = X[:train_n, :].contiguous()
train_y = y[:train_n].contiguous()
train_y = torch.stack([train_y, train_y]).T

test_x = X[train_n:, :].contiguous()
test_y = y[train_n:].contiguous()
test_y = torch.stack([test_y, test_y]).T

if torch.cuda.is_available():
    train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda()

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=2
        )
        self.base_covar_module = MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )
        self.covar_module = InducingPointKernel(
            self.base_covar_module, inducing_points=train_x[:500, :].clone(), likelihood=likelihood
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
model = GPRegressionModel(train_x, train_y, likelihood)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

training_iterations = 10

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

iterator = tqdm.tqdm(range(training_iterations), desc="Train")

for i in iterator:
    # Zero backprop gradients
    optimizer.zero_grad()
    # Get output from model
    output = model(train_x)
    # Calc loss and backprop derivatives
    loss = -mll(output, train_y)
    loss.backward()
    iterator.set_postfix(loss=loss.item())
    optimizer.step()
    torch.cuda.empty_cache()

model.eval()
likelihood.eval()
with torch.no_grad():
    preds = model.likelihood(model(test_x))
    print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
    print('Test NLL: {}'.format(-preds.to_data_independent_dist().log_prob(test_y).mean().item()))
```

I get the error:

```
ValueError: Expected SGPR output to be a MatmulLinearOperator or AddedDiagLinearOperator. Got SumLinearOperator instead. This is likely a bug in GPyTorch.
```

The same example works if I remove the inducing points. Is SGPR not implemented for multiple outputs?
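To make "remove the inducing points" concrete: the variant that runs is the same model with the plain Kronecker-structured multitask kernel, sketched below. (The error itself appears to come from a structural check in `InducingPointKernel`'s SGPR prediction path, which expects a low-rank-plus-diagonal covariance operator rather than the operator type produced by this composition.)

```python
# Sketch of the non-sparse variant reported as working; same data,
# likelihood, and training loop as in the MWE above
class ExactMultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=2
        )
        # No InducingPointKernel wrapper: just the Kronecker-structured kernel
        self.covar_module = MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)
```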

famura commented 10 months ago

Hi @agosztolai,

I think that you want to achieve the other of the two cases that I mentioned (sparse GP regression with the MultitaskKernel), while I want to do sparse GP regression assuming independent tasks.
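In kernel terms, the two cases look like this (a quick illustration using the constructions already in this thread):

```python
# Case A (yours): correlated tasks via a Kronecker-structured multitask kernel
covar_a = gpytorch.kernels.MultitaskKernel(
    gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
)

# Case B (mine): independent tasks via a batch dimension of size num_tasks
covar_b = gpytorch.kernels.ScaleKernel(
    gpytorch.kernels.RBFKernel(batch_shape=torch.Size([2])),
    batch_shape=torch.Size([2]),
)
```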

Anyhow, regarding your MWE, try changing the class definition to:

```python
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=2
        )
        self.covar_module = MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )
        self.covar_module.data_covar_module = InducingPointKernel(
            self.covar_module.data_covar_module, inducing_points=train_x[:500, :].clone(), likelihood=likelihood
        )
```

and let me know if it worked.
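The idea behind the nesting is to keep the Kronecker (task) structure outermost and apply the sparse approximation only to the input kernel. A quick smoke test could look like this (a sketch using the data from your MWE; whether it gets past the SGPR structural check is exactly what needs verifying):

```python
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
model = GPRegressionModel(train_x, train_y, likelihood)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

model.train()
likelihood.train()
loss = -mll(model(train_x), train_y)  # one forward pass through the nested kernel
loss.backward()                        # if this runs, training is at least viable
```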

famura commented 10 months ago

I am closing this issue since I think that this example covers my case.
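For anyone who lands here later: batch-independent multioutput GPs with inducing points can also be set up variationally, via `gpytorch.variational.IndependentMultitaskVariationalStrategy`. Below is a minimal sketch along the lines of the GPyTorch approximate-GP tutorials, assuming a variational (SVGP) approximation is acceptable in place of SGPR and reusing the toy data from the first post; it is not from this thread and is untested here.

```python
# Sketch: batch-independent multioutput SVGP as a variational alternative to SGPR
import math
import torch
import gpytorch

train_x = torch.linspace(0, 1, 200)
train_y = torch.stack([
    torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2,
    torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2,
], -1)


class IndependentMultitaskSVGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points, num_tasks):
        # One set of inducing points per task: shape (num_tasks, M, d)
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_tasks])
        )
        variational_strategy = gpytorch.variational.IndependentMultitaskVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=num_tasks,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks]),
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )


# Every 10th training input as initial inducing locations, copied per task
inducing_points = train_x[::10].clone().unsqueeze(-1).repeat(2, 1, 1)  # (2, 20, 1)
model = IndependentMultitaskSVGP(inducing_points, num_tasks=2)
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)

model.train()
likelihood.train()
optimizer = torch.optim.Adam(list(model.parameters()) + list(likelihood.parameters()), lr=0.1)
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

for i in range(50):
    optimizer.zero_grad()
    loss = -mll(model(train_x), train_y)  # returns a scalar ELBO
    loss.backward()
    optimizer.step()
```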