cornellius-gp / gpytorch

A highly efficient implementation of Gaussian Processes in PyTorch
MIT License

[Feature Request] Is it possible to work with Changepoint kernels? #2507

Open francescosgarlata opened 5 months ago

francescosgarlata commented 5 months ago

🚀 Feature Request

I would like to implement a changepoint kernel in my GP model. An example is given in Duvenaud's thesis, Eq. 2.19:

k(x, x') = k1(x, x') σ(x) σ(x') + k2(x, x') (1 − σ(x)) (1 − σ(x'))

where we can dress two kernels with sigmoid functions, so that k1 dominates where σ(x) ≈ 1 and k2 dominates where σ(x) ≈ 0. Is this something that can be done with GPyTorch?

For example, I would like to take two SpectralMixtureKernels and dress them with sigmoid functions.

I have tried defining two sigmoid kernels and using them to dress the SpectralMixtureKernels in my model:

import numpy as np
import torch
import gpytorch

class SigmoidKernel1(gpytorch.kernels.Kernel):
    is_stationary = False

    def forward(self, x1, x2, **params):
        return torch.sigmoid(x1) * torch.sigmoid(x2)

class SigmoidKernel2(gpytorch.kernels.Kernel):
    is_stationary = False

    def forward(self, x1, x2, **params):
        return (1-torch.sigmoid(x1)) * (1-torch.sigmoid(x2))

class SpectralMixtureGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_mixtures=8):
        super(SpectralMixtureGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = (
            gpytorch.kernels.SpectralMixtureKernel(num_mixtures=num_mixtures) * SigmoidKernel1()
            + gpytorch.kernels.SpectralMixtureKernel(num_mixtures=num_mixtures) * SigmoidKernel2()
        )

    def forward(self,x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class GPModel:
    def __init__(self, X, Y,num_mixtures=8):
        self.train_x = torch.from_numpy(X).float()
        self.train_y = torch.from_numpy(Y).float()
        self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
        self.model = SpectralMixtureGPModel(self.train_x, self.train_y, self.likelihood, num_mixtures=num_mixtures)

    def train(self, n_iter:int, learning_rate:float, patience:int=np.inf, verbose:bool=False):
        self.model.train()
        self.likelihood.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

        for i in range(n_iter):
            optimizer.zero_grad()
            output = self.model(self.train_x)
            loss = -mll(output, self.train_y)
            loss.backward()
            if verbose:
                print('Iter %d/%d - Loss: %.5f' % (i + 1, n_iter, loss.item()))
            optimizer.step()

    def eval(self, X:np.ndarray):
        self.model.eval()
        self.likelihood.eval()

        test_x = torch.from_numpy(X).float()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            # Make predictions
            observed_pred = self.likelihood(self.model(test_x))

            # Get upper and lower confidence bounds
            lower, upper = observed_pred.confidence_region()
            return observed_pred.mean.numpy(), lower.numpy(), upper.numpy()

However, during training I get numerical instabilities ("NotPSDError: Matrix not positive definite after repeatedly adding jitter up to 1.0e-04.") and, when I try to evaluate the model on a test set of 100 points, I get an error:

"RuntimeError: The size of tensor a (100) must match the size of tensor b (180) at non-singleton dimension 0"

jakee417 commented 1 month ago

There seems to be a bug in DefaultPredictionStrategy, specifically with this line:

test_covar = joint_covar[..., self.num_train :, :].to_dense()

Once I patched it like this:

import torch
import gpytorch
import numpy as np
from gpytorch.models.exact_prediction_strategies import DefaultPredictionStrategy

class SigmoidKernel1(gpytorch.kernels.Kernel):
    is_stationary = False

    def forward(self, x1, x2, **params):
        return torch.sigmoid(x1) * torch.sigmoid(x2)

class SigmoidKernel2(gpytorch.kernels.Kernel):
    is_stationary = False

    def forward(self, x1, x2, **params):
        return (1 - torch.sigmoid(x1)) * (1 - torch.sigmoid(x2))

class PredictionStrategy(DefaultPredictionStrategy):
    def exact_prediction(self, joint_mean, joint_covar):
        test_mean = joint_mean[..., self.num_train :]
        test_test_covar = joint_covar[..., self.num_train :, self.num_train :]
        test_train_covar = joint_covar[..., self.num_train :, : self.num_train]
        return (
            self.exact_predictive_mean(test_mean, test_train_covar),
            self.exact_predictive_covar(test_test_covar, test_train_covar),
        )

class SpectralMixtureGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_mixtures=8):
        super(SpectralMixtureGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = (
            gpytorch.kernels.SpectralMixtureKernel(num_mixtures=num_mixtures) * SigmoidKernel1()
            + gpytorch.kernels.SpectralMixtureKernel(num_mixtures=num_mixtures) * SigmoidKernel2()
        )
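        # NOTE: override the kernel's prediction strategy so that, when the model is first
        # called in eval mode, the patched strategy above is used instead of DefaultPredictionStrategy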
        self.covar_module.prediction_strategy = PredictionStrategy

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

class GPModel:
    def __init__(self, X, Y, num_mixtures=8):
        self.train_x = torch.from_numpy(X).float()
        self.train_y = torch.from_numpy(Y).float()
        self.likelihood = gpytorch.likelihoods.GaussianLikelihood()
        self.model = SpectralMixtureGPModel(self.train_x, self.train_y, self.likelihood, num_mixtures=num_mixtures)

    def train(self, n_iter: int, learning_rate: float, verbose: bool = False):
        self.model.train()
        self.likelihood.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model)

        for i in range(n_iter):
            optimizer.zero_grad()
            output = self.model(self.train_x)
            loss = -mll(output, self.train_y)
            loss.backward()
            if verbose:
                print("Iter %d/%d - Loss: %.5f" % (i + 1, n_iter, loss.item()))
            optimizer.step()

    def eval(self, X: np.ndarray):
        self.model.eval()
        self.likelihood.eval()

        test_x = torch.from_numpy(X).float()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            # Make predictions
            observed_pred = self.likelihood(self.model(test_x))

            # Get upper and lower confidence bounds
            lower, upper = observed_pred.confidence_region()
            return observed_pred.mean.numpy(), lower.numpy(), upper.numpy()

Training/eval seems to work fine. I could not repro your training instabilities; I was using:

x = np.linspace(0, 1000)  # 50 points by default
y = np.cos(x)
model = GPModel(X=x, Y=y, num_mixtures=8)
model.train(500, learning_rate=1, verbose=True)
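Prediction on a held-out grid then runs without the shape error as well (the grid below is arbitrary):

x_test = np.linspace(0, 1000, 100)
mean, lower, upper = model.eval(x_test)
print(mean.shape, lower.shape, upper.shape)  # each is (100,)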