GP-LVM over multiple trials of multiple dimensions

Hi,

I am working on developing a Bayesian GP-LVM that can reproduce human motion trajectories.

I have already posted a question ( #1867), however I am also working on a version that assumes that the random variables in the latent space (X's) are i.i.d to see whether the GP-LVM can recognise the sequential dependence.

The dataset that I'm working with is made of 19 trials and each one of them has 101 data points for 24 degrees of freedom (DoF). Therefore, my Y is a torch Tensor of size (19, 101, 24). Reading the last part of paragraph 3.3.2 of Damianou, Titsias and Lawrence (2016)(https://jmlr.csail.mit.edu/papers/volume17/damianou16a/damianou16a.pdf), I am creating a latent space for each trial and trying to optimise for the hyperparameters of unique a f(X) that can be used in evaluation mode to make predictions when some DoF are missing. Basically, what I would like to do is maximise the VariationalELBO based on all the DoF of all trials, if that makes sense. I've stuck to a NormalPrior to define the prior over X since I'm assuming that the random variables in the latent space are independent and created batches that correspond to the number of trial I'm dealing with. During the optimisation, q(X) generates samples of the size that I would expect: _ntrials x length of each trial x latent dimensions. However:

If I keep the size of the inducing inputs as _data_dim x n_inducing x latentdim, I get an error when I try to output the MultivariateNormal: __batch_shape = _mul_broadcast_shape(inducing_points.shape[:-2], x.shape[:-2]) raise RuntimeError("Shapes are not broadcastable for mul operation")__ which comes from the gpytorch.variational._variational.py script. I have tried to change things manually in that script with no success.

The optimisation is successful if I loop through the trials, but that isn't exactly what I'm trying to do.

Anyway, here is the code. you can also find a folder that contains all the data. My code works fairly well when the learning is done upon a single trial (when Batch = False)

Any idea on what I'm doing wrong? Any help will be much appreciated.

Data.zip


import os
def readStoFile(filename):

    if not os.path.exists(filename):
        print('file do not exists')

    file_id = open(filename, 'r')

    # read header
    next_line = file_id.readline()
    header = [next_line]
    nc = 0
    nr = 0
    while 'endheader' not in next_line:
        if 'nColumns' in next_line:
            nc = int(next_line[next_line.index('=') + 1:len(next_line)])
        elif 'nRows' in next_line:
            nr = int(next_line[next_line.index('=') + 1:len(next_line)])

        next_line = file_id.readline()
        header.append(next_line)

    # process column labels
    next_line = file_id.readline()
    if next_line.isspace() is True:
        next_line = file_id.readline()

    labels = next_line.split()

    # get data
    data = []
    d = [0] * nr
    for i in range(0, nr):
        d[i] = [float(x) for x in file_id.readline().split()]
        data.append(d[i])

    file_id.close()

    labels = pnd.DataFrame(labels)
    Data_np = np.array(data)

    return labels, Data_np

def deg_to_rad(deg_angle):
    rad_angle = (deg_angle * np.pi) / 180
    return rad_angle

# import libraries
import numpy as np
import pandas as pnd
import torch
import gpytorch
from tqdm.notebook import trange

# Load Simulations and store them into a unique matrix Y
folder_path = 'C:/Users/ab3758/OneDrive/Documenti/Bath/PhD/Python/GPs/Variational GPDS'
results_dir = os.path.join(folder_path, 'Data')
angle_range = np.arange(15, 110, 5)

Batch = True

if Batch is True:
    Y = np.zeros((len(angle_range), len(np.arange(0, 0.404, 0.004)), 119))
    for folder in os.listdir(results_dir):
        for i in range(len(angle_range)):
            if folder == str(angle_range[i]) + 'Data':
                file_dir = os.path.join(results_dir, str(angle_range[i]) + 'Data')
                file_path = os.path.join(file_dir, '_states.sto')
                labels, Data_np = readStoFile(file_path)
                Y[i, :, :] = Data_np
    trajectories = torch.Tensor(Y[:, :, 37:84:2])
    num_simulations = trajectories.shape[0]
    N = trajectories.shape[1]
    data_dim = trajectories.shape[2]
    #batch_shape = torch.Size([num_simulations, data_dim])
else:
    file_dir = os.path.join(results_dir, str(angle_range[4]) + 'Data')
    file_path = os.path.join(file_dir, '_states.sto')
    labels, Data_np = readStoFile(file_path)
    trajectories = torch.Tensor(Data_np[:, 37:84:2])
    N = trajectories.shape[0]
    data_dim = trajectories.shape[1]
batch_shape = torch.Size([data_dim])

latent_dim = 10
n_inducing = round(N * 0.25)

# GPLVM Learning
# -------
from gpytorch.models.gplvm.latent_variable import *
from gpytorch.models.gplvm.bayesian_gplvm import BayesianGPLVM
from gpytorch.means import ZeroMean
from gpytorch.mlls import VariationalELBO
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.variational import VariationalStrategy
from gpytorch.variational import NaturalVariationalDistribution, TrilNaturalVariationalDistribution
from gpytorch.kernels import ScaleKernel, RBFKernel, MaternKernel
from gpytorch.distributions import MultivariateNormal
from gpytorch.priors import NormalPrior, MultivariateNormalPrior

# Function for Linear PCA
def lin_PCA(Data, latent_dim):
    U, S, V = torch.pca_lowrank(Data, q=latent_dim)
    if trajectories.ndim > 2:
        return torch.nn.Parameter(torch.matmul(Data, V[:, :, :latent_dim]))
    else:
        return torch.nn.Parameter(torch.matmul(Data, V[:, :latent_dim]))

# create a class for the Bayesian GPLVM
class bGPLVM_iid(BayesianGPLVM):
    def __init__(self, n, batch_shape, num_simulations, data_dim, latent_dim, data, n_inducing):
        self.n = n
        self.batch_shape = batch_shape

        # Randomly initialise the inducing points Z in the latent space
        self.inducing_inputs = torch.randn(data_dim, n_inducing, latent_dim)

        # Define Variational Distribution: q(U)
        q_u = TrilNaturalVariationalDistribution(n_inducing, self.batch_shape)

        # Define how q_u is integrated out to obtain q_f (Hensman et al., 2015)
        q_f = VariationalStrategy(self, self.inducing_inputs, q_u, learn_inducing_locations=True)  # gpytorch.variational.variational_strategy

        # Initialise X's in the latent space with linear PCA
        X_init = lin_PCA(data, latent_dim)

        # Define prior over X
        prior_mu = torch.zeros_like(X_init)

        prior_sigma = torch.ones_like(X_init)

        X_prior = NormalPrior(prior_mu, prior_sigma)

        # Define Variational Distribution q(X)
        X = VariationalLatentVariable(n, data_dim, latent_dim, X_init, X_prior)

        BayesianGPLVM.__init__(self, X, q_f)

        # Kernel (acting on latent dimensions)
        self.mean_module = ZeroMean(ard_num_dims=latent_dim)
        self.covar_module = ScaleKernel(RBFKernel(ard_num_dims=latent_dim))

    def forward(self, X):
        mean_x = self.mean_module(X)
        covar_x = self.covar_module(X)
        self.dist = MultivariateNormal(mean_x, covar_x)
        return self.dist

# Model
Model = bGPLVM_iid(N, batch_shape, num_simulations, data_dim, latent_dim, trajectories, n_inducing)
Model.train()

# Likelihood
Likelihood = GaussianLikelihood(batch_shape=Model.batch_shape)
Likelihood.train()

# Define 2 different optimisers for Variational Parameters and Hyperparameters
hyper_optimizer = torch.optim.Adam([
    {'params': Model.hyperparameters()},
    {'params': Likelihood.parameters()}
], lr=0.01)

variational_optimizer = gpytorch.optim.NGD(Model.variational_parameters(), num_data=N, lr=0.1)

# Objective function
mll = VariationalELBO(Likelihood, Model, num_data=N)

iterator = trange(50, leave=True)
training_iter = 50

loss_list = []

RMSE_np = np.zeros((num_simulations, training_iter))
length_scale_np = np.zeros((training_iter, latent_dim))
noise_np = np.zeros((training_iter, latent_dim))
trajectories_array = np.zeros((training_iter, N, data_dim))

for i in iterator:
    if Batch is True:
        #for batch in range(trajectories.shape[0]):

        variational_optimizer.zero_grad()
        hyper_optimizer.zero_grad()

        sample = Model.sample_latent_variable()  # a full sample returns latent X across all N
        output = Model(sample)
        loss_function = -mll(output, trajectories.T).sum()  # compute loss for each dimension and add them up

        loss_list.append(loss_function.item())
        loss_function.backward()  # compute the gradients

        # store hyperparameters: length scales and variance of the likelihood
        length_scale = Model.covar_module.base_kernel.lengthscale.detach().numpy()
        length_scale_np[i, :] = length_scale

        noise = Likelihood.noise.detach().numpy()
        noise_np[i, :] = length_scale

        # Update parameters
        variational_optimizer.step()
        hyper_optimizer.step()

    else:
        variational_optimizer.zero_grad()
        hyper_optimizer.zero_grad()
        sample = Model.sample_latent_variable()  # a full sample returns latent X across all N
        output = Model(sample)
        loss = -mll(output, trajectories.T).sum()  # compute loss for each dimension and add them up

        loss_list.append(loss.item())
        loss.backward()

        length_scale = Model.covar_module.base_kernel.lengthscale.detach().numpy()
        length_scale_np[i, :] = length_scale

        # Update parameters
        variational_optimizer.step()
        hyper_optimizer.step()

        # RMSE between prediction and training data
        np_trajectories = trajectories.detach().numpy()
        prediction = Likelihood(Model(sample))
        prediction = prediction.mean.T.detach().numpy()
        RMSE = np.sqrt(np.mean(np.sum((np_trajectories - prediction) ** 2)))
        RMSE_np[i] = RMSE
        trajectories_array[i, :, :] = prediction```

cornellius-gp / gpytorch

GP-LVM over multiple trials of multiple dimensions #1902