cornellius-gp / gpytorch

A highly efficient implementation of Gaussian Processes in PyTorch

mini-batch DKL training for multiple-output [Docs] #2377

Open QueeneTam opened 11 months ago

QueeneTam commented 11 months ago

πŸ“š Documentation/Examples

Is there a feature that needs some example code? I would like to know how to implement mini-batch DKL training for multiple outputs.

For example, I have input data with shape [batch_size, N channels, L length] and a target (with values in [-1, 1]) of shape [batch_size, K motion parameters]. For each (batch_data, batch_target) pair I want:

- features_outByCNN with shape [batch_size, m features] = CNN(batch_data)
- final_output with shape [batch_size, K motion parameters] = GaussianProcessRegression(features_outByCNN)
- loss = loss_function(batch_target, final_output)

Could you kindly suggest which example I should refer to? I am currently using the SVDKL (Stochastic Variational Deep Kernel Learning) example as a reference, but the model does not converge, and I have no idea what is wrong. I hope you can give some suggestions. Thanks a lot.
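To be concrete, the flow I want looks like this (a runnable sketch with placeholder sizes; the cnn, gp_regression, and loss_function stand-ins below are purely illustrative, not my real modules):

import torch
import torch.nn as nn

# Purely illustrative stand-ins for the real CNN and GP
cnn = nn.Sequential(nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Linear(32, 16))
gp_regression = nn.Linear(16, 6)          # stands in for the GP's predictive mean
loss_function = nn.MSELoss()

batch_data = torch.randn(8, 32, 256)      # [batch_size, N=32 channels, L=256 length]
batch_target = torch.rand(8, 6) * 2 - 1   # [batch_size, K=6 motion parameters] in [-1, 1]

features = cnn(batch_data)                # [8, m=16 features]
final_output = gp_regression(features)    # [8, K=6]
loss = loss_function(final_output, batch_target)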

import gpytorch
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy

class GaussianProcessLayer(ApproximateGP):
    def __init__(self, inducing_points, num_features):
        variational_distribution = CholeskyVariationalDistribution(inducing_points.shape[0])
        variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True)
        # NOTE: `num_features` is used here as the number of output tasks
        variational_strategy = gpytorch.variational.MultitaskVariationalStrategy(variational_strategy, num_tasks=num_features)
        super(GaussianProcessLayer, self).__init__(variational_strategy)

        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

        # This module will scale the NN features so that they're nice values
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        # x is already the output of the feature extractor; just rescale it
        scaled_x = self.scale_to_bounds(x)

        mean_x = self.mean_module(scaled_x)
        covar_x = self.covar_module(scaled_x)
        out = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

        return out
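One thing to check: MultitaskVariationalStrategy (renamed IndependentMultitaskVariationalStrategy in recent GPyTorch versions) expects the base strategy to model a batch of independent latent functions, one per task, but the layer above has no task batch dimension on its variational distribution, mean, or kernel. A minimal sketch of a task-batched variant, following GPyTorch's SVGP multitask regression example (the class name and sizes here are illustrative):

class BatchedMultitaskGPLayer(ApproximateGP):
    def __init__(self, num_tasks, num_features, num_inducing=64):
        # One set of inducing points per task: [num_tasks, num_inducing, num_features]
        inducing_points = torch.randn(num_tasks, num_inducing, num_features)

        variational_distribution = CholeskyVariationalDistribution(
            num_inducing, batch_shape=torch.Size([num_tasks]))
        variational_strategy = gpytorch.variational.IndependentMultitaskVariationalStrategy(
            VariationalStrategy(self, inducing_points, variational_distribution,
                                learn_inducing_locations=True),
            num_tasks=num_tasks)
        super().__init__(variational_strategy)

        # Mean and kernel also carry the task batch dimension
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_tasks]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_tasks])),
            batch_shape=torch.Size([num_tasks]))
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

    def forward(self, x):
        x = self.scale_to_bounds(x)
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x))

Combined with MultitaskGaussianLikelihood(num_tasks=num_tasks), calling this layer on [batch, num_features] features yields a MultitaskMultivariateNormal whose mean has shape [batch, num_tasks].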

class FeatureExtractor(nn.Module):
    def __init__(self, in_channels=32, out_channels=16, num_parameters=6, stride=2, drop_out_rate=0.1):
        super(FeatureExtractor, self).__init__()
        # NOTE: num_parameters, stride, and drop_out_rate are accepted but currently unused

        self.convLayers = nn.Sequential(
                                    nn.Conv1d(in_channels, 64, kernel_size=3, stride=1),
                                    nn.BatchNorm1d(64),
                                    nn.LeakyReLU(inplace=True),

                                    nn.Conv1d(64, 128, kernel_size=3, stride=1),
                                    nn.BatchNorm1d(128),
                                    nn.LeakyReLU(inplace=True),

                                    nn.Conv1d(128, out_channels, kernel_size=3, stride=1),
                                    nn.BatchNorm1d(out_channels),
                                    nn.LeakyReLU(inplace=True),
                                    )
        self.pooling = nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        x = self.convLayers(x)
        x = self.pooling(x)    # [batch, out_channels, 1]
        # squeeze only the pooled length dim; a bare squeeze() would also
        # drop the batch dim when batch_size == 1
        x = x.squeeze(-1)

        return x
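A quick shape check for the extractor (sizes are illustrative; assumes the class above):

x = torch.randn(8, 32, 256)    # [batch_size, N=32 channels, L=256 length]
feats = FeatureExtractor()(x)  # three valid convs (256 -> 254 -> 252 -> 250), pooled to length 1
print(feats.shape)             # torch.Size([8, 16])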

class DKLModel(gpytorch.Module):
    def __init__(self, inducing_points, num_features, grid_bounds=(-1., 1.)):
        super(DKLModel, self).__init__()
        self.feature_extractor = FeatureExtractor()
        # NOTE: `num_features` here is actually the number of output tasks,
        # not the feature dimension (see GaussianProcessLayer above)
        self.gp_layer = GaussianProcessLayer(inducing_points=inducing_points, num_features=num_features)
        self.grid_bounds = grid_bounds  # stored but unused; feature scaling happens inside the GP layer

    def forward(self, x):
        features = self.feature_extractor(x)
        res = self.gp_layer(features)
        return res

def train(args, epoch, model, trainLoader, optimizer):
    model.train()
    likelihood.train()
    nProcessed = 0
    train_loss = list()
    nTrain = len(trainLoader.dataset)

    for batch_idx, (batch_data, batch_label) in enumerate(trainLoader):
        batch_data, batch_label = batch_data.cuda(), batch_label.squeeze().cuda()
        output = model(batch_data)
        # `scale`, `likelihood`, and `mll` are globals defined in __main__ below
        batch_loss = -mll(output, batch_label / scale)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        nProcessed += len(batch_data)
        train_loss.append(batch_loss.item())

    train_loss = np.mean(train_loss)
    print('Train Epoch: {}/{} [{}/{}] \t Loss: {:.2f}'.format(
        epoch + 1, args.nEpochs, nProcessed, nTrain, train_loss))

    return train_loss

def val(args, epoch, model, valLoader):
    model.eval()
    likelihood.eval()

    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        for batch_idx, (batch_data, batch_label) in enumerate(valLoader):
            batch_data, batch_label = batch_data.cuda(), batch_label.cuda()

            output = model(batch_data)
            preds = likelihood(output).mean  # posterior predictive mean, shape [batch, num_tasks]

            if batch_idx == 0:
                targets = batch_label
                evals = preds
            else:
                targets = torch.cat((targets, batch_label), dim=0)
                evals = torch.cat((evals, preds), dim=0)
    print(torch.max(evals), torch.min(evals))
    # undo the 1/scale normalization applied during training before computing MSE
    val_loss = torch.nn.MSELoss()(evals * scale, targets.squeeze()).item()

    return val_loss

if __name__ == '__main__':
    # WARNING: all-ones inducing points are degenerate (every point identical);
    # initializing from actual feature values is usually a better starting point
    inducing_points = torch.FloatTensor(np.ones((args.batchSz, 16)))
    model = MotionNet.DKLModel(inducing_points=inducing_points, num_features=6).cuda()
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=6).cuda()

    optimizer = optim.Adam(
        [{'params': model.feature_extractor.parameters()},
         {'params': model.gp_layer.hyperparameters()},
         {'params': model.gp_layer.variational_parameters()},
         {'params': likelihood.parameters()}],
        lr=0.01, betas=(0.9, 0.999), weight_decay=args.weightDecay)

    # num_data must be the size of the full training set, not the batch size;
    # passing args.batchSz mis-weights the KL term of the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=len(train_loader.dataset))

    for epoch in range(args.nEpochs):
        train_loss = train(args, epoch, model, train_loader, optimizer)
        val_loss = val(args, epoch, model, val_loader)
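One common heuristic worth trying (an assumption on my part, not something confirmed in this thread): initialize the inducing points from real extracted features rather than from ones, e.g.:

# Sketch: initialize inducing locations from a warm-up batch of real features
extractor = FeatureExtractor()
with torch.no_grad():
    warmup_data, _ = next(iter(train_loader))
    inducing_points = extractor(warmup_data)[:64].clone()  # [num_inducing=64, 16]; assumes batch size >= 64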
rajveer43 commented 8 months ago

I would like to work on this