Closed: @h-jia closed this issue 5 years ago.
@h-jia is there any chance you could provide example code, or an example notebook? My guess is that you're probably encountering a NaN issue - but that's hard to debug without a script that we can run.
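One quick way to confirm a NaN issue would be a check like the following sketch, where `prediction` is the model's `MultivariateNormal` output and `loss` is the scalar `-mll` value from a typical training loop:

```python
import torch

def check_for_nans(prediction, loss):
    """Sketch: raise early if the GP output or the loss has gone NaN."""
    if torch.isnan(prediction.mean).any():
        raise RuntimeError("NaN in GP predictive mean")
    if torch.isnan(loss).any():
        raise RuntimeError("NaN loss")
```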
Thanks for the reply. The code is a little rough, but I hope it generally shows where the problem happens. Thanks!
```python
import torch.nn as nn
import torch
import gpytorch

nums = 60
sequence_length = nums
input_size = 2
hidden_size = 64
num_layers = 4
num_classes = nums
batch_size = 1
learning_rate = 0.01
epochs = 20

train_x = torch.randn(1, 60, 2)
train_y = torch.randn(60)


class Down(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, init_func=nn.init.normal_):
        super(Down, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=False)
        self.fc = nn.Linear(hidden_size, num_classes)
        if init_func is not None:
            init_func(self.lstm.weight_hh_l0)
            init_func(self.lstm.weight_ih_l0)
            init_func(self.fc.weight)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :]).view(train_x.shape[0] * train_x.shape[1], -1)
        return out


feature_extractor = Down(input_size, hidden_size, num_layers, num_classes)


# We will use the simplest form of GP model, exact inference
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.feature_extractor = feature_extractor
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=3)),
            num_dims=3, grid_size=5
        )

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = projected_x - projected_x.min(0)[0]
        projected_x = 2 * (projected_x / projected_x.max(0)[0]) - 1
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(train_x, train_y, likelihood)

optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=learning_rate)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
train_loss = []


def train(epoch):
    with gpytorch.settings.use_toeplitz(True):
        model.train()
        likelihood.train()
        # for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        prediction = model(train_x)
        loss = -mll(prediction, train_y)
        loss.backward()
        print('Iter %d - Loss: %.3f' % (epoch, loss.item()))
        train_loss.append(loss.cpu().data.numpy().tolist())
        optimizer.step()
        return loss


def test(epoch):
    model.eval()
    likelihood.eval()
    test_x = torch.randn(1, 60, 2)
    test_y = torch.randn(60)
    # for batch_size, (x_all, y_all) in enumerate(test_loader):
    test_y = test_y.squeeze()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.fast_pred_var():
        preds = model(test_x)
        print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
        if torch.mean(torch.abs(preds.mean - test_y)) < 0.02:
            print("we can save all parameters")
    return preds


for epoch in range(epochs):
    loss = train(epoch)
    if epoch % 10 == 0 and epoch > 0:
        preds = test(epoch)
print("test finished")
```
I'd also like to know whether this is a good way to do this, since the loss in a simple GP decreases without limit (it can become negative, e.g. -x.xxx). Is the best model the one with the minimum test loss?
@h-jia Hmm, I'm not seeing this error. What version of GPyTorch are you using? We've added a couple of stability fixes recently, so this might actually be a solved problem.
Also, regarding the loss: once the loss levels out, that's usually a good time to stop and test.
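One simple way to detect that the loss has leveled out is to compare recent losses against earlier ones, e.g. this sketch, where `train_loss` is the running list from the script above and the window size and tolerance are arbitrary choices:

```python
def loss_has_plateaued(train_loss, window=10, tol=1e-3):
    # Consider training converged when the average loss over the last
    # `window` iterations improved by less than `tol` over the previous window.
    if len(train_loss) < 2 * window:
        return False
    prev = sum(train_loss[-2 * window:-window]) / window
    recent = sum(train_loss[-window:]) / window
    return prev - recent < tol
```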
@gpleiss
GPyTorch: 0.1.0rc5
PyTorch: torch-nightly-1.0.0.dev20181206
Could you please post your GPyTorch and PyTorch versions? After uninstalling and reinstalling, I still get the error below. Thanks!
```
Iter 0 - Loss: 1.513
Iter 1 - Loss: 1.499
Iter 2 - Loss: 1.505
Iter 3 - Loss: 1.506
Iter 4 - Loss: 1.502
Iter 5 - Loss: 1.495
Iter 6 - Loss: 1.489
Iter 7 - Loss: 1.488
Iter 8 - Loss: 1.489
Iter 9 - Loss: 1.485
Iter 10 - Loss: 1.481
Traceback (most recent call last):
  File "newtest.py", line 127, in <module>
    preds = test(epoch)
  File "newtest.py", line 117, in test
    preds = model(test_x)
  File "/anaconda3-server/lib/python3.6/site-packages/gpytorch/models/exact_gp.py", line 158, in __call__
    non_batch_train=non_batch_train,
  File "/anaconda3-server/lib/python3.6/site-packages/gpytorch/functions/__init__.py", line 99, in exact_predictive_mean
    full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache, non_batch_train
  File "/anaconda3-server/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py", line 233, in exact_predictive_mean
    full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache, non_batch_train
  File "/anaconda3-server/lib/python3.6/site-packages/gpytorch/lazy/interpolated_lazy_tensor.py", line 389, in exact_predictive_mean
    res = left_interp(test_interp_indices, test_interp_values, precomputed_cache).squeeze(-1) + test_mean
  File "/anaconda3-server/lib/python3.6/site-packages/gpytorch/utils/interpolation.py", line 207, in left_interp
    res = rhs.unsqueeze(-2).expand(*rhs_size).gather(-3, interp_indices_expanded)
RuntimeError: expand(torch.FloatTensor{[1, 125, 1, 1]}, size=[125, 4, 1]): the number of sizes provided (3) must be greater or equal to the number of dimensions in the tensor (4)
```
@h-jia I was able to fix your problem by fixing a few issues in your GP model specification.

1. `train_x` is batch mode (e.g., `b x n x d`), but after projecting in `forward`, `projected_x` is not (e.g., it is `n x d`). You need to unsqueeze `projected_x` to be `1 x n x d` so that it matches the batch shape of `x` coming in. This was causing the mean module to not recognize that we were in batch mode, leading to the issue.
2. The kernel is built with `ard_num_dims=3` and `num_dims=3`, but `projected_x` has `d=1` (e.g., it is `1 x n x 1` after unsqueezing). This is a silent failure we should fix, but these are invalid values for this data. I changed them to `1`.

I'm removing the bug label for now, because I was able to fix this by solving some sizing issues with the GP model specification.
@jacobrgardner Hi Jacob, thank you very much for your help! Yes, I found those issues and changed them following your advice.
However, I'm still a bit confused about the test.
I switched to a very small learning rate and a tiny feature-extraction network, but I still get this error. For training we have `train_x` of shape `1 x n x 2`, and to keep the code as simple as possible I set `test_x` to the same shape as the training input, i.e. `1 x n x 2` (I also tried unsqueezing `test_x` for batch mode to `1 x 1 x n x 2`, but that fails with `input must have 3 dimensions, got 4`).
When I access `preds.mean` I get the error `The size of tensor a (0) must match the size of tensor b (60) at non-singleton dimension 0`, with `preds.mean` being `tensor([])`. It seems `preds` is empty/NaN during the test.
Does my batch mode setting have a problem?
I hope you can give me some advice at your convenience. You have saved me a lot of time, thanks!
Below is the code I changed so that it runs.
```python
import torch.nn as nn
import torch
import gpytorch

nums = 60
sequence_length = nums
input_size = 2
hidden_size = 64
num_layers = 4
num_classes = nums
batch_size = 1
learning_rate = 0.0000001
epochs = 20

train_x = torch.randn(1, 60, 2)
train_y = torch.randn(60)


class Down(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, init_func=nn.init.normal_):
        super(Down, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=False)
        self.fc = nn.Linear(hidden_size, num_classes)
        if init_func is not None:
            init_func(self.lstm.weight_hh_l0)
            init_func(self.lstm.weight_ih_l0)
            init_func(self.fc.weight)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :]).view(train_x.shape[0] * train_x.shape[1], -1)
        return out


feature_extractor = Down(input_size, hidden_size, num_layers, num_classes)


# We will use the simplest form of GP model, exact inference
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.feature_extractor = feature_extractor
        self.mean_module = gpytorch.means.ConstantMean()
        grid_size = gpytorch.utils.grid.choose_grid_size(train_x)  # Fix 1
        self.covar_module = gpytorch.kernels.GridInterpolationKernel(
            gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=1)),
            num_dims=1, grid_size=grid_size  # Fix 2
        )

    def forward(self, x):
        projected_x = self.feature_extractor(x)
        projected_x = projected_x - projected_x.min(0)[0]
        projected_x = 2 * (projected_x / projected_x.max(0)[0]) - 1
        projected_x = torch.unsqueeze(projected_x, 0)  # Fix 3
        mean_x = self.mean_module(projected_x)
        covar_x = self.covar_module(projected_x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(train_x, train_y, likelihood)

optimizer = torch.optim.Adam([
    {'params': model.feature_extractor.parameters()},
    {'params': model.covar_module.parameters()},
    {'params': model.mean_module.parameters()},
    {'params': model.likelihood.parameters()},
], lr=learning_rate)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
train_loss = []


def train(epoch):
    with gpytorch.settings.use_toeplitz(True):
        model.train()
        likelihood.train()
        # for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        prediction = model(train_x)
        loss = -mll(prediction, train_y)
        loss.backward()
        print('Iter %d - Loss: %.3f' % (epoch, loss.item()))
        train_loss.append(loss.cpu().data.numpy().tolist())
        optimizer.step()
        return loss


def test(epoch):
    model.eval()
    likelihood.eval()
    test_x = torch.randn(1, 60, 2)
    test_y = torch.randn(60)
    # for batch_size, (x_all, y_all) in enumerate(test_loader):
    # torch.unsqueeze(test_x, 0)
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.fast_pred_var():
        preds = model(test_x)
        print(preds.mean)
        print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
        if torch.mean(torch.abs(preds.mean - test_y)) < 0.02:
            print("we can save all parameters")
    return preds


for epoch in range(epochs):
    loss = train(epoch)
    if epoch % 10 == 0 and epoch > 0:
        preds = test(epoch)
print("test finished")
```
@h-jia I'll try to look into this more tomorrow!
@h-jia Sorry for the delay. I finally had a chance to look at this. It looks like the issue is that your covariance function always returns a `60 x 60` covariance matrix. For example:

```python
test_x = torch.randn(1, 60, 2)
print('size', model.forward(test_x).covariance_matrix.shape)
# size torch.Size([1, 60, 60])

test_x = torch.randn(1, 120, 2)
print('size 2', model.forward(test_x).covariance_matrix.shape)
# size 2 torch.Size([1, 60, 60])
```
Since the training data is also 60 points, when you slice into this you get empty tensors. The root cause is that `Down` takes only the last timestep (`out[:, -1, :]`), maps it through `fc` to a fixed `num_classes = 60` outputs, and reshapes with `train_x.shape`, so the GP always sees 60 "points" no matter how many inputs come in.
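One way around this would be to project every LSTM timestep instead of only the last one, so that a `b x n x input_size` batch yields `b x n x feature_dim` features and the covariance scales with `n`. A rough sketch of that idea (`PerPointExtractor` and `feature_dim` are hypothetical names, not tested against this exact setup):

```python
import torch
import torch.nn as nn

class PerPointExtractor(nn.Module):
    """Sketch: return one feature vector per input point, so the GP's
    covariance matrix grows with the number of inputs instead of being
    fixed at 60 x 60."""
    def __init__(self, input_size, hidden_size, num_layers, feature_dim=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, feature_dim)

    def forward(self, x):
        out, _ = self.lstm(x)  # b x n x hidden_size (zero initial states by default)
        return self.fc(out)    # b x n x feature_dim, one feature per point
```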
@jacobrgardner Thanks for your kind and patient help. Yes, it seems my model was poorly designed. When I finish fixing the model, I will share here how I modified it.
Thanks! 👍 :)
Hi @h-jia. Were you able to fix your model? I'm interested in how you resolved it. Many thanks.
I'm using a DKL model to do regression, but I met this error during testing. Any suggestions?