🐛 Bug

Hey hey! I am trying to train a pretrained VAE. I have 3-channel RGB byte data and I am training it with pl_bolts.models.autoencoders, which eventually produces the error below.
Please reproduce using the BoringModel
```python
import numpy as np
import torch
import pytorch_lightning as pl
from PIL import Image
from torch.utils.data import Dataset
from pl_bolts.models.autoencoders import VAE


# I load my data here; the images are converted to 3-channel RGB
def load_data(path="./no_obstacle/black_and_white_compressed/"):
    return [np.array(Image.open(f"{path}{i}.png").convert("RGB")) for i in range(2500)]


class NewSet(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label.tobytes()

    def __len__(self):
        return len(self.data)  # number of images

    # the pretrained model requires two outputs per sample
    def __getitem__(self, idx):
        return self.data[idx], self.data[idx]  # self.label[idx]


def data_loaders(data_source="../no_obstacle/black_and_white_compressed_shape10/", batchsize=25, train_size=2000, val_size=250, test_size=250):
    # load data
    data = load_data(path=data_source)
    data_num = load_data_numeric(path="../no_obstacle/state/")  # not shown here; TODO: IMPORTANT, only valid for NO OBSTACLE
    # shuffle data with a fixed permutation
    np.random.seed(123)
    p = np.random.permutation(len(data))
    p = p.astype(int)
    data, data_num = np.array(data), np.array(data_num)[:, 2].reshape(len(data_num), 1)
    data = data[p]
    data_num = data_num[p]
    data_num = data_num.astype(float)
    # split data into train/val/test
    data = np.split(data, [train_size, train_size + val_size, train_size + val_size + test_size])  # is there any randomness here?
    num_data = np.split(data_num, [train_size, train_size + val_size, train_size + val_size + test_size])
    train, val, test = data[0], data[1], data[2]
    train_num, val_num, test_num = num_data[0], num_data[1], num_data[2]
    # wrap everything in PyTorch DataLoaders
    init = lambda a: torch.utils.data.DataLoader(a, batch_size=batchsize, shuffle=False)
    swap = lambda a: np.swapaxes(a, 1, 3)  # move channels to axis 1 (images are square, so the H/W swap does not matter)
    train, val, test = swap(train), swap(val), swap(test)
    train_loader = init(NewSet(data=train, label=train_num))
    val_loader = init(NewSet(data=val, label=val_num))
    test_loader = init(NewSet(data=test, label=test_num))
    # return the dataloaders
    return train_loader, val_loader, test_loader


loader = data_loaders()[0]

vae = VAE(
    input_height=10,
    enc_type='resnet18',
    first_conv=False,
    maxpool1=False,
    enc_out_dim=512,
    kl_coeff=0.1,
    latent_dim=256,
    lr=1e-4,
).from_pretrained('cifar10-resnet18')  # not sure if this last call is really necessary?

trainer = pl.Trainer(max_epochs=10)
trainer.fit(vae, loader)
```
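Before calling trainer.fit I also looked at what the loader actually yields; a minimal sanity check along these lines (not part of the script above) gives the shape and dtype mentioned further down:

```python
# Minimal sanity check of the first batch (sketch, assuming the loader built above).
x, y = next(iter(loader))
print(x.shape, x.dtype)  # torch.Size([25, 3, 10, 10]) torch.uint8
```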
The traceback:

```
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/trainer.py", line 599, in run_train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 480, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 639, in run_training_batch
self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 414, in optimizer_step
model_ref.optimizer_step(
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/core/lightning.py", line 1400, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/core/optimizer.py", line 214, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/core/optimizer.py", line 134, in __optimizer_step
trainer.accelerator.optimizer_step(optimizer, self._optimizer_idx, lambda_closure=closure, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 303, in optimizer_step
self.run_optimizer_step(optimizer, opt_idx, lambda_closure, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 310, in run_optimizer_step
self.training_type_plugin.optimizer_step(optimizer, lambda_closure=lambda_closure, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 184, in optimizer_step
optimizer.step(closure=lambda_closure, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/adam.py", line 66, in step
loss = closure()
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 633, in train_step_and_backward_closure
result = self.training_step_and_backward(
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 727, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/trainer/training_loop.py", line 281, in training_step
training_step_output = self.trainer.accelerator.training_step(args)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/accelerators/accelerator.py", line 182, in training_step
return self.training_type_plugin.training_step(*args)
File "/usr/local/lib/python3.8/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 146, in training_step
return self.lightning_module.training_step(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pl_bolts/models/autoencoders/basic_vae/basic_vae_module.py", line 154, in training_step
loss, logs = self.step(batch, batch_idx)
File "/usr/local/lib/python3.8/dist-packages/pl_bolts/models/autoencoders/basic_vae/basic_vae_module.py", line 133, in step
z, x_hat, p, q = self._run_step(x)
File "/usr/local/lib/python3.8/dist-packages/pl_bolts/models/autoencoders/basic_vae/basic_vae_module.py", line 118, in _run_step
x = self.encoder(x)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/pl_bolts/models/autoencoders/components.py", line 247, in forward
x = self.conv1(x)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 423, in forward
return self._conv_forward(input, self.weight)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/conv.py", line 419, in _conv_forward
return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: expected scalar type Byte but found Float
```
The obvious thing to check is whether my data is really in byte format (uint8, as I understand it). It is: the tensors are uint8 in my script and still uint8 inside basic_vae_module.py; I didn't check any further down the stack. Maybe useful to know: my input has shape torch.Size([25, 3, 10, 10]), i.e. a batch size of 25, 3 channels, 10x10 pixels.
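If my reading of the error is right, conv2d is complaining because the encoder weights are float while my images are uint8. A minimal sketch of the workaround I would try (my assumption, not verified; FloatNewSet is just a hypothetical name) is to cast the images to float32 in the dataset:

```python
import torch

# Sketch of a possible workaround (assumption, not a verified fix): cast the
# uint8 images to float32 and scale to [0, 1] so the conv layers get float input.
# FloatNewSet is a hypothetical variant of the NewSet dataset defined above.
class FloatNewSet(NewSet):
    def __getitem__(self, idx):
        img = torch.as_tensor(self.data[idx], dtype=torch.float32) / 255.0
        return img, img
```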
Thanks in advance!