yandex-research / tabular-dl-tabr

The implementation of "TabR: Unlocking the Power of Retrieval-Augmented Tabular Deep Learning"
https://arxiv.org/abs/2307.14338
MIT License

RuntimeError: mat1 and mat2 must have the same dtype #23

Closed YuHengjie closed 5 months ago

YuHengjie commented 5 months ago

Thanks very much for this great work.

I am trying to understand the code and use it in my research.

I encountered an error and don't know how to fix it. Any suggestions would be greatly appreciated.

Here is the code:

```python
# %%
data = {
    "X_num": {"train": X_train, "val": X_test},
    "Y": {"train": y_train, "val": y_test},
}

# %%
dataset = Dataset(
    data=data,
    task_type=TaskType.REGRESSION,
    score='rmse',
    y_info=None,
    _Y_numpy=None,
)

seed = 42
model = {
    'num_embeddings': None,  # Example embedding configuration
    'd_main': 64,
    'd_multiplier': 1.0,
    'encoder_n_blocks': 2,
    'predictor_n_blocks': 2,
    'mixer_normalization': False,
    'context_dropout': 0.1,
    'dropout0': 0.1,
    'dropout1': 0.1,
    'normalization': 'BatchNorm1d',
    'activation': 'ReLU',
}

# define Config
config = Config(
    seed=seed,
    data=dataset,
    model=model,
    context_size=5,
    optimizer={'type': 'Adam', 'lr': 0.001},
    batch_size=64,
    patience=10,
    n_epochs=10,
)

# %%
output_path = "./output"
force = True
report = main(config, output_path, force=force)
```

The error details are as follows:


```
RuntimeError                              Traceback (most recent call last)
File /Users/hjyu/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/tabr_test.py:4
      2 output_path = "./output"
      3 force = True
----> 4 report = main(config, output_path, force=force)

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/bin/tabr.py:508, in main(config, output, force)
    503 epoch_losses = []
    504 for batch_idx in tqdm(
    505     lib.make_random_batches(train_size, C.batch_size, device),
    506     desc=f'Epoch {epoch}',
    507 ):
--> 508     loss, new_chunk_size = lib.train_step(
    509         optimizer,
    510         lambda idx: loss_fn(apply_model('train', idx, True), Y_train[idx]),
    511         batch_idx,
    512         chunk_size or C.batch_size,
    513     )
    514     epoch_losses.append(loss.detach())
    515     if new_chunk_size and new_chunk_size < (chunk_size or C.batch_size):

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/lib/deep.py:447, in train_step(optimizer, step_fn, batch, chunk_size)
    445 optimizer.zero_grad()
    446 if batch_size <= chunk_size:
--> 447     loss = step_fn(batch)
    448     loss.backward()
    449 else:

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/bin/tabr.py:510, in main.<locals>.<lambda>(idx)
    503 epoch_losses = []
    504 for batch_idx in tqdm(
    505     lib.make_random_batches(train_size, C.batch_size, device),
    506     desc=f'Epoch {epoch}',
    507 ):
    508     loss, new_chunk_size = lib.train_step(
    509         optimizer,
--> 510         lambda idx: loss_fn(apply_model('train', idx, True), Y_train[idx]),
    511         batch_idx,
    512         chunk_size or C.batch_size,
    513     )
    514     epoch_losses.append(loss.detach())
    515     if new_chunk_size and new_chunk_size < (chunk_size or C.batch_size):

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/bin/tabr.py:436, in main.<locals>.apply_model(part, idx, training)
    428 candidate_indices = candidate_indices[~torch.isin(candidate_indices, idx)]
    429 candidate_x, candidate_y = get_Xy(
    430     'train',
    431     # This condition is here for historical reasons, it could be just
    432     # the unconditional candidate_indices.
    433     None if candidate_indices is train_indices else candidate_indices,
    434 )
--> 436 return model(
    437     x_=x,
    438     y=y if is_train else None,
    439     candidate_x_=candidate_x,
    440     candidate_y=candidate_y,
    441     context_size=C.context_size,
    442     is_train=is_train,
    443 ).squeeze(-1)

File ~/anaconda3/envs/tabr/lib/python3.9/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/bin/tabr.py:243, in Model.forward(self, x_, y, candidate_x_, candidate_y, context_size, is_train)
    212 def forward(
    213     self,
    214     *,
        (...)
    221 ) -> Tensor:
    222     # >>>
    223     with torch.set_grad_enabled(
    224         torch.is_grad_enabled() and not self.memory_efficient
    225     ):
        (...)
    240         # performed without gradients.
    241         # Later, it is recomputed with gradients only for the context objects.
    242         candidate_k = (
--> 243             self._encode(candidate_x_)[1]
    244             if self.candidate_encoding_batch_size is None
    245             else torch.cat(
    246                 [
    247                     self._encode(x)[1]
    248                     for x in delu.iter_batches(
    249                         candidate_x_, self.candidate_encoding_batch_size
    250                     )
    251                 ]
    252             )
    253         )
    254         x, k = self._encode(x_)
    255         if is_train:
    256             # NOTE: here, we add the training batch back to the candidates after the
    257             # function apply_model removed them. The further code relies
    258             # on the fact that the first batch_size candidates come from the
    259             # training batch.

File ~/Library/Mobile Documents/com~apple~CloudDocs/Code/Transfer_Learning_Tabular/TabR/bin/tabr.py:206, in Model._encode(failed resolving arguments)
    203 assert x  # assert that the list x is not empty; presumably to ensure the input data is valid
    204 x = torch.cat(x, dim=1)
--> 206 x = self.linear(x)
    207 for block in self.blocks0:
    208     x = x + block(x)

File ~/anaconda3/envs/tabr/lib/python3.9/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/tabr/lib/python3.9/site-packages/torch/nn/modules/linear.py:114, in Linear.forward(self, input)
    113 def forward(self, input: Tensor) -> Tensor:
--> 114     return F.linear(input, self.weight, self.bias)

RuntimeError: mat1 and mat2 must have the same dtype
```
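For reference, `mat1` here is the input batch reaching the first `nn.Linear` layer inside `Model._encode`, and `mat2` is that layer's weight matrix. The model's parameters are float32, so this error usually means the input arrays were float64 (NumPy's default float dtype). A minimal, standalone sketch of the same mismatch (not TabR-specific):

```python
import numpy as np
import torch

linear = torch.nn.Linear(4, 8)              # parameters are float32 by default
x = torch.from_numpy(np.random.rand(2, 4))  # float64: NumPy's default float dtype
# linear(x)  ->  RuntimeError: mat1 and mat2 must have the same dtype
out = linear(x.float())                     # casting the input to float32 works
print(out.dtype)                            # torch.float32
```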

YuHengjie commented 5 months ago

Fixed it: change X's data type to float32.
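In other words, the NumPy arrays passed into the `Dataset` were float64 and did not match the model's float32 weights. A sketch of the fix, assuming `X_train`, `X_test`, `y_train`, `y_test` are NumPy arrays as above:

```python
import numpy as np

# Cast features and targets to float32 so they match the model's parameter dtype.
X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

data = {
    "X_num": {"train": X_train, "val": X_test},
    "Y": {"train": y_train, "val": y_test},
}
```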