dreamquark-ai / tabnet

PyTorch implementation of TabNet paper : https://arxiv.org/pdf/1908.07442.pdf
https://dreamquark-ai.github.io/tabnet/
MIT License
2.61k stars 485 forks source link

Getting "IndexError: index out of range in self" while running the Multitask classification problem #402

Closed omkarmutreja12 closed 2 years ago

omkarmutreja12 commented 2 years ago

####### Code ###########

clf = TabNetClassifier( n_d=64, n_a=64, n_steps=2, gamma=1.5, n_independent=1, n_shared=1, cat_idxs=cat_idx, cat_dims=cat_dims,

cat_emb_dim=1,

lambda_sparse=1e-4, momentum=0.3, clip_value=2.,
optimizer_fn=torch.optim.Adam,
optimizer_params=dict(lr=2e-2),
scheduler_params = {"gamma": 0.95,
                 "step_size": 20},
scheduler_fn=torch.optim.lr_scheduler.StepLR, epsilon=1e-15

)

clf.fit( X_train=X_train, y_train=y_train, eval_set=[(X_train, y_train), (X_valid, y_valid)], eval_name=['train', 'valid'], max_epochs=max_epochs , patience=10, batch_size=8, virtual_batch_size=8,

num_workers=0,

#drop_last=False,
#loss_fn=[torch.nn.functional.cross_entropy]*NB_TASKS # Optional, just an example of list usage

)

########### Error ###########

IndexError Traceback (most recent call last)

in ----> 1 clf.fit( 2 X_train=X_train, y_train=y_train, 3 eval_set=[(X_train, y_train), (X_valid, y_valid)], 4 eval_name=['train', 'valid'], 5 max_epochs=max_epochs , patience=10, C:\ProgramData\Anaconda3\lib\site-packages\pytorch_tabnet\abstract_model.py in fit(self, X_train, y_train, eval_set, eval_name, eval_metric, loss_fn, weights, max_epochs, patience, batch_size, virtual_batch_size, num_workers, drop_last, callbacks, pin_memory, from_unsupervised) 221 self._callback_container.on_epoch_begin(epoch_idx) 222 --> 223 self._train_epoch(train_dataloader) 224 225 # Apply predict epoch to all eval sets C:\ProgramData\Anaconda3\lib\site-packages\pytorch_tabnet\abstract_model.py in _train_epoch(self, train_loader) 432 self._callback_container.on_batch_begin(batch_idx) 433 --> 434 batch_logs = self._train_batch(X, y) 435 436 self._callback_container.on_batch_end(batch_idx, batch_logs) C:\ProgramData\Anaconda3\lib\site-packages\pytorch_tabnet\abstract_model.py in _train_batch(self, X, y) 467 param.grad = None 468 --> 469 output, M_loss = self.network(X) 470 471 loss = self.compute_loss(output, y) C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks): -> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], [] C:\ProgramData\Anaconda3\lib\site-packages\pytorch_tabnet\tab_network.py in forward(self, x) 580 581 def forward(self, x): --> 582 x = self.embedder(x) 583 return self.tabnet(x) 584 C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks): 
-> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], [] C:\ProgramData\Anaconda3\lib\site-packages\pytorch_tabnet\tab_network.py in forward(self, x) 847 else: 848 cols.append( --> 849 self.embeddings[cat_feat_counter](x[:, feat_init_idx].long()) 850 ) 851 cat_feat_counter += 1 clf = TabNetClassifier( n_d=64, n_a=64, n_steps=2, gamma=1.5, n_independent=1, n_shared=1, cat_idxs=cat_idx, cat_dims=cat_dims, #cat_emb_dim=1, lambda_sparse=1e-4, momentum=0.3, clip_value=2., optimizer_fn=torch.optim.Adam, optimizer_params=dict(lr=2e-2), scheduler_params = {"gamma": 0.95, "step_size": 20}, scheduler_fn=torch.optim.lr_scheduler.StepLR, epsilon=1e-15 C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks): -> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], [] C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input) 156 157 def forward(self, input: Tensor) -> Tensor: --> 158 return F.embedding( 159 input, self.weight, self.padding_idx, self.max_norm, 160 self.norm_type, self.scale_grad_by_freq, self.sparse) C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse) 2181 # remove once script supports set_grad_enabled 2182 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type) -> 2183 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) 2184 2185 IndexError: index out of range in self
omkarmutreja12 commented 2 years ago

image

There are 3 output classes and it's a multi-class classification problem. The shapes of the train, valid, and test sets also look fine. Could you please help me out with a solution?

Optimox commented 2 years ago

It looks like you might have unknown categorical values in one of your columns.

Optimox commented 2 years ago

Closing this, feel free to reopen with more information.