Open santurini opened 4 months ago
Hello, you should have a look at this: https://www.kaggle.com/code/optimo/tabnetmultitaskclassifier
If all your tasks are classification then you just need to follow this notebook and you should be good to go.
I am trying to replicate the example on my data. I preprocessed the data using a label encoder and filling the NA values, and did the same for the two target columns. Then I defined categorical_dims as the list of unique values for each categorical variable (targets excluded), but I get the following error with the model defined like this:
clf = TabNetMultiTaskClassifier(
n_steps=1,
cat_idxs=cat_idxs,
cat_dims=cat_dims,
cat_emb_dim=12,
optimizer_fn=torch.optim.Adam,
optimizer_params=dict(lr=2e-2),
scheduler_params={"step_size":50, "gamma":0.9},
scheduler_fn=torch.optim.lr_scheduler.StepLR,
mask_type='entmax',
lambda_sparse=0,
)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[61], [line 3]
[1] max_epochs = 2
----> [3] clf.fit(
[4] X_train=X_train.values, y_train=y_train.values,
[5] max_epochs=max_epochs ,
[6] batch_size=1024,
[7] virtual_batch_size=12,
[8] num_workers=1,
[9] drop_last=False,
[10])
File pytorch_tabnet\abstract_model.py:258, in TabModel.fit(self, X_train, y_train, eval_set, eval_name, eval_metric, loss_fn, weights, max_epochs, patience, batch_size, virtual_batch_size, num_workers, drop_last, callbacks, pin_memory, from_unsupervised, warm_start, augmentations, compute_importance)
(pytorch_tabnet/abstract_model.py:253) for epoch_idx in range(self.max_epochs):
(pytorch_tabnet/abstract_model.py:254)
(pytorch_tabnet/abstract_model.py:255) # Call method on_epoch_begin for all callbacks
(pytorch_tabnet/abstract_model.py:256) self._callback_container.on_epoch_begin(epoch_idx)
--> (pytorch_tabnet/abstract_model.py:258) self._train_epoch(train_dataloader)
(pytorch_tabnet/abstract_model.py:260) # Apply predict epoch to all eval sets
(pytorch_tabnet/abstract_model.py:261) for eval_name, valid_dataloader in zip(eval_names, valid_dataloaders):
File pytorch_tabnet\abstract_model.py:489, in TabModel._train_epoch(self, train_loader)
(pytorch_tabnet/abstract_model.py:486) for batch_idx, (X, y) in enumerate(train_loader):
(pytorch_tabnet/abstract_model.py:487) self._callback_container.on_batch_begin(batch_idx)
--> (pytorch_tabnet/abstract_model.py:489) batch_logs = self._train_batch(X, y)
(pytorch_tabnet/abstract_model.py:491) self._callback_container.on_batch_end(batch_idx, batch_logs)
(pytorch_tabnet/abstract_model.py:493) epoch_logs = {"lr": self._optimizer.param_groups[-1]["lr"]}
File pytorch_tabnet\abstract_model.py:527, in TabModel._train_batch(self, X, y)
(pytorch_tabnet/abstract_model.py:524) for param in self.network.parameters():
(pytorch_tabnet/abstract_model.py:525) param.grad = None
--> (pytorch_tabnet/abstract_model.py:527) output, M_loss = self.network(X)
(pytorch_tabnet/abstract_model.py:529) loss = self.compute_loss(output, y)
(pytorch_tabnet/abstract_model.py:530) # Add the overall sparsity loss
File torch\nn\modules\module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1530) return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
(torch/nn/modules/module.py:1531) else:
-> (torch/nn/modules/module.py:1532) return self._call_impl(*args, **kwargs)
File torch\nn\modules\module.py:1541, in Module._call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1536) # If we don't have any hooks, we want to skip the rest of the logic in
(torch/nn/modules/module.py:1537) # this function, and just call forward.
(torch/nn/modules/module.py:1538) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
(torch/nn/modules/module.py:1539) or _global_backward_pre_hooks or _global_backward_hooks
(torch/nn/modules/module.py:1540) or _global_forward_hooks or _global_forward_pre_hooks):
-> (torch/nn/modules/module.py:1541) return forward_call(*args, **kwargs)
(torch/nn/modules/module.py:1543) try:
(torch/nn/modules/module.py:1544) result = None
File pytorch_tabnet\tab_network.py:615, in TabNet.forward(self, x)
(pytorch_tabnet/tab_network.py:614) def forward(self, x):
--> (pytorch_tabnet/tab_network.py:615) x = self.embedder(x)
(pytorch_tabnet/tab_network.py:616) return self.tabnet(x)
File torch\nn\modules\module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1530) return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
(torch/nn/modules/module.py:1531) else:
-> (torch/nn/modules/module.py:1532) return self._call_impl(*args, **kwargs)
File torch\nn\modules\module.py:1541, in Module._call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1536) # If we don't have any hooks, we want to skip the rest of the logic in
(torch/nn/modules/module.py:1537) # this function, and just call forward.
(torch/nn/modules/module.py:1538) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
(torch/nn/modules/module.py:1539) or _global_backward_pre_hooks or _global_backward_hooks
(torch/nn/modules/module.py:1540) or _global_forward_hooks or _global_forward_pre_hooks):
-> (torch/nn/modules/module.py:1541) return forward_call(*args, **kwargs)
(torch/nn/modules/module.py:1543) try:
(torch/nn/modules/module.py:1544) result = None
File pytorch_tabnet\tab_network.py:890, in EmbeddingGenerator.forward(self, x)
(pytorch_tabnet/tab_network.py:887) cols.append(x[:, feat_init_idx].float().view(-1, 1))
(pytorch_tabnet/tab_network.py:888) else:
(pytorch_tabnet/tab_network.py:889) cols.append(
--> (pytorch_tabnet/tab_network.py:890) self.embeddings[cat_feat_counter](x[:, feat_init_idx].long())
(pytorch_tabnet/tab_network.py:891) )
(pytorch_tabnet/tab_network.py:892) cat_feat_counter += 1
(pytorch_tabnet/tab_network.py:893) # concat
File torch\nn\modules\module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1530) return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
(torch/nn/modules/module.py:1531) else:
-> (torch/nn/modules/module.py:1532) return self._call_impl(*args, **kwargs)
File torch\nn\modules\module.py:1541, in Module._call_impl(self, *args, **kwargs)
(torch/nn/modules/module.py:1536) # If we don't have any hooks, we want to skip the rest of the logic in
(torch/nn/modules/module.py:1537) # this function, and just call forward.
(torch/nn/modules/module.py:1538) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
(torch/nn/modules/module.py:1539) or _global_backward_pre_hooks or _global_backward_hooks
(torch/nn/modules/module.py:1540) or _global_forward_hooks or _global_forward_pre_hooks):
-> (torch/nn/modules/module.py:1541) return forward_call(*args, **kwargs)
(torch/nn/modules/module.py:1543) try:
(torch/nn/modules/module.py:1544) result = None
File torch\nn\modules\sparse.py:163, in Embedding.forward(self, input)
(torch/nn/modules/sparse.py:162) def forward(self, input: Tensor) -> Tensor:
--> (torch/nn/modules/sparse.py:163) return F.embedding(
(torch/nn/modules/sparse.py:164) input, self.weight, self.padding_idx, self.max_norm,
(torch/nn/modules/sparse.py:165) self.norm_type, self.scale_grad_by_freq, self.sparse)
File torch\nn\functional.py:2264, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
(torch/nn/functional.py:2258) # Note [embedding_renorm set_grad_enabled]
(torch/nn/functional.py:2259) # XXX: equivalent to
(torch/nn/functional.py:2260) # with torch.no_grad():
(torch/nn/functional.py:2261) # torch.embedding_renorm_
(torch/nn/functional.py:2262) # remove once script supports set_grad_enabled
(torch/nn/functional.py:2263) _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> (torch/nn/functional.py:2264) return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
It looks like one of the category values is out of range: each categorical column must contain only integers between 0 and cat_dim - 1, where cat_dim is the corresponding entry of cat_dims.
Hello, I am new to TabNet and I would like to use it for a multi-label classification task. I have a pandas dataset with mixed columns (numerical and categorical) and I would like to classify two different columns with multiple values, let's say COMPANY and ROLE.
Which model class should I use? How should I prepare the numerical data? How should I prepare the categorical data? How do I pass the data and the targets?
Some help would really be appreciated, thank you!