jrzaurin / pytorch-widedeep

A flexible package for multimodal-deep-learning to combine tabular data with text and images using Wide and Deep models in Pytorch
Apache License 2.0
1.3k stars 190 forks source link

AttributeError: 'TabMlp' object has no attribute 'with_fds' #195

Closed MOREDataset closed 1 year ago

MOREDataset commented 1 year ago

Hi,

I am new to this library. I am trying to develop a regression model using the script below:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from pytorch_widedeep import Trainer
from pytorch_widedeep.models import TabMlp
from pytorch_widedeep.preprocessing import TabPreprocessor

loc = 'Toolik'
sequence_length = 1
features = ['SNODP', 'SWGDN', 'T2M', 'SWLAND', 'TLML', 'SLP']
target= 'T0'
res = 30
season = 'Winter'
_, _, df = prepare_sequences(loc, sequence_length, target, features, season, res)
# Split into train and test sets
train_size = int(len(df) * 0.8)
# Split the data chronologically
train_df = df.iloc[:train_size]
test_df = df.iloc[train_size:]

features = ['SNODP', 'SWGDN', 'T2M', 'SWLAND', 'TLML', 'SLP','Year','Month','Day']

# Preprocess the data for continuous features
tab_preprocessor = TabPreprocessor(continuous_cols=features)
train_processed = tab_preprocessor.fit_transform(train_df)
test_processed = tab_preprocessor.transform(test_df)

# Extract the processed features and target for training and testing
X_train_processed = train_processed[:, :-1]
y_train = train_processed[:, -1]
X_test_processed = test_processed[:, :-1]
y_test = test_processed[:, -1]

# Define the TabMlp model
tab_mlp = TabMlp(column_idx=tab_preprocessor.column_idx, 
                 continuous_cols=features,
                 mlp_hidden_dims=[64, 32])  # Adjust the hidden dimensions as necessary

# Configure the trainer
trainer = Trainer(model=tab_mlp, objective="regression")

# Train the model
trainer.fit(X_train=X_train_processed, target=y_train, n_epochs=10, batch_size=32)

# Make predictions and evaluate the model
y_pred = trainer.predict(X_test_processed)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Test RMSE: {rmse}")

However, I get this error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[4], line 51
     46 tab_mlp = TabMlp(column_idx=tab_preprocessor.column_idx, 
     47                  continuous_cols=features,
     48                  mlp_hidden_dims=[64, 32])  # Adjust the hidden dimensions as necessary
     50 # Configure the trainer
---> 51 trainer = Trainer(model=tab_mlp, objective="regression")
     53 # Train the model
     54 trainer.fit(X_train=X_train_processed, target=y_train, n_epochs=10, batch_size=32)

File ~/.local/lib/python3.9/site-packages/pytorch_widedeep/utils/general_utils.py:61, in Alias.__call__(self, wrapped, instance, args, kwargs)
     57     else:
     58         set_default_attr(wrapped, "__wd_aliases_used", {})[
     59             self.primary_name
     60         ] = alias
---> 61 return wrapped(*args, **kwargs)

File ~/.local/lib/python3.9/site-packages/pytorch_widedeep/training/trainer.py:241, in Trainer.__init__(self, model, objective, custom_loss_function, optimizers, lr_schedulers, initializers, transforms, callbacks, metrics, verbose, seed, **kwargs)
    222 @Alias(  # noqa: C901
    223     "objective",
    224     ["loss_function", "loss_fn", "loss", "cost_function", "cost_fn", "cost"],
   (...)
    239     **kwargs,
    240 ):
--> 241     super().__init__(
    242         model=model,
    243         objective=objective,
    244         custom_loss_function=custom_loss_function,
    245         optimizers=optimizers,
    246         lr_schedulers=lr_schedulers,
    247         initializers=initializers,
    248         transforms=transforms,
    249         callbacks=callbacks,
    250         metrics=metrics,
    251         verbose=verbose,
    252         seed=seed,
    253         **kwargs,
    254     )

File ~/.local/lib/python3.9/site-packages/pytorch_widedeep/training/_base_trainer.py:57, in BaseTrainer.__init__(self, model, objective, custom_loss_function, optimizers, lr_schedulers, initializers, transforms, callbacks, metrics, verbose, seed, **kwargs)
     42 def __init__(
     43     self,
     44     model: WideDeep,
   (...)
     55     **kwargs,
     56 ):
---> 57     self._check_inputs(
     58         model, objective, optimizers, lr_schedulers, custom_loss_function
     59     )
     60     self.device, self.num_workers = self._set_device_and_num_workers(**kwargs)
     62     self.early_stop = False

File ~/.local/lib/python3.9/site-packages/pytorch_widedeep/training/_base_trainer.py:327, in BaseTrainer._check_inputs(model, objective, optimizers, lr_schedulers, custom_loss_function)
    319 @staticmethod
    320 def _check_inputs(
    321     model,
   (...)
    325     custom_loss_function,
    326 ):
--> 327     if model.with_fds and _ObjectiveToMethod.get(objective) != "regression":
    328         raise ValueError(
    329             "Feature Distribution Smooting can be used only for regression"
    330         )
    332     if _ObjectiveToMethod.get(objective) == "multiclass" and model.pred_dim == 1:

File ~/.local/lib/python3.9/site-packages/torch/nn/modules/module.py:1614, in Module.__getattr__(self, name)
   1612     if name in modules:
   1613         return modules[name]
-> 1614 raise AttributeError("'{}' object has no attribute '{}'".format(
   1615     type(self).__name__, name))

AttributeError: 'TabMlp' object has no attribute 'with_fds'

I do not know why this error happens or how to fix it. Can anyone help?

jrzaurin commented 1 year ago

hi @MOREDataset

Thanks for opening the issue!

All models need to be wrapped in what can be called the "collector class", WideDeep, before they are passed to the Trainer (e.g. model = WideDeep(deeptabular=tab_mlp)).

please, see here: https://github.com/jrzaurin/pytorch-widedeep/blob/master/examples/scripts/adult_census.py

If you still have any questions let me know and I will post some code directly here

Cheers!

MOREDataset commented 1 year ago

Hi @jrzaurin, thank you for your quick response. Yes, a code example showing how to train a deep learning model for regression (using only continuous data) would be very much appreciated.

jrzaurin commented 1 year ago

@MOREDataset are the example and the library docs not enough?

The code in the example does classification. When your target is for regression, simply change the objective param in the Trainer class and off you go.

trainer = Trainer(model, objective="regression")

You have plenty of other examples (including regression) in the examples folder.

Cheers!