skorch-dev / skorch

A scikit-learn compatible neural network library that wraps PyTorch

Problem RNN with LSTM cells #632

Closed · brunomorampc closed this issue 4 years ago

brunomorampc commented 4 years ago

I am testing skorch with LSTM cells on a regression problem. Here is the code:

import numpy as np
from sklearn.datasets import make_regression
from skorch import NeuralNetRegressor
import unittest
from util.pythorch_util import MyLSTM

X_regr, y_regr = make_regression(1000, 20, n_informative=10, random_state=0)
X_regr = X_regr.astype(np.float32)
y_regr = y_regr.astype(np.float32) / 100
y_regr = y_regr.reshape(-1, 1)
ni = 20
no = 1
nh = 10
nlayers = 3
net_regr = NeuralNetRegressor(
    module=MyLSTM,
    module__ni=ni,
    module__no=no,
    module__nh=nh,
    module__nlayers=nlayers,
    max_epochs=20,
    lr=0.1,
    #     device='cuda',  # uncomment this to train with CUDA
)

net_regr.fit(X_regr, y_regr)

where MyLSTM is:

import torch
import torch.nn as nn


class MyLSTM(nn.Module):
    def __init__(self, ni=6, no=3, nh=10, nlayers=1):
        super(MyLSTM, self).__init__()

        self.ni = ni
        self.no = no
        self.nh = nh
        self.nlayers = nlayers

        self.lstms = nn.ModuleList(
            [nn.LSTMCell(self.ni, self.nh)] + [nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)])
        self.out = nn.Linear(self.nh, self.no)
        self.do = nn.Dropout(p=0.2)
        self.actfn = nn.Tanh()
        self.device = torch.device('cpu')
        self.dtype = torch.float

    # forward pass through the stack of LSTM cells
    def forward(self, x, h0=None, train=False):
        hs = x  # hs starts as the input and becomes each layer's hidden state
        if h0 is None:
            h = torch.zeros(hs.shape[0], self.nh, device=self.device)
            c = torch.zeros(hs.shape[0], self.nh, device=self.device)
        else:
            (h, c) = h0

        # LSTM cells
        for i in range(self.nlayers):
            h, c = self.lstms[i](hs, (h, c))
            if train:
                hs = self.do(h)
            else:
                hs = h
        y = self.out(hs)
        return y, (h, c)
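
For reference, a quick standalone check of what MyLSTM returns (a minimal sketch using the definitions above; the batch size of 8 is just for illustration):

# minimal sketch: feed a random batch through MyLSTM as defined above
x = torch.randn(8, 20)                        # batch of 8 samples, 20 features
model = MyLSTM(ni=20, no=1, nh=10, nlayers=3)
y, (h, c) = model(x)                          # forward returns the prediction plus the hidden state tuple
print(y.shape, h.shape, c.shape)              # torch.Size([8, 1]) torch.Size([8, 10]) torch.Size([8, 10])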

When I try to run the fit call above, I get this error:

Error
Traceback (most recent call last):
  File "C:\Users\morabru01\AppData\Local\Programs\Python\Python36\lib\unittest\case.py", line 59, in testPartExecutor
    yield
  File "C:\Users\morabru01\AppData\Local\Programs\Python\Python36\lib\unittest\case.py", line 605, in run
    testMethod()
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\testbenches\test_LSTM_skorch.py", line 30, in test_toy
    net_regr.fit(X_regr,y_regr)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\regressor.py", line 91, in fit
    return super(NeuralNetRegressor, self).fit(X, y, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 854, in fit
    self.partial_fit(X, y, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 813, in partial_fit
    self.fit_loop(X, y, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 727, in fit_loop
    step_fn=self.train_step, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 763, in run_single_epoch
    step = step_fn(Xi, yi, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 659, in train_step
    self.optimizer_.step(step_fn)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\torch\optim\sgd.py", line 80, in step
    loss = closure()
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 656, in step_fn
    step = self.train_step_single(Xi, yi, **fit_params)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 599, in train_step_single
    loss = self.get_loss(y_pred, yi, X=Xi, training=True)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\skorch\net.py", line 1102, in get_loss
    return self.criterion_(y_pred, y_true)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\torch\nn\modules\loss.py", line 431, in forward
    return F.mse_loss(input, target, reduction=self.reduction)
  File "C:\Users\morabru01\Desktop\WORKSPACE\Learning\venv\lib\site-packages\torch\nn\functional.py", line 2203, in mse_loss
    if not (target.size() == input.size()):
AttributeError: 'tuple' object has no attribute 'size'

I am using Python 3.6, torch 1.4.0 and skorch 0.8.

EDIT: I guess the problem comes from the fact that the LSTM forward returns a tuple (h, c) with the hidden states in addition to the prediction... is there a way to solve this?

ottonemo commented 4 years ago

Yes, skorch just passes the module's output to get_loss and, subsequently, to the criterion. You have several options:

  1. create your own simple criterion that extracts the prediction and discards the context / hidden state
  2. implement your own get_loss that extracts the prediction and discards the context / hidden state (see the sketch after the example below)
  3. don't return anything except the prediction
  4. make a case for why we should only pass the first argument to get_loss / the criterion :)

I think (1) is the easiest. Example:

class ContextlessMSE(torch.nn.MSELoss):
    def forward(self, y_pred, y_true):
        y, (h, c) = y_pred  # keep the prediction, discard the hidden/cell state
        return super().forward(y, y_true)

net_regr = NeuralNetRegressor(
    module=MyLSTM,
    module__ni=ni,
    module__no=no,
    module__nh=nh,
    module__nlayers=nlayers,
    max_epochs=20,
    lr=0.1,
    criterion=ContextlessMSE,
)
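
For completeness, option (2) could look roughly like the following (a sketch only; it relies on the get_loss signature visible in the traceback above, and the subclass name MyLSTMRegressor is made up for illustration):

class MyLSTMRegressor(NeuralNetRegressor):
    def get_loss(self, y_pred, y_true, X=None, training=False):
        # y_pred is the full module output (y, (h, c));
        # keep only the prediction before delegating to the default loss
        y, _ = y_pred
        return super().get_loss(y, y_true, X=X, training=training)

Option (3) would simply mean returning only y from MyLSTM.forward, at the cost of no longer exposing (h, c) to callers.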


brunomorampc commented 4 years ago

Great! It works, thanks!

ottonemo commented 4 years ago

OK, great that this solution works for you. I'm closing this issue; feel free to re-open it if it doesn't fully cover your case :)

brunomorampc commented 4 years ago

Another, possibly related, question: now I want to use GridSearchCV. I got a similar error, AttributeError: 'tuple' object has no attribute 'to', so I did the same thing you suggested, but with a custom scoring function that sklearn accepts:

from sklearn.metrics import mean_squared_error, make_scorer
def my_custom_loss(y_true, y_pred):
    y, (h, c) = y_pred
    return mean_squared_error(y, y_true)

and then used it as the scorer:

from sklearn.model_selection import GridSearchCV

loss = make_scorer(my_custom_loss, greater_is_better=False)

# toy regression data, same as above
X_regr, y_regr = make_regression(1000, 20, n_informative=10, random_state=0)
X_regr = X_regr.astype(np.float32)
y_regr = y_regr.astype(np.float32) / 100
y_regr = y_regr.reshape(-1, 1)
ni = 20
no = 1
nh = 10
nlayers = 3
net_regr = NeuralNetRegressor(
    module=MyLSTM,
    module__ni=ni,
    module__no=no,
    module__nh=nh,
    module__nlayers=nlayers,
    max_epochs=20,
    lr=0.1,
    criterion=ContextlessMSE,
    iterator_train__shuffle=True,  # this shuffles the training data
    #     device='cuda',  # uncomment this to train with CUDA
)

params = {
    'lr': [0.1, 0.2],
    'max_epochs': [10, 20],
    'module__nh': [10, 20],
}
gs = GridSearchCV(net_regr, params, refit=False, cv=2, scoring=loss)

gs.fit(X_regr, y_regr)
print(gs.best_score_, gs.best_params_)

but I still get the same error: AttributeError: 'tuple' object has no attribute 'to'.