fastai / fastai2

Padding stripping leads to QRNN classification model input error #95

Closed: morganmcg1 closed this issue 4 years ago

morganmcg1 commented 4 years ago

As outlined on the forums: https://forums.fast.ai/t/fastai-v2-chat/53518/673

Trying to do classification with a QRNN leads to a model input size mismatch between the previous input chunk, which has had its padding stripped, and the current input chunk, which is still padded.

Error

invalid argument 0: Sizes of tensors must match except in dimension 1. Got 2 and 1 in dimension 0 at /opt/conda/conda-bld/pytorch_1573049306803/work/aten/src/THC/generic/THCTensorMath.cu:71
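For intuition, the clash can be reproduced in plain PyTorch (a minimal sketch with illustrative sizes, not fastai code): torch.cat requires every dimension except the concatenation dimension to match, but a tensor carried over from a padding-stripped chunk has a different batch size than the current chunk.

import torch

prev = torch.zeros(2, 1, 8)   # state saved from an earlier chunk (batch size 2)
cur  = torch.zeros(1, 5, 8)   # current chunk after padding rows were stripped (batch size 1)
torch.cat([prev, cur[:, :-1]], dim=1)
# RuntimeError: Sizes of tensors must match except in dimension 1. Got 2 and 1 in dimension 0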

Replication

The error can be replicated with the ULMFiT tutorial in nb38, where the only change is swapping the architecture and config passed to the learner:

Language model learner (trains as expected):

learn = language_model_learner(dls, AWD_QRNN, config=awd_qrnn_lm_config,
                               pretrained=False, metrics=[accuracy, Perplexity()], path=path, wd=0.1)

Classifier learner (raises the error when trained):

learn = text_classifier_learner(dls, AWD_QRNN, config=awd_qrnn_clas_config,
                                metrics=[accuracy], path=path, drop_mult=0.5)
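Training the classifier then fails on the first batch; the call below is the one at the top of the stack trace:

learn.fit_one_cycle(1)  # raises the RuntimeError shown in full below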

Full stack trace

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-27-4dfb24161c57> in <module>
----> 1 learn.fit_one_cycle(1)

~/fastai2/fastai2/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
     88     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
     89               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
---> 90     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
     91 
     92 # Cell

~/fastai2/fastai2/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    293                     try:
    294                         self.epoch=epoch;          self('begin_epoch')
--> 295                         self._do_epoch_train()
    296                         self._do_epoch_validate()
    297                     except CancelEpochException:   self('after_cancel_epoch')

~/fastai2/fastai2/learner.py in _do_epoch_train(self)
    268         try:
    269             self.dl = self.dls.train;                        self('begin_train')
--> 270             self.all_batches()
    271         except CancelTrainException:                         self('after_cancel_train')
    272         finally:                                             self('after_train')

~/fastai2/fastai2/learner.py in all_batches(self)
    246     def all_batches(self):
    247         self.n_iter = len(self.dl)
--> 248         for o in enumerate(self.dl): self.one_batch(*o)
    249 
    250     def one_batch(self, i, b):

~/fastai2/fastai2/learner.py in one_batch(self, i, b)
    252         try:
    253             self._split(b);                                  self('begin_batch')
--> 254             self.pred = self.model(*self.xb);                self('after_pred')
    255             if len(self.yb) == 0: return
    256             self.loss = self.loss_func(self.pred, *self.yb); self('after_loss')

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input)
     90     def forward(self, input):
     91         for module in self._modules.values():
---> 92             input = module(input)
     93         return input
     94 

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/fastai2/fastai2/text/models/core.py in forward(self, input)
     91             #Note: this expects that sequence really begins on a round multiple of bptt
     92             real_bs = (input[:,i] != self.pad_idx).long().sum()
---> 93             o = self.module(input[:real_bs,i: min(i+self.bptt, sl)])
     94             if self.max_len is None or sl-i <= self.max_len:
     95                 outs.append(o)

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/fastai2/fastai2/text/models/awdlstm.py in forward(self, inp, from_embeds)
    100         new_hidden = []
    101         for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dps)):
--> 102             output, new_h = rnn(output, self.hidden[l])
    103             new_hidden.append(new_h)
    104             if l != self.n_layers - 1: output = hid_dp(output)

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/fastai2/fastai2/text/models/qrnn.py in forward(self, inp, hid)
    152         if self.bidirectional: inp_bwd = inp.clone()
    153         for i, layer in enumerate(self.layers):
--> 154             inp, h = layer(inp, None if hid is None else hid[2*i if self.bidirectional else i])
    155             new_hid.append(h)
    156             if self.bidirectional:

~/anaconda3/envs/fastai2_me/lib/python3.7/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    539             result = self._slow_forward(*input, **kwargs)
    540         else:
--> 541             result = self.forward(*input, **kwargs)
    542         for hook in self._forward_hooks.values():
    543             hook_result = hook(self, input, result)

~/fastai2/fastai2/text/models/qrnn.py in forward(self, inp, hid)
    100 
    101     def forward(self, inp, hid=None):
--> 102         y = self.linear(self._get_source(inp))
    103         if self.output_gate: z_gate,f_gate,o_gate = y.chunk(3, dim=2)
    104         else:                z_gate,f_gate        = y.chunk(2, dim=2)

~/fastai2/fastai2/text/models/qrnn.py in _get_source(self, inp)
    123         if self.backward: inp_shift.insert(0,inp[:,1:] if self.batch_first else inp[1:])
    124         else:             inp_shift.append(inp[:,:-1] if self.batch_first else inp[:-1])
--> 125         inp_shift = torch.cat(inp_shift, dim)
    126         return torch.cat([inp, inp_shift], 2)
    127 

RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 2 and 1 in dimension 0 at /opt/conda/conda-bld/pytorch_1573049306803/work/aten/src/THC/generic/THCTensorMath.cu:71
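Reading the two fastai2 frames together points at the likely interaction (my reading of the trace, not a confirmed diagnosis): SentenceEncoder.forward shrinks each chunk's batch to real_bs by dropping rows that are pure padding, while the QRNN layer still holds state saved from the previous chunk at the old batch size, so the torch.cat in _get_source sees mismatched batch dimensions.

# Condensed from the frames above; comments are my interpretation.

# fastai2/text/models/core.py (SentenceEncoder.forward):
real_bs = (input[:,i] != self.pad_idx).long().sum()       # rows not padded at position i
o = self.module(input[:real_bs,i: min(i+self.bptt, sl)])  # batch shrunk to real_bs

# fastai2/text/models/qrnn.py (QRNNLayer._get_source):
inp_shift = torch.cat(inp_shift, dim)                     # one tensor keeps the previous
                                                          # chunk's batch size -> 2 vs 1 in dim 0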
sgugger commented 4 years ago

Thanks for flagging! All the issues should be fixed now.