fastai / fastai2

Temporary home for fastai v2 while it's being developed
https://dev.fast.ai
Apache License 2.0
645 stars 235 forks source link

Device-Side Assert Error with Tabular Predictions #115

Closed muellerzr closed 4 years ago

muellerzr commented 4 years ago

Currently, if we make a test_dl from a DataFrame that does not have the target column (such as a Kaggle test set), it first throws an error about the column not being present (expected), and then, if the missing target values are set to np.nan (but not if they default to zero), throws a CUDA device-side assert error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-41-9a993e47d6c2> in <module>()
----> 1 preds = learn.get_preds(dl=dl)

15 frames
/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in get_preds(self, ds_idx, dl, with_input, with_decoded, with_loss, act, inner, **kwargs)
    319             for mgr in ctx_mgrs: stack.enter_context(mgr)
    320             self(event.begin_epoch if inner else _before_epoch)
--> 321             self._do_epoch_validate(dl=dl)
    322             self(event.after_epoch if inner else _after_epoch)
    323             if act is None: act = getattr(self.loss_func, 'activation', noop)

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in _do_epoch_validate(self, ds_idx, dl)
    277             dl,old,has = change_attrs(dl, names, [False,False])
    278             self.dl = dl;                                    self('begin_validate')
--> 279             with torch.no_grad(): self.all_batches()
    280         except CancelValidException:                         self('after_cancel_validate')
    281         finally:

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in all_batches(self)
    245     def all_batches(self):
    246         self.n_iter = len(self.dl)
--> 247         for o in enumerate(self.dl): self.one_batch(*o)
    248 
    249     def one_batch(self, i, b):

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in one_batch(self, i, b)
    259             self.opt.zero_grad()
    260         except CancelBatchException:                         self('after_cancel_batch')
--> 261         finally:                                             self('after_batch')
    262 
    263     def _do_begin_fit(self, n_epoch):

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in __call__(self, event_name)
    226     def ordered_cbs(self, cb_func): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, cb_func)]
    227 
--> 228     def __call__(self, event_name): L(event_name).map(self._call_one)
    229     def _call_one(self, event_name):
    230         assert hasattr(event, event_name)

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in map(self, f, *args, **kwargs)
    360              else f.format if isinstance(f,str)
    361              else f.__getitem__)
--> 362         return self._new(map(g, self))
    363 
    364     def filter(self, f, negate=False, **kwargs):

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in _new(self, items, *args, **kwargs)
    313     @property
    314     def _xtra(self): return None
--> 315     def _new(self, items, *args, **kwargs): return type(self)(items, *args, use_list=None, **kwargs)
    316     def __getitem__(self, idx): return self._get(idx) if is_indexer(idx) else L(self._get(idx), use_list=None)
    317     def copy(self): return self._new(self.items.copy())

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(cls, x, *args, **kwargs)
     39             return x
     40 
---> 41         res = super().__call__(*((x,) + args), **kwargs)
     42         res._newchk = 0
     43         return res

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __init__(self, items, use_list, match, *rest)
    304         if items is None: items = []
    305         if (use_list is not None) or not _is_array(items):
--> 306             items = list(items) if use_list else _listify(items)
    307         if match is not None:
    308             if is_coll(match): match = len(match)

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in _listify(o)
    240     if isinstance(o, list): return o
    241     if isinstance(o, str) or _is_array(o): return [o]
--> 242     if is_iter(o): return list(o)
    243     return [o]
    244 

/usr/local/lib/python3.6/dist-packages/fastcore/foundation.py in __call__(self, *args, **kwargs)
    206             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
    207         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
--> 208         return self.fn(*fargs, **kwargs)
    209 
    210 # Cell

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in _call_one(self, event_name)
    229     def _call_one(self, event_name):
    230         assert hasattr(event, event_name)
--> 231         [cb(event_name) for cb in sort_by_run(self.cbs)]
    232 
    233     def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in <listcomp>(.0)
    229     def _call_one(self, event_name):
    230         assert hasattr(event, event_name)
--> 231         [cb(event_name) for cb in sort_by_run(self.cbs)]
    232 
    233     def _bn_bias_state(self, with_bias): return bn_bias_params(self.model, with_bias).map(self.opt.state)

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in __call__(self, event_name)
     23         _run = (event_name not in _inner_loop or (self.run_train and getattr(self, 'training', True)) or
     24                (self.run_valid and not getattr(self, 'training', False)))
---> 25         if self.run and _run: getattr(self, event_name, noop)()
     26         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
     27 

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in after_batch(self)
    505         if len(self.yb) == 0: return
    506         mets = self._train_mets if self.training else self._valid_mets
--> 507         for met in mets: met.accumulate(self.learn)
    508         if not self.training: return
    509         self.lrs.append(self.opt.hypers[-1]['lr'])

/usr/local/lib/python3.6/dist-packages/fastai2/learner.py in accumulate(self, learn)
    454     def accumulate(self, learn):
    455         bs = find_bs(learn.yb)
--> 456         self.total += to_detach(learn.loss.mean())*bs
    457         self.count += bs
    458     @property

RuntimeError: CUDA error: device-side assert triggered

I feel like there should be a better way to handle an unlabelled dataset for just get_preds, since it shouldn't need the ground-truth values, yes?

sgugger commented 4 years ago

Should be fixed now (and you don't have to add the dependent variable to your dataframe). I added examples in tabular.core and tabular.learner.