airctic / icevision

An Agnostic Computer Vision Framework - Pluggable to any Training Library: Fastai, Pytorch-Lightning with more to come
https://airctic.github.io/icevision/
Apache License 2.0
848 stars 150 forks source link

learn.unfreeze & learn.fit_one_cycle fails with error RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous #641

Closed ganesh3 closed 2 years ago

ganesh3 commented 3 years ago

🐛 Bug

Describe the bug: Running the code below with faster_rcnn and a resnet backbone gives the error shown below. learn.unfreeze() learn.fit_one_cycle(5, max_lr=slice(1e-6, 1e-4))

To Reproduce Steps to reproduce the behavior:

  1. Go to 'plantdoc.ipynb'
  2. update the faster_rcnn model to have a resnet backbone
  3. Run the commands as described above for learn.unfreeze and learn.fit_one_cycle.
  4. See error

          RuntimeError                              Traceback (most recent call last)
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
              154     def _with_events(self, f, event_type, ex, final=noop):
          --> 155         try:       self(f'before_{event_type}')       ;f()
              156         except ex: self(f'after_cancel_{event_type}')
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
              166         if len(self.yb): self.loss = self.loss_func(self.pred, *self.yb)
          --> 167         self('after_loss')
              168         if not self.training or not len(self.yb): return
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in __call__(self, event_name)
              132 
          --> 133     def __call__(self, event_name): L(event_name).map(self._call_one)
              134 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
              225 
          --> 226     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
              227     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
              542     if gen: return res
          --> 543     return list(res)
              544 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
              532         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
          --> 533         return self.func(*fargs, **kwargs)
              534 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _call_one(self, event_name)
              136         assert hasattr(event, event_name), event_name
          --> 137         [cb(event_name) for cb in sort_by_run(self.cbs)]
              138 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in <listcomp>(.0)
              136         assert hasattr(event, event_name), event_name
          --> 137         [cb(event_name) for cb in sort_by_run(self.cbs)]
              138 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/callback/core.py in __call__(self, event_name)
               43         res = None
          ---> 44         if self.run and _run: res = getattr(self, event_name, noop)()
               45         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/icevision/models/torchvision/fastai/callbacks.py in after_loss(self)
               29             self.model.eval()
          ---> 30             self.learn.pred = self.model(*self.xb)
               31             self.model.train()
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
              726         else:
          --> 727             result = self.forward(*input, **kwargs)
              728         for hook in itertools.chain(
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
               99         proposals, proposal_losses = self.rpn(images, features, targets)
          --> 100         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
              101         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
              726         else:
          --> 727             result = self.forward(*input, **kwargs)
              728         for hook in itertools.chain(
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
              766         else:
          --> 767             boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
              768             num_images = len(boxes)
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torchvision/models/detection/roi_heads.py in postprocess_detections(self, class_logits, box_regression, proposals, image_shapes)
              674         boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
          --> 675         pred_boxes = self.box_coder.decode(box_regression, proposals)
              676 
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/torchvision/models/detection/_utils.py in decode(self, rel_codes, boxes)
              175         pred_boxes = self.decode_single(
          --> 176             rel_codes.reshape(box_sum, -1), concat_boxes
              177         )
    
          RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous
    
          During handling of the above exception, another exception occurred:
    
          AttributeError                            Traceback (most recent call last)
          <ipython-input-110-2297b9d4df32> in <module>
                1 #learn.freeze()
                2 learn.unfreeze()
          ----> 3 learn.fit_one_cycle(5, max_lr=slice(1e-6, 1e-4))
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
               54         init_args.update(log)
               55         setattr(inst, 'init_args', init_args)
          ---> 56         return inst if to_return else f(*args, **kwargs)
               57     return _f
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
              111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
              112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
          --> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
              114 
              115 # Cell
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/logargs.py in _f(*args, **kwargs)
               54         init_args.update(log)
               55         setattr(inst, 'init_args', init_args)
          ---> 56         return inst if to_return else f(*args, **kwargs)
               57     return _f
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
              205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
              206             self.n_epoch = n_epoch
          --> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
              208 
              209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
              153 
              154     def _with_events(self, f, event_type, ex, final=noop):
          --> 155         try:       self(f'before_{event_type}')       ;f()
              156         except ex: self(f'after_cancel_{event_type}')
              157         finally:   self(f'after_{event_type}')        ;final()
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
              195         for epoch in range(self.n_epoch):
              196             self.epoch=epoch
          --> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
              198 
              199     @log_args(but='cbs')
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
              153 
              154     def _with_events(self, f, event_type, ex, final=noop):
          --> 155         try:       self(f'before_{event_type}')       ;f()
              156         except ex: self(f'after_cancel_{event_type}')
              157         finally:   self(f'after_{event_type}')        ;final()
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
              190     def _do_epoch(self):
              191         self._do_epoch_train()
          --> 192         self._do_epoch_validate()
              193 
              194     def _do_fit(self):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_validate(self, ds_idx, dl)
              186         if dl is None: dl = self.dls[ds_idx]
              187         self.dl = dl
          --> 188         with torch.no_grad(): self._with_events(self.all_batches, 'validate', CancelValidException)
              189 
              190     def _do_epoch(self):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
              153 
              154     def _with_events(self, f, event_type, ex, final=noop):
          --> 155         try:       self(f'before_{event_type}')       ;f()
              156         except ex: self(f'after_cancel_{event_type}')
              157         finally:   self(f'after_{event_type}')        ;final()
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
              159     def all_batches(self):
              160         self.n_iter = len(self.dl)
          --> 161         for o in enumerate(self.dl): self.one_batch(*o)
              162 
              163     def _do_one_batch(self):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
              177         self.iter = i
              178         self._split(b)
          --> 179         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
              180 
              181     def _do_epoch_train(self):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
              155         try:       self(f'before_{event_type}')       ;f()
              156         except ex: self(f'after_cancel_{event_type}')
          --> 157         finally:   self(f'after_{event_type}')        ;final()
              158 
              159     def all_batches(self):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in __call__(self, event_name)
              131     def ordered_cbs(self, event): return [cb for cb in sort_by_run(self.cbs) if hasattr(cb, event)]
              132 
          --> 133     def __call__(self, event_name): L(event_name).map(self._call_one)
              134 
              135     def _call_one(self, event_name):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/foundation.py in map(self, f, gen, *args, **kwargs)
              224     def range(cls, a, b=None, step=None): return cls(range_of(a, b=b, step=step))
              225 
          --> 226     def map(self, f, *args, gen=False, **kwargs): return self._new(map_ex(self, f, *args, gen=gen, **kwargs))
              227     def argwhere(self, f, negate=False, **kwargs): return self._new(argwhere(self, f, negate, **kwargs))
              228     def filter(self, f=noop, negate=False, gen=False, **kwargs):
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/basics.py in map_ex(iterable, f, gen, *args, **kwargs)
              541     res = map(g, iterable)
              542     if gen: return res
          --> 543     return list(res)
              544 
              545 # Cell
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastcore/basics.py in __call__(self, *args, **kwargs)
              531             if isinstance(v,_Arg): kwargs[k] = args.pop(v.i)
              532         fargs = [args[x.i] if isinstance(x, _Arg) else x for x in self.pargs] + args[self.maxi+1:]
          --> 533         return self.func(*fargs, **kwargs)
              534 
              535 # Cell
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in _call_one(self, event_name)
              135     def _call_one(self, event_name):
              136         assert hasattr(event, event_name), event_name
          --> 137         [cb(event_name) for cb in sort_by_run(self.cbs)]
              138 
              139     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in <listcomp>(.0)
              135     def _call_one(self, event_name):
              136         assert hasattr(event, event_name), event_name
          --> 137         [cb(event_name) for cb in sort_by_run(self.cbs)]
              138 
              139     def _bn_bias_state(self, with_bias): return norm_bias_params(self.model, with_bias).map(self.opt.state)
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/callback/core.py in __call__(self, event_name)
               42                (self.run_valid and not getattr(self, 'training', False)))
               43         res = None
          ---> 44         if self.run and _run: res = getattr(self, event_name, noop)()
               45         if event_name=='after_fit': self.run=True #Reset self.run to True at each end of fit
               46         return res
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/fastai/learner.py in after_batch(self)
              455         if len(self.yb) == 0: return
              456         mets = self._train_mets if self.training else self._valid_mets
          --> 457         for met in mets: met.accumulate(self.learn)
              458         if not self.training: return
              459         self.lrs.append(self.opt.hypers[-1]['lr'])
    
          ~/anaconda3/envs/icevision/lib/python3.8/site-packages/icevision/engines/fastai/adapters/fastai_metric_adapter.py in accumulate(self, learn)
               14 
               15     def accumulate(self, learn: fastai.Learner):
          ---> 16         self.metric.accumulate(records=learn.records, preds=learn.converted_preds)
               17 
               18     @property
    
          AttributeError: 'Learner' object has no attribute 'converted_preds'

Expected behavior Output should be generated without any error

Screenshots None

Desktop (please complete the following information):

Additional context Add any other context about the problem here.

lgvaz commented 3 years ago

Hi, have you made any progress with this since you opened the issue?

ganesh3 commented 3 years ago

No, I haven't had the time to look at it. I'm currently working on a use case that will take some time.

FraPochetti commented 2 years ago

Not relevant anymore.