airctic / icevision

An Agnostic Computer Vision Framework - Pluggable to any Training Library: Fastai, Pytorch-Lightning with more to come
https://airctic.github.io/icevision/
Apache License 2.0
848 stars 150 forks source link

Augmented (albumentations) BBox outside the range [0.0, 1.0] #526

Closed ai-fast-track closed 3 years ago

ai-fast-track commented 3 years ago

🐛 Bug

Describe the bug: The error occurs during training in this Plantdoc notebook: learn.fine_tune(20, 0.012, freeze_epochs=3)

It occurs in:

File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 330, in check_bbox
    "to be in the range [0.0, 1.0], got {value}.".format(bbox=bbox, name=name, value=value)
ValueError: Expected x_max for bbox (0.00038580246913580245, 0.022119341563786008, 1.4683641975308641, 0.625, 0) to be in the range [0.0, 1.0], got 1.4683641975308641.

Error stack:

ValueError                                Traceback (most recent call last)
<ipython-input-47-2d489cf4c395> in <module>()
      1 from fastai.callback.tracker import SaveModelCallback
----> 2 learn.fine_tune(20, 0.012, freeze_epochs=3)

17 frames
/usr/local/lib/python3.6/dist-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fine_tune(self, epochs, base_lr, freeze_epochs, lr_mult, pct_start, div, **kwargs)
    159     "Fine tune with `freeze` for `freeze_epochs` then with `unfreeze` from `epochs` using discriminative LR"
    160     self.freeze()
--> 161     self.fit_one_cycle(freeze_epochs, slice(base_lr), pct_start=0.99, **kwargs)
    162     base_lr /= 2
    163     self.unfreeze()

/usr/local/lib/python3.6/dist-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

/usr/local/lib/python3.6/dist-packages/fastai/callback/schedule.py in fit_one_cycle(self, n_epoch, lr_max, div, div_final, pct_start, wd, moms, cbs, reset_opt)
    111     scheds = {'lr': combined_cos(pct_start, lr_max/div, lr_max, lr_max/div_final),
    112               'mom': combined_cos(pct_start, *(self.moms if moms is None else moms))}
--> 113     self.fit(n_epoch, cbs=ParamScheduler(scheds)+L(cbs), reset_opt=reset_opt, wd=wd)
    114 
    115 # Cell

/usr/local/lib/python3.6/dist-packages/fastcore/logargs.py in _f(*args, **kwargs)
     54         init_args.update(log)
     55         setattr(inst, 'init_args', init_args)
---> 56         return inst if to_return else f(*args, **kwargs)
     57     return _f

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    205             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    206             self.n_epoch = n_epoch
--> 207             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    208 
    209     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_fit(self)
    195         for epoch in range(self.n_epoch):
    196             self.epoch=epoch
--> 197             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    198 
    199     @log_args(but='cbs')

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch(self)
    189 
    190     def _do_epoch(self):
--> 191         self._do_epoch_train()
    192         self._do_epoch_validate()
    193 

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _do_epoch_train(self)
    181     def _do_epoch_train(self):
    182         self.dl = self.dls.train
--> 183         self._with_events(self.all_batches, 'train', CancelTrainException)
    184 
    185     def _do_epoch_validate(self, ds_idx=1, dl=None):

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    153 
    154     def _with_events(self, f, event_type, ex, final=noop):
--> 155         try:       self(f'before_{event_type}')       ;f()
    156         except ex: self(f'after_cancel_{event_type}')
    157         finally:   self(f'after_{event_type}')        ;final()

/usr/local/lib/python3.6/dist-packages/fastai/learner.py in all_batches(self)
    159     def all_batches(self):
    160         self.n_iter = len(self.dl)
--> 161         for o in enumerate(self.dl): self.one_batch(*o)
    162 
    163     def _do_one_batch(self):

/usr/local/lib/python3.6/dist-packages/fastai/data/load.py in __iter__(self)
    100         self.before_iter()
    101         self.__idxs=self.get_idxs() # called in context of main process (not workers/subprocesses)
--> 102         for b in _loaders[self.fake_l.num_workers==0](self.fake_l):
    103             if self.device is not None: b = to_device(b, self.device)
    104             yield self.after_batch(b)

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in __next__(self)
    361 
    362     def __next__(self):
--> 363         data = self._next_data()
    364         self._num_yielded += 1
    365         if self._dataset_kind == _DatasetKind.Iterable and \

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _next_data(self)
    987             else:
    988                 del self._task_info[idx]
--> 989                 return self._process_data(data)
    990 
    991     def _try_put_index(self):

/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1012         self._try_put_index()
   1013         if isinstance(data, ExceptionWrapper):
-> 1014             data.reraise()
   1015         return data
   1016 

/usr/local/lib/python3.6/dist-packages/torch/_utils.py in reraise(self)
    393             # (https://bugs.python.org/issue2651), so we work around it.
    394             msg = KeyErrorMessage(msg)
--> 395         raise self.exc_type(msg)

ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 185, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 34, in fetch
    data = next(self.dataset_iter)
  File "/usr/local/lib/python3.6/dist-packages/fastai/data/load.py", line 111, in create_batches
    yield from map(self.do_batch, self.chunkify(res))
  File "/usr/local/lib/python3.6/dist-packages/fastcore/utils.py", line 159, in chunked
    res = list(itertools.islice(it, chunk_sz))
  File "/usr/local/lib/python3.6/dist-packages/fastai/data/load.py", line 124, in do_item
    try: return self.after_item(self.create_item(s))
  File "/usr/local/lib/python3.6/dist-packages/fastai/data/load.py", line 130, in create_item
    def create_item(self, s):  return next(self.it) if s is None else self.dataset[s]
  File "/usr/local/lib/python3.6/dist-packages/icevision/data/dataset.py", line 35, in __getitem__
    data = self.tfm(data)
  File "/usr/local/lib/python3.6/dist-packages/icevision/tfms/transform.py", line 13, in __call__
    tfmed = self.apply(**data)
  File "/usr/local/lib/python3.6/dist-packages/icevision/tfms/albumentations/tfms.py", line 110, in apply
    d = self.tfms(**params)
  File "/usr/local/lib/python3.6/dist-packages/albumentations/core/composition.py", line 180, in __call__
    p.preprocess(data)
  File "/usr/local/lib/python3.6/dist-packages/albumentations/core/utils.py", line 62, in preprocess
    data[data_name] = self.check_and_convert(data[data_name], rows, cols, direction="to")
  File "/usr/local/lib/python3.6/dist-packages/albumentations/core/utils.py", line 70, in check_and_convert
    return self.convert_to_albumentations(data, rows, cols)
  File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 51, in convert_to_albumentations
    return convert_bboxes_to_albumentations(data, self.params.format, rows, cols, check_validity=True)
  File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 303, in convert_bboxes_to_albumentations
    return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
  File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 303, in <listcomp>
    return [convert_bbox_to_albumentations(bbox, source_format, rows, cols, check_validity) for bbox in bboxes]
  File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 251, in convert_bbox_to_albumentations
    check_bbox(bbox)
  File "/usr/local/lib/python3.6/dist-packages/albumentations/augmentations/bbox_utils.py", line 330, in check_bbox
    "to be in the range [0.0, 1.0], got {value}.".format(bbox=bbox, name=name, value=value)
ValueError: Expected x_max for bbox (0.00038580246913580245, 0.022119341563786008, 1.4683641975308641, 0.625, 0) to be in the range [0.0, 1.0], got 1.4683641975308641.
lgvaz commented 3 years ago

Because this error is thrown by albumentations, there is no way to add the image filename to the stack trace.

Maybe you would like to add it in the autofix step?

AUTOFIX-START - ️🔨  Autofixing record with imageid: 2 <FILENAME> ️🔨
lgvaz commented 3 years ago

Another issue, we cannot assume that FILENAME is always present on the record and you would still need to filter the list of records to find it.

Do you think a function get_record_by_id(records, id) would already solve this issue?

ai-fast-track commented 3 years ago

Yes, we should have a function get_record_by_id(records, id), or have a dictionary that stores both the ids and the corresponding images.

p9anand commented 3 years ago

is this fixed now?

lgvaz commented 3 years ago

Not really, are you also facing this issue?

p9anand commented 3 years ago

yes.

lgvaz commented 3 years ago

Are you able to share the specific image that is causing the error for you? We can use that to investigate further

p9anand commented 3 years ago

IceVision doesn't report which particular image id caused this error.

FraPochetti commented 3 years ago

Hi @ai-fast-track can you please assign this one to me?

ai-fast-track commented 3 years ago

Done @FraPochetti! Thanks a lot for looking into this issue.

FraPochetti commented 3 years ago

Brainstorming out loud here. For this error to happen, it means that xmax/ymax must be > width/height of the image. I don't see any other logical explanation. So, it must be an annotation issue in the dataset, right? If yes, why doesn't the AUTOFIX pick it up?

I ran the following snippet on the plantdoc records and it finds nothing. Kind of weird. Does albumentations stretch boxes out of the image? Seems really awkward.

def check_boxes(r):
    """Check that no bounding box of record `r` exceeds the recorded image size.

    `r` is indexed by key: "width", "height", "bboxes" and (only when a
    violation is reported) "filepath". Each box must expose `xmax` and
    `ymax` attributes.

    Only the upper bounds are checked: `xmax` against the width and `ymax`
    against the height. Every violation is printed as it is found.

    Returns a `(ok, r)` tuple, where `ok` is False if at least one box
    exceeds the width or the height, and `r` is the record passed in.
    """
    width, height = r["width"], r["height"]
    all_inside = True
    for bbox in r["bboxes"]:
        # (message, box coordinate, image limit) for each axis, X before Y.
        axis_checks = (
            ("X is wrong", bbox.xmax, width),
            ("Y is wrong", bbox.ymax, height),
        )
        for message, coord, limit in axis_checks:
            if coord > limit:
                print(message, coord, limit, r["filepath"])
                all_inside = False
    return all_inside, r

# Gather every record for which check_boxes reports an out-of-bounds box.
wrong = []
for record in train_records_csv:
    ok, record = check_boxes(record)
    if ok:
        continue  # all boxes fit inside the recorded image size
    wrong.append(record)
lgvaz commented 3 years ago

Does albumentations stretch boxes out of the image? Seems really awkward.

This is what I'm currently thinking, that albumentations itself is causing the issue, which indeed is really weird

If yes, why doesn't the AUTOFIX pick it up?

Exactly! Autofix would pick it up if it was an annotation issue! The only other explanation is if we have a bug there

FraPochetti commented 3 years ago

The only other explanation is if we have a bug there

The code snippet I ran proves the contrary. All records seem fine.

Super weird if albumentations is the root cause.

FraPochetti commented 3 years ago

ok, I have nailed down one wrong image in the plantdoc dataset, and found something interesting. Not sure the order of the records is going to be the same from my machine to yours (I am not shuffling), but I want to post everything for you guys to take a look as well.

# Reproduction setup: build the records, transforms, dataset and dataloader.

# Parse the annotations; the result is unpacked into train and validation records.
# NOTE(review): cache_filepath presumably stores the parsed records on disk - confirm.
parser_csv = PlantDocParser(train_labels, source=data_dir, class_map=class_map)
train_records_csv, valid_records_csv = parser_csv.parse(cache_filepath="plantdoc.pkl")

# Sizes handed to aug_tfms below.
presize = 128
size = 64

# Transform pipeline: the aug_tfms list followed by Normalize, wrapped in an Adapter.
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=size, presize=presize), tfms.A.Normalize()])
train_ds = Dataset(train_records_csv, train_tfms)
# batch_size=1, num_workers=0 and shuffle=False: one record per batch, no worker
# processes, fixed order - presumably so the failing record can be pinpointed.
train_dl = model_type.train_dl(train_ds, batch_size=1, num_workers=0, shuffle=False)

This is the incriminated record

Record:
    - Image ID: 1363
    - Filepath: /Users/francescopochetti/PlantDoc-Object-Detection-Dataset/TRAIN/flies.jpg
    - Image size (width, height): (3888, 2592)
    - Labels: [23]
    - BBoxes: [<BBox (xmin:1, ymin:86, xmax:3806, ymax:2430)>]

which throws

ValueError: Expected x_max for bbox (0.00038580246913580245, 0.022119341563786008, 1.4683641975308641, 0.625, 0) to be in the range [0.0, 1.0], got 1.4683641975308641.

Now, if you look at the image (id=44 in train_records_csv), something looks off: show_record(train_records_csv[44], display_bbox=True, figsize=(8, 10)) image The record reads - Image size (width, height): (3888, 2592), whereas it should clearly be (width, height): (2592, 3888), i.e. width and height are inverted. The flies.xml annotation file seems screwed up too:

<annotation>
    <folder> tomato leaf </folder>
    <filename>flies.jpg</filename>
    <path>/home/sohamp/Desktop/done/ tomato leaf /flies.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>3888</width>
        <height>2592</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>Tomato leaf</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1</xmin>
            <ymin>86</ymin>
            <xmax>3806</xmax>
            <ymax>2430</ymax>
        </bndbox>
    </object>
</annotation>

Following this SO thread, I tried the following, and it indeed seems the image is somehow rotated. image According to this SO thread:

If you're using Pillow >= 6.0.0, you can use the built-in ImageOps.exif_transpose function to correctly rotate an image according to its exif tag

So, long story short, some images might be rotated! We need to find a way to rotate them back while reading them. Not sure. Any thoughts?

attibalazs commented 3 years ago

Any thoughts on this error? I'm trying to get to the cause: the image is there, so why is it looking into "C:\Users\appveyor"

C:\Anaconda\envs\ice\lib\site-packages\icevision\core\record_mixins.py in _load(self)
     79 
     80     def _load(self):
---> 81         self.img = open_img(self.filepath)
     82         # TODO, HACK: is it correct to overwrite height and width here?
     83         self.height, self.width, _ = self.img.shape

C:\Anaconda\envs\ice\lib\site-packages\icevision\utils\imageio.py in open_img(fn, gray)
      8         raise ValueError(f"File {fn} does not exists")
      9     color = cv2.COLOR_BGR2GRAY if gray else cv2.COLOR_BGR2RGB
---> 10     return cv2.cvtColor(cv2.imread(str(fn), cv2.IMREAD_UNCHANGED), color)
     11 
     12 

error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-vjyn6ztg\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
lgvaz commented 3 years ago

Hi @attibalazs, I don't think the error you're getting is related to this issue, can you open a separate issue?

why is it looking into "C:\Users\appveyor"

This is not where it is looking for the image, but where opencv is installed and the error is being thrown