converting standard coco to kwcoco format

periakiva commented 2 years ago

Currently, a standard coco file cannot be directly loaded using kwcoco. It seems like kwcoco requires a 'frame_index' key which does not exist in my coco formatted dataset.

~/anaconda3/envs/pixar/lib/python3.9/site-packages/sortedcontainers/sortedset.py in update(self, *iterables)
    685             _set.update(values)
    686             _list.clear()
--> 687             _list.update(_set)
    688         else:
    689             _add = self._add

~/anaconda3/envs/pixar/lib/python3.9/site-packages/sortedcontainers/sortedlist.py in update(self, iterable)
   1876         _keys = self._keys
   1877         _maxes = self._maxes
-> 1878         values = sorted(iterable, key=self._key)
   1879 
   1880         if _maxes:

~/anaconda3/envs/pixar/lib/python3.9/site-packages/kwcoco/_helpers.py in _lut_frame_index(imgs, gid)
    145 # TODO: add a pickled test, FIXME: I dont think this is safe
    146 def _lut_frame_index(imgs, gid):
--> 147     return imgs[gid]['frame_index']
    148 
    149 

KeyError: 'frame_index'

Erotemic commented 2 years ago

Thanks for catching that. I'll fix this next week. If you want an immediate workaround you can probably patch this via imgs[gid].get('frame_index', 0) and get something workable.

Erotemic commented 2 years ago

Do you have a pointer to the dataset that failed? I just wrote a test for the 2017 MS-COCO dataset, and everything seemed to work fine on my end.

For reference, the test I wrote was for the 2017 validation set:

def grab_mscoco_annotations(keys=None):
    """
    Download and extract MS-COCO annotation archives.

    TODO: move to a grabdata script

    Each requested archive is fetched with ``ub.grabdata`` into the
    ``kwcoco/data/mscoco`` application directory and extracted in place.

    Args:
        keys (Iterable[str] | str | None): names of the archives to grab.
            Valid names are ``'trainval2017'``, ``'trainval2014'`` and
            ``'testinfo2014'``. A single name may be given as a plain string.
            Defaults to ``['trainval2017']``.

    Returns:
        dict: maps each requested key to a dictionary that maps the file name
            of every extracted path to that path (as a ``ub.Path``).

    Raises:
        KeyError: if any requested key is not a known archive name. This is
            checked up front, so nothing is downloaded for a bad request.
    """
    mscoco_urls = {
        'trainval2017': 'http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
        'trainval2014': 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
        'testinfo2014': 'http://images.cocodataset.org/annotations/image_info_test2014.zip',
    }

    if keys is None:
        keys = ['trainval2017']
    elif isinstance(keys, str):
        # A bare string would otherwise be iterated character by character.
        keys = [keys]
    else:
        keys = list(keys)

    unknown = [key for key in keys if key not in mscoco_urls]
    if unknown:
        raise KeyError(
            'Unknown mscoco annotation keys: {!r}. Valid keys are: {!r}'.format(
                unknown, sorted(mscoco_urls)))

    # Imported after validation so a bad request fails fast, before any
    # heavier dependency is loaded or any network access happens.
    import ubelt as ub
    from kwcoco.util import util_archive
    dpath = ub.Path.appdir('kwcoco/data/mscoco')

    mscoco_fpaths = {}
    for key in keys:
        url = mscoco_urls[key]
        zip_fpath = ub.grabdata(url, dpath=dpath)
        archive = util_archive.Archive.coerce(zip_fpath)
        extracted = [ub.Path(p) for p in archive.extractall(output_dpath=dpath)]
        # Index by file name so callers can look up e.g. 'instances_val2017.json'
        mscoco_fpaths[key] = {p.name: p for p in extracted}
    return mscoco_fpaths

def test_standard_coco_dataset():
    """
    Load the MS-COCO 2017 validation instances file with kwcoco and run the
    generic dataset API checks on it.
    """
    import kwcoco
    trainval_fpaths = grab_mscoco_annotations()['trainval2017']
    val_fpath = trainval_fpaths['instances_val2017.json']
    coco_dset = kwcoco.CocoDataset(val_fpath)
    _test_dset_api(coco_dset)

def _test_dset_api(dset):
    """
    Exercise several dataset API calls and check they agree with each other.

    The dataset is validated (with ``missing=False``), then the counts
    reported under ``stats()['basic']`` are compared against the lengths of
    the corresponding images / videos / annots / categories collections.
    """
    dset.validate(missing=False)

    summary = dset.stats()

    image_objs = dset.images()
    video_objs = dset.videos()
    annot_objs = dset.annots()
    category_objs = dset.categories()

    # (stat name, collection it should count), checked in this order
    expected_counts = [
        ('n_anns', annot_objs),
        ('n_imgs', image_objs),
        ('n_videos', video_objs),
        ('n_cats', category_objs),
    ]
    for stat_name, collection in expected_counts:
        assert summary['basic'][stat_name] == len(collection)

Kitware / kwcoco

converting standard coco to kwcoco format #2