bonn-activity-maps / bam_annotation_tool

Bonn Activity Maps annotation tool

Export: Wrong Data Format for some Poses #387

Closed anDoer closed 3 years ago

anDoer commented 4 years ago

I noticed the following issues:

I wrote this script to fix these issues temporarily, but I'm not sure everything is resolved. My script removed more than 4800 annotations for a single(!) sequence; I still need to examine why that is the case.

import os
import json
import numpy as np

def fix_image_ids(anno_images):
    # find available id
    img_id = None
    for img in anno_images:
        if 'id' in img:
            img_id = str(img['id'])
            img_id = img_id[:-3] + "000"

            break

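    # reconstruct the missing ids from the frame index encoded in the file name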
    for idx, img in enumerate(anno_images):
        if 'id' not in img:
            assert img_id is not None
            frame_idx = int(os.path.basename(img['file_name'].split('.')[0]))
            new_img_id = int(img_id) + frame_idx

            img['id'] = new_img_id
            img['frame_id'] = new_img_id

    images = {img['id']: img['file_name'] for img in anno_images}

    return images

def fix_im_id(im_id, images):
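    # some annotations reference an image id that is 10000000000 smaller than
    # the id stored for the image; try the shifted id before giving up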
    if im_id not in images:
        im_id_2 = im_id + 10000000000
        if im_id_2 in images:
            im_id = im_id_2

    return im_id

def fixing_missing_joints(kpts):
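    # insert two all-zero (x, y, v) keypoints at index 3 so that a
    # 15-keypoint pose matches the full 17-keypoint COCO layout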
    missing_kpts = np.zeros([2, 3])
    kpts = np.insert(kpts, 3, missing_kpts, axis=0)

    return kpts

def fix_weird_keypoints(kpts):
    # some poses are exported as a nested list with truncated entries or
    # None values instead of a flat list; pad those entries with zeros
    for idx, kpt in enumerate(kpts):
        if len(kpt) < 3:
            kpts[idx] = [0, 0, 0]
        else:
            for i in range(len(kpt)):
                if kpt[i] is None:
                    kpts[idx][i] = 0

    kpts = np.array(kpts)

    return kpts

def main():
    anno_file_path = 'data/posetrack_data_export_new/train/'
    fixed_file_path = 'data/posetrack_data_export_new_fixed/train/'

    os.makedirs(fixed_file_path, exist_ok=True)

    dataset_path = '/media/datasets/pose_estimation/PoseTrack_Challenge_2018_v2.2/posetrack_data'
    sequences = os.listdir(anno_file_path)

    # ===================== Load Sequences =======================
    for seq_file in sequences:
        with open(os.path.join(anno_file_path, seq_file)) as anno_file:
            anno = json.load(anno_file)

        # ================== Fix missing image id ========================
        try:
            images = {img['id']: img['file_name'] for img in anno['images']}
        except KeyError:
            # at least one image entry is missing its 'id'; reconstruct them
            images = fix_image_ids(anno['images'])

        # ================== Remove invalid annotations ==================

        fixed_annotations = []
        before = len(anno['annotations'])
        n_no_bbox = 0
        n_no_pose = 0

        for ann_idx, ann in enumerate(anno['annotations']):
            im_id = fix_im_id(ann['image_id'], images)

            bbox = ann['bbox']
            ann['image_id'] = im_id

            no_bbox = len(bbox) == 0 or bbox[2] == 0 or bbox[3] == 0
            no_kpts = len(ann['keypoints']) == 0

            # If we don't have a bounding box, remove this anno
            if no_bbox:
                n_no_bbox += 1
                continue

            if not no_kpts:
                # The annotation tool does not extract 'empty' coco keypoints. Add them for consistency
                if len(ann['keypoints']) == 45:

                    kpts = np.array(ann['keypoints']).reshape([-1, 3])
                    kpts = fixing_missing_joints(kpts)
                    ann['keypoints'] = kpts.reshape([-1]).tolist()

                # some annotations are returned in a weird format by the annotation tool!
                elif len(ann['keypoints']) == 17 or len(ann['keypoints']) == 15:
                    kpts = fix_weird_keypoints(ann['keypoints'])
                    if len(kpts) == 15:
                        kpts = fixing_missing_joints(kpts)
                    ann['keypoints'] = kpts.reshape([-1]).tolist()

            fixed_annotations.append(ann)
        print(f"Removed {before - len(fixed_annotations)} annotations. Remaining annotations: {len(fixed_annotations)}")

        with open(os.path.join(fixed_file_path, seq_file), 'w') as anno_file:
            anno['annotations'] = fixed_annotations
            json.dump(anno, anno_file)

if __name__ == '__main__':
    main()
dari1495 commented 4 years ago

Quick note:

dari1495 commented 4 years ago

I wrote a function to check every annotation for incorrect data, and here is some insight so far (I haven't fixed anything yet; I just finished the method):
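For reference, a minimal sketch of what such a per-annotation check could look like, assuming the same problems handled by the script above (images without an 'id', empty or zero-sized bboxes, keypoint lists that are not 51 values long); the function name and the returned counters are illustrative, not the actual method:

import json

def check_sequence(anno_path):
    # illustrative validator: counts the export problems discussed above
    with open(anno_path) as f:
        anno = json.load(f)

    report = {'images_without_id': 0,
              'bad_image_id_refs': 0,
              'missing_bbox': 0,
              'bad_keypoints': 0}

    image_ids = set()
    for img in anno['images']:
        if 'id' not in img:
            report['images_without_id'] += 1
        else:
            image_ids.add(img['id'])

    for ann in anno['annotations']:
        if ann.get('image_id') not in image_ids:
            report['bad_image_id_refs'] += 1
        bbox = ann.get('bbox', [])
        if len(bbox) != 4 or bbox[2] == 0 or bbox[3] == 0:
            report['missing_bbox'] += 1
        # a complete COCO pose is a flat list of 17 * 3 = 51 values
        if len(ann.get('keypoints', [])) != 51:
            report['bad_keypoints'] += 1

    return report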

dari1495 commented 4 years ago

FIXED:

TODO: Fix image_id references

anDoer commented 3 years ago

Thanks!

I think we can go for the second option and change all image_ids in the database from 2 to 1, but please make sure that this does not result in duplicate image_ids across different sequences.
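A quick sanity check for that could look like the sketch below; the 10000000000 offset is taken from fix_im_id above, and the directory path and the assumption of one JSON file per sequence are placeholders:

import os
import json
from collections import Counter

ANNO_DIR = 'data/posetrack_data_export_new/train/'   # placeholder path
OFFSET = 10000000000                                  # same offset as in fix_im_id

id_counts = Counter()
for seq_file in os.listdir(ANNO_DIR):
    with open(os.path.join(ANNO_DIR, seq_file)) as f:
        anno = json.load(f)
    for img in anno['images']:
        img_id = img.get('id')
        if img_id is None:
            continue
        # map a 2-prefixed id to its 1-prefixed counterpart before counting
        if img_id >= 2 * OFFSET:
            img_id -= OFFSET
        id_counts[img_id] += 1

duplicates = {i for i, c in id_counts.items() if c > 1}
print(f"{len(duplicates)} image_ids would collide after the remapping")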

anDoer commented 3 years ago

Potential Bug with the list of resized videos: