gouthamvgk / SuperGlue_training

This repository contains training code for SuperGlue. It uses the COCO dataset, generating training image pairs via random homographies.

Creating a Custom Dataset in COCO Format for Training #17

Open Shafaq-S opened 8 months ago

Shafaq-S commented 8 months ago

Hello. I'm trying to annotate my custom dataset (images only) in COCO format so I can use your script for training SuperGlue. My dataset consists of RGB images, and I want to extract keypoints from both viewpoints to perform image matching. My main goal is to annotate keypoints only, but I'm stuck on what to include in my annotations file. I understand that I need to focus on the Keypoint Detection task of the COCO dataset. This is the description of the annotations for keypoint detection I could find:

"A keypoint annotation contains all the data of the object annotation (including id, bbox, etc.) and two additional fields. For Object Detection:

annotation{
    "id" : int,
    "image_id" : int,
    "category_id" : int,
    "segmentation" : RLE or [polygon],
    "area" : float,
    "bbox" : [x, y, width, height],
    "iscrowd" : 0 or 1,
}

categories[{
    "id" : int,
    "name" : str,
    "supercategory" : str,
}]

Additional fields for keypoint detection:

annotation{
    "keypoints" : [x1, y1, v1, ...],
    "num_keypoints" : int,
    "[cloned]" : ...,
}

categories[{
    "keypoints" : [str],
    "skeleton" : [edge],
    "[cloned]" : ...,
}]"

My primary concerns are:

i. What categories and supercategories should I define?
ii. What do I include in 'bbox', 'segmentation', 'num_keypoints' and 'skeleton'?
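For concreteness, this is my current understanding of what a single annotation entry would look like, written as a Python dict; every value and name below is a made-up placeholder, not something from a real dataset:

# Placeholder sketch of one COCO keypoint annotation; all values are
# invented purely for illustration.
annotation = {
    "id": 1,
    "image_id": 10,
    "category_id": 1,
    "bbox": [50.0, 40.0, 200.0, 150.0],       # [x, y, width, height]
    "area": 30000.0,
    "segmentation": [[50, 40, 250, 40, 250, 190, 50, 190]],  # polygon
    "iscrowd": 0,
    "keypoints": [120, 80, 2,                 # x1, y1, v1 (v: 0 = not labeled,
                  180, 95, 2,                 #  1 = labeled but not visible,
                  150, 140, 1],               #  2 = labeled and visible)
    "num_keypoints": 3,                       # keypoints with v > 0
}

category = {
    "id": 1,
    "name": "placeholder_object",             # made-up category name
    "supercategory": "placeholder_group",     # made-up supercategory
    "keypoints": ["kp_1", "kp_2", "kp_3"],    # one name per keypoint slot
    "skeleton": [[1, 2], [2, 3]],             # 1-based edges between keypoints
}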

I would be very grateful if you could point me in the right direction.

Shuhul24 commented 7 months ago

Hi @Shafaq-S, here is my modified version of the COCO_loader class from ./utils/dataset.py:

import os
import cv2
import numpy as np
from torch.utils.data import Dataset
# resize_aspect_ratio, get_perspective_mat and scale_homography are helper
# functions defined elsewhere in this repository's utils.

class COCO_loader(Dataset):
    def __init__(self, dataset_params):
        super(COCO_loader, self).__init__()
        self.config = dataset_params
        self.aug_params = dataset_params['augmentation_params']
        self.dataset_path = dataset_params['dataset_path']
        self.aspect_resize = dataset_params['resize_aspect']
        self.apply_aug = dataset_params['apply_color_aug']
        # Original COCO-annotation-based loading (needs pycocotools), replaced
        # by the plain directory listing below:
        # self.images_path = os.path.join(self.dataset_path, "{}2017".format(typ))
        # self.json_path = os.path.join(self.dataset_path, 'annotations', 'instances_{}2017.json'.format(typ))
        # self.coco_json = coco.COCO(self.json_path)
        # self.images = self.coco_json.getImgIds()
        self.images_path = os.path.join(self.dataset_path, "train")
        self.images = os.listdir(self.images_path)
        if self.apply_aug:
            import albumentations as alb
            # Photometric augmentations only; all geometric change comes from
            # the random homography sampled in __getitem__.
            self.aug_list = [alb.OneOf([alb.RandomBrightness(limit=0.4, p=0.6),
                                        alb.RandomContrast(limit=0.3, p=0.7)], p=0.6),
                             alb.OneOf([alb.MotionBlur(p=0.5),
                                        alb.GaussNoise(p=0.6)], p=0.5),
                             # alb.JpegCompression(quality_lower=65, quality_upper=100, p=0.4)
                             ]
            self.aug_func = alb.Compose(self.aug_list, p=0.65)

    def __len__(self):
        return len(self.images)

    def apply_augmentations(self, image1, image2):
        # Augment the two views independently so their appearance differs.
        result1 = self.aug_func(image=image1)
        result2 = self.aug_func(image=image2)
        return result1['image'], result2['image']

    def __getitem__(self, index: int):
        resize = True
        file_name = self.images[index]
        file_path = os.path.join(self.images_path, file_name)
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if self.aspect_resize:
            image = resize_aspect_ratio(image, self.config['image_height'], self.config['image_width'])
            resize = False
        height, width = image.shape[0:2]
        # Sample a random homography and warp the image to get the second view.
        homo_matrix = get_perspective_mat(self.aug_params['patch_ratio'], width//2, height//2,
                                          self.aug_params['perspective_x'], self.aug_params['perspective_y'],
                                          self.aug_params['shear_ratio'], self.aug_params['shear_angle'],
                                          self.aug_params['rotation_angle'], self.aug_params['scale'],
                                          self.aug_params['translation'])
        warped_image = cv2.warpPerspective(image.copy(), homo_matrix, (width, height))
        if resize:
            orig_resized = cv2.resize(image, (self.config['image_width'], self.config['image_height']))
            warped_resized = cv2.resize(warped_image, (self.config['image_width'], self.config['image_height']))
        else:
            orig_resized = image
            warped_resized = warped_image
        if self.apply_aug:
            orig_resized, warped_resized = self.apply_augmentations(orig_resized, warped_resized)
        # Rescale the homography so it stays valid at the resized resolution.
        homo_matrix = scale_homography(homo_matrix, height, width, self.config['image_height'],
                                       self.config['image_width']).astype(np.float32)
        # Normalize to [0, 1] and add a channel dimension: (1, H, W).
        orig_resized = np.expand_dims(orig_resized, 0).astype(np.float32) / 255.0
        warped_resized = np.expand_dims(warped_resized, 0).astype(np.float32) / 255.0
        return orig_resized, warped_resized, homo_matrix

In my case, I created a dataset folder with three subfolders, train, val and test, each containing just images; no annotation JSON is needed at all, since the image pairs and their ground-truth homographies are generated on the fly. The script above runs on that layout directly; a sketch of the setup follows.
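The layout looks like this (the folder name is arbitrary):

my_dataset/
├── train/   # images only, e.g. *.jpg / *.png
├── val/
└── test/

And here is a minimal sketch of driving the loader. The parameter values below are illustrative assumptions on my part, not the repository's defaults (those live in the repo's training config):

from torch.utils.data import DataLoader

# All values here are illustrative placeholders.
dataset_params = {
    'dataset_path': '/path/to/my_dataset',   # folder containing the 'train' subfolder
    'image_height': 480,
    'image_width': 640,
    'resize_aspect': True,
    'apply_color_aug': True,
    'augmentation_params': {                 # consumed by get_perspective_mat
        'patch_ratio': 0.85,
        'perspective_x': 0.0008,
        'perspective_y': 0.0008,
        'shear_ratio': 0.04,
        'shear_angle': 10,
        'rotation_angle': 25,
        'scale': 0.6,
        'translation': 0.75,
    },
}

loader = DataLoader(COCO_loader(dataset_params), batch_size=8, shuffle=True)
orig, warped, homography = next(iter(loader))
# orig, warped: (B, 1, H, W) grayscale tensors in [0, 1]
# homography:   (B, 3, 3) ground-truth warp taking orig to warped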

I hope this helps you figure it out.