Open Shafaq-S opened 10 months ago
Hi @Shafaq-S, the following is the script for the class `COCO_loader` in `./utils/`:
class COCO_loader(Dataset):
    """Dataset yielding (image, warped image, homography) triples.

    Every entry of ``<dataset_path>/train`` is read as a grayscale image,
    warped with the homography returned by ``get_perspective_mat`` and handed
    back together with its warped copy and that homography.
    """

    def __init__(self, dataset_params):
        """Store the configuration and index the training images.

        Args:
            dataset_params: Mapping providing ``'augmentation_params'``,
                ``'dataset_path'``, ``'resize_aspect'`` and
                ``'apply_color_aug'``. ``__getitem__`` additionally reads
                ``'image_height'`` and ``'image_width'`` from it.
        """
        super().__init__()
        self.files = []
        self.config = dataset_params
        self.aug_params = dataset_params['augmentation_params']
        self.dataset_path = dataset_params['dataset_path']
        self.aspect_resize = dataset_params['resize_aspect']
        self.apply_aug = dataset_params['apply_color_aug']

        # The COCO-annotation based lookup (instances_<split>2017.json read
        # through coco.COCO / getImgIds) is replaced here by a plain listing of
        # the "train" folder, so no annotation file is needed.
        self.images_path = os.path.join(self.dataset_path, "train")
        # Sorted so that an index always maps to the same file; os.listdir
        # returns entries in arbitrary order.
        self.images = sorted(os.listdir(self.images_path))

        if self.apply_aug:
            # Imported lazily so albumentations is only required when colour
            # augmentation is switched on.
            import albumentations as alb

            # NOTE(review): RandomBrightness / RandomContrast appear to be
            # deprecated in recent albumentations releases - confirm against
            # the installed version.
            self.aug_list = [
                alb.OneOf(
                    [
                        alb.RandomBrightness(limit=0.4, p=0.6),
                        alb.RandomContrast(limit=0.3, p=0.7),
                    ],
                    p=0.6,
                ),
                alb.OneOf([alb.MotionBlur(p=0.5), alb.GaussNoise(p=0.6)], p=0.5),
                # Disabled in the original script:
                # alb.JpegCompression(quality_lower=65, quality_upper=100, p=0.4)
            ]
            self.aug_func = alb.Compose(self.aug_list, p=0.65)

    def __len__(self):
        """Return the number of entries found in the training folder."""
        return len(self.images)

    def apply_augmentations(self, image1, image2):
        """Run the augmentation pipeline on each image independently.

        ``self.aug_func`` only exists when ``apply_color_aug`` was enabled.

        Returns:
            Tuple ``(augmented image1, augmented image2)``.
        """
        result1 = self.aug_func(image=image1)
        result2 = self.aug_func(image=image2)
        return result1['image'], result2['image']

    def __getitem__(self, index: int):
        """Load one image and build its warped counterpart.

        Args:
            index: Position in ``self.images``.

        Returns:
            Tuple ``(orig, warped, homo_matrix)``. ``orig`` and ``warped`` are
            float32 arrays with a leading axis of size 1 and values divided
            by 255; ``homo_matrix`` is the float32 result of
            ``scale_homography``.

        Raises:
            OSError: If OpenCV cannot read the file as an image.
        """
        file_name = self.images[index]
        file_path = os.path.join(self.images_path, file_name)
        image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("Could not read image file: {}".format(file_path))

        target_height = self.config['image_height']
        target_width = self.config['image_width']

        # With aspect-ratio resizing the image is resized before warping, so
        # the plain cv2.resize afterwards is skipped.
        resize = True
        if self.aspect_resize:
            image = resize_aspect_ratio(image, target_height, target_width)
            resize = False

        height, width = image.shape[0:2]
        homo_matrix = get_perspective_mat(
            self.aug_params['patch_ratio'],
            width // 2,
            height // 2,
            self.aug_params['perspective_x'],
            self.aug_params['perspective_y'],
            self.aug_params['shear_ratio'],
            self.aug_params['shear_angle'],
            self.aug_params['rotation_angle'],
            self.aug_params['scale'],
            self.aug_params['translation'],
        )
        warped_image = cv2.warpPerspective(image.copy(), homo_matrix, (width, height))

        if resize:
            orig_resized = cv2.resize(image, (target_width, target_height))
            warped_resized = cv2.resize(warped_image, (target_width, target_height))
        else:
            # Without this branch the resized images above would be
            # overwritten by the un-resized ones.
            orig_resized = image
            warped_resized = warped_image

        if self.apply_aug:
            orig_resized, warped_resized = self.apply_augmentations(orig_resized, warped_resized)

        # Re-express the homography for the output resolution.
        homo_matrix = scale_homography(
            homo_matrix, height, width, target_height, target_width
        ).astype(np.float32)
        orig_resized = np.expand_dims(orig_resized, 0).astype(np.float32) / 255.0
        warped_resized = np.expand_dims(warped_resized, 0).astype(np.float32) / 255.0
        return orig_resized, warped_resized, homo_matrix
In my case, I have created a folder with three subfolders, `train`, `val` and `test`, each of which contains just images. The above script works with that layout.
I hope this helps you figure out the same.
Hello. I'm trying to annotate my custom dataset (images only) in COCO format so I can use your script for training SuperGlue. My dataset consists of RGB images and I want to extract keypoints from both viewpoints to perform image matching. My main goal is to annotate keypoints only. But I'm stuck on what to include in my annotations file. I understand that I need to focus on the Keypoint Detection task of the COCO dataset. This is the description of the annotations for keypoint detection I could find: "A keypoint annotation contains all the data of the object annotation (including id, bbox, etc.) and two additional fields. For Object Detection: annotation{"id" : int, "image_id" : int, "category_id" : int, "segmentation" : RLE or [polygon], "area" : float, "bbox" : [x,y,width,height], "iscrowd" : 0 or 1,} categories[{ "id" : int, "name" : str, "supercategory" : str, }]
Additional fields for keypoint detection: annotation{ "keypoints" : [x1,y1,v1,...], "num_keypoints" : int, "[cloned]" : ..., } categories[{ "keypoints" : [str], "skeleton" : [edge], "[cloned]" : ...,}]"
My primary concerns are: i. What categories and supercategories should I define? ii. What do I include in 'bbox', 'segmentation', 'num_keypoints' and 'skeleton'?
I would be very grateful if you could point me in the right direction.