In flickr30ke2odvg.py of Open-Grounding DINO, each region has a single bbox and a phrase.
Why, then, can each region in odvg.py of MM-Grounding DINO have multiple bboxes?
# Walk the grounding regions, build one instance dict per usable box, and
# record each region's phrase under the region index.
# NOTE(review): this snippet was pasted with its indentation stripped; the
# nesting below follows statement order. Confirm it against the original file,
# in particular whether `phrases[i]` belongs to the inner or the outer loop.
for i, region in enumerate(regions):
    bbox = region['bbox']
    phrase = region['phrase']
    tokens_positive = region['tokens_positive']
    # `bbox` is accepted in two shapes: one flat box, or a list of boxes.
    # Wrapping the flat form lets a single loop handle both, which is why one
    # region (one phrase) can yield several boxes here.
    if not isinstance(bbox[0], list):
        bbox = [bbox]
    for box in bbox:
        instance = {}
        x1, y1, x2, y2 = box
        # Extent of the overlap between the box and the rectangle
        # [0, data['width']] x [0, data['height']] (presumably the image
        # size in pixels; verify against the caller).
        inter_w = max(0, min(x2, data['width']) - max(x1, 0))
        inter_h = max(0, min(y2, data['height']) - max(y1, 0))
        # Drop boxes with no overlap with that rectangle.
        if inter_w * inter_h == 0:
            continue
        # Drop boxes whose width or height is smaller than 1.
        if (x2 - x1) < 1 or (y2 - y1) < 1:
            continue
        instance['ignore_flag'] = 0
        instance['bbox'] = box
        # Every box of region `i` shares label `i`, which is also the key
        # used for this region's entry in `phrases`.
        instance['bbox_label'] = i
        phrases[i] = {
            'phrase': phrase,
            'tokens_positive': tokens_positive
        }
        # NOTE(review): `instance` is not stored anywhere in the visible
        # lines; the excerpt presumably ends before that step.
In flickr30ke2odvg.py of Open-Grounding DINO, each region has a single bbox and a phrase. Why, then, can each region in odvg.py of MM-Grounding DINO have multiple bboxes? for i, region in enumerate(regions): bbox = region['bbox'] phrase = region['phrase'] tokens_positive = region['tokens_positive'] if not isinstance(bbox[0], list): bbox = [bbox] for box in bbox: instance = {} x1, y1, x2, y2 = box inter_w = max(0, min(x2, data['width']) - max(x1, 0)) inter_h = max(0, min(y2, data['height']) - max(y1, 0)) if inter_w * inter_h == 0: continue if (x2 - x1) < 1 or (y2 - y1) < 1: continue instance['ignore_flag'] = 0 instance['bbox'] = box instance['bbox_label'] = i phrases[i] = { 'phrase': phrase, 'tokens_positive': tokens_positive }