MasterBin-IIAU / Unicorn

[ECCV'22 Oral] Towards Grand Unification of Object Tracking

How to evaluate on vot2020? #47

Open xjtuwh opened 1 year ago

xjtuwh commented 1 year ago

Hello! I want to compare Unicorn with our method on VOT2020. My trackers.ini is configured as follows:
[unicorn]
label = unicorn
protocol = traxpython
command = import tools.run_vot as run_vot; run_vot.run_vot2020('unicorn_vos', 'unicorn_track_r50_mask')  # Set the tracker name and the parameter name

# Specify a path to the trax python wrapper if it is not visible (separate by ; if using multiple paths)
paths = /media/wuhan/disk1/wh_code_backup/Unicorn

# Additional environment paths
env_PATH = /home/wuhan/anaconda3/envs/unicorn/bin/python;${PATH}
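
For context, run_vot2020 drives the tracker through the VOT toolkit's trax/python protocol. Below is a minimal sketch of that loop, assuming the standard vot integration module shipped with the toolkit; the make_full_size helper, the build_init_info callback, and the tracker's initialize/track signatures are assumptions based on this thread, not the repo's exact code.

import cv2
import numpy as np
import vot  # python integration module shipped with the VOT toolkit


def make_full_size(x, output_sz):
    """Zero-pad a VOT init mask (anchored at (0, 0)) to (width, height) = output_sz."""
    if x.shape[0] == output_sz[1] and x.shape[1] == output_sz[0]:
        return x
    pad_x = output_sz[0] - x.shape[1]
    pad_y = output_sz[1] - x.shape[0]
    return np.pad(x, ((0, pad_y), (0, pad_x)), "constant", constant_values=0)


def run_vot2020_sketch(tracker, build_init_info):
    """Drive `tracker` through one VOT2020 sequence over the trax protocol."""
    handle = vot.VOT("mask")          # VOT2020 uses segmentation-mask regions
    selection = handle.region()       # init mask; may be smaller than the frame
    imagefile = handle.frame()
    image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)
    vot_anno_mask = make_full_size(selection, (image.shape[1], image.shape[0]))

    # The format of the init info dict is exactly what this issue is about;
    # see the fix further down the thread.
    tracker.initialize(image, build_init_info(vot_anno_mask))

    while True:
        imagefile = handle.frame()
        if not imagefile:
            break
        image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)
        out = tracker.track(image)
        mask = (out["segmentation"] == 1).astype(np.uint8)  # object id '1'
        handle.report(mask, 1.0)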

And I modified Unicorn/external/lib/test/tracker/unicorn_vos.py as follows:

def initialize(self, image, info: dict):
    self.frame_id = 0
    # process init_info
    self.init_object_ids = info["init_object_ids"]
    self.sequence_object_ids = info['sequence_object_ids']
    # assert self.init_object_ids == self.sequence_object_ids
    # forward the reference frame once
    """resize the original image and transform the coordinates"""
    self.H, self.W, _ = image.shape
    ref_frame_t, r = self.preprocessor.process(image, self.input_size)
    """forward the network"""
    with torch.no_grad():
        _, self.out_dict_pre = self.model(imgs=ref_frame_t, mode="backbone")  # backbone output (previous frame) (b, 3, H, W)
    self.dh, self.dw = self.out_dict_pre["h"] * 2, self.out_dict_pre["w"] * 2  # STRIDE = 8
    """get initial label mask (K, H/8*W/8)"""
    self.lbs_pre_dict = {}
    self.state_pre_dict = {}
    for obj_id in self.init_object_ids:
        self.state_pre_dict[obj_id] = info["init_bbox"]
        init_box = torch.tensor(info["init_bbox"]).view(-1)
        init_box[2:] += init_box[:2] # (x1, y1, x2, y2)
        init_box_rsz = init_box * r # coordinates on the resized image
        self.lbs_pre_dict[obj_id] = F.interpolate(get_label_map(init_box_rsz, self.input_size[0], self.input_size[1]) \
            , scale_factor=1/8, mode="bilinear", align_corners=False)[0].flatten(-2).to(self.device) # (1, H/8*W/8)
    """deal with new-incoming instances"""
    self.out_dict_pre_new = [] # a list containing out_dict for new in-coming instances
    self.obj_ids_new = []

def track(self, image, info: dict = None, bboxes=None, scores=None, gt_box=None):
    self.frame_id += 1
    """resize the original image and transform the coordinates"""
    cur_frame_t, r = self.preprocessor.process(image, self.input_size)
    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled=False):
            fpn_outs_cur, out_dict_cur = self.model(imgs=cur_frame_t, mode="backbone")  # backbone output (current frame)
    # deal with instances from the first frame
    final_mask_dict, inst_scores = self.get_mask_results(fpn_outs_cur, out_dict_cur, self.out_dict_pre, r, self.init_object_ids)
    # deal with instances from the intermediate frames
    for (out_dict_pre, init_object_ids) in zip(self.out_dict_pre_new, self.obj_ids_new):
        final_mask_dict_tmp, inst_scores_tmp = self.get_mask_results(fpn_outs_cur, out_dict_cur, out_dict_pre, r, init_object_ids)
        final_mask_dict.update(final_mask_dict_tmp)
        inst_scores = np.concatenate([inst_scores, inst_scores_tmp])
    # deal with instances from the current frame
    if "init_object_ids" in info.keys():
        self.out_dict_pre_new.append(out_dict_cur)
        self.obj_ids_new.append(info["init_object_ids"])
        inst_scores_tmp = np.ones((len(info["init_object_ids"]),))
        inst_scores = np.concatenate([inst_scores, inst_scores_tmp])
        for obj_id in info["init_object_ids"]:
            self.state_pre_dict[obj_id] = info["init_bbox"]
            init_box = torch.tensor(info["init_bbox"]).view(-1)
            init_box[2:] += init_box[:2] # (x1, y1, x2, y2)
            init_box_rsz = init_box * r # coordinates on the resized image
            self.lbs_pre_dict[obj_id] = F.interpolate(get_label_map(init_box_rsz, self.input_size[0], self.input_size[1]) \
                , scale_factor=1/8, mode="bilinear", align_corners=False)[0].flatten(-2).to(self.device) # (1, H/8*W/8)
            final_mask_dict[obj_id] = info["init_mask"]
    # Deal with overlapped masks
    cur_obj_ids = copy.deepcopy(self.init_object_ids)
    for obj_ids_inter in self.obj_ids_new:
        cur_obj_ids += obj_ids_inter
    if "init_object_ids" in info.keys():
        cur_obj_ids += info["init_object_ids"]
    # soft aggregation
    cur_obj_ids_int = [int(x) for x in cur_obj_ids]
    mask_merge = np.zeros((self.H, self.W, max(cur_obj_ids_int)+1)) # (H, W, N+1)
    tmp_list = []
    for cur_id in cur_obj_ids:
        mask_merge[:, :, int(cur_id)] = final_mask_dict[cur_id]
        tmp_list.append(final_mask_dict[cur_id])
    back_prob = np.prod(1 - np.stack(tmp_list, axis=-1), axis=-1, keepdims=False)
    mask_merge[:, :, 0] = back_prob
    mask_merge_final = np.argmax(mask_merge, axis=-1) # (H, W)
    for cur_id in cur_obj_ids:
        final_mask_dict[cur_id] = (mask_merge_final == int(cur_id))
    """get the final result"""
    final_mask = np.zeros((self.H, self.W), dtype=np.uint8)
    # for obj_id in cur_obj_ids:
    #     final_mask[final_mask_dict[obj_id]==1] = int(obj_id)
    final_mask = mask_merge_final
    return {"segmentation": final_mask}

But the tracking and segmentation results are all "0, 0, 0, 0".

Can you help me?

xjtuwh commented 1 year ago

We do not need to modify Unicorn/external/lib/test/tracker/unicorn_vos.py; we just need to send the initialization in the right format:

bbox_dict = {}
bbox_dict['1'] = bbox
out = tracker.initialize(image, {'init_mask': vot_anno_mask, 'init_bbox': bbox_dict,
                                 'init_object_ids': ['1'], 'sequence_object_ids': ['1']})
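
For completeness, a sketch of building that init info from the VOT mask, usable as the build_init_info callback in the wrapper sketch earlier in the thread. Deriving the (x, y, w, h) box from the full-size init mask is an assumption; the key point, per the comment above, is that init_bbox is a dict keyed by object id.

import numpy as np

def build_init_info(vot_anno_mask):
    # Derive an (x, y, w, h) box from the full-size init mask.
    ys, xs = np.where(vot_anno_mask > 0)
    bbox = [int(xs.min()), int(ys.min()),
            int(xs.max() - xs.min() + 1), int(ys.max() - ys.min() + 1)]
    # init_bbox is a dict keyed by object id ('1'), not a bare list.
    bbox_dict = {'1': bbox}
    return {'init_mask': vot_anno_mask, 'init_bbox': bbox_dict,
            'init_object_ids': ['1'], 'sequence_object_ids': ['1']}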

xjtuwh commented 1 year ago

[image attachment]