Scalsol / mega.pytorch

Memory Enhanced Global-Local Aggregation for Video Object Detection, CVPR2020

A severe BUG in dataloader (mega_core/data/datasets/vid_mega.py) #106

Open Dawn-LX opened 3 years ago

Dawn-LX commented 3 years ago

https://github.com/Scalsol/mega.pytorch/blob/a6aa6e0537b82d70da94228100a51e6a53d98f82/mega_core/data/datasets/vid_mega.py#L133

images["cur"] = img, However, this img is not corresponding to the current idx when cfg.MODEL.VID.MEGA.GLOBAL.ENABLE == True.

The for loop at line 116 rebinds `img` on every iteration, so by the time `images["cur"]` is assigned, `img` holds the last global reference frame instead of the frame at `idx`. A minimal illustration of the rebinding follows, and the permalink below points to the loop itself.
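For illustration only, a minimal self-contained sketch of the same rebinding pattern (the strings and the loop bound here are made up, not from the repository):

    img = "current_frame"          # stands in for the frame loaded at the top of _get_test
    for i in range(3):
        img = f"global_ref_{i}"    # the loop reuses the name `img` and rebinds it
    print(img)                     # prints "global_ref_2"; the original value is lost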

https://github.com/Scalsol/mega.pytorch/blob/a6aa6e0537b82d70da94228100a51e6a53d98f82/mega_core/data/datasets/vid_mega.py#L116

    def _get_test(self, idx):
        filename = self.image_set_index[idx]
        img = Image.open(self._img_dir % filename).convert("RGB")

        # give the current frame a category. 0 for start, 1 for normal
        frame_id = int(filename.split("/")[-1])
        frame_category = 0
        if frame_id != 0:
            frame_category = 1

        img_refs_l = []
        # reading other images of the queue (not necessary to be the last one, but last one here)
        ref_id = min(self.frame_seg_len[idx] - 1, frame_id + cfg.MODEL.VID.MEGA.MAX_OFFSET)
        ref_filename = self.pattern[idx] % ref_id
        img_ref = Image.open(self._img_dir % ref_filename).convert("RGB")
        img_refs_l.append(img_ref)

        img_refs_g = []
        if cfg.MODEL.VID.MEGA.GLOBAL.ENABLE:
            size = cfg.MODEL.VID.MEGA.GLOBAL.SIZE if frame_id == 0 else 1
            shuffled_index = self.shuffled_index[str(self.start_id[idx])]
            for id in range(size):
                filename = self.pattern[idx] % shuffled_index[
                    (idx - self.start_id[idx] + cfg.MODEL.VID.MEGA.GLOBAL.SIZE - id - 1) % self.frame_seg_len[idx]]
                # BUG: `img` (and `filename`) are rebound on every iteration,
                # so after this loop `img` holds the last global reference frame
                img = Image.open(self._img_dir % filename).convert("RGB")
                img_refs_g.append(img)

        target = self.get_groundtruth(idx)
        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)
            for i in range(len(img_refs_l)):
                img_refs_l[i], _ = self.transforms(img_refs_l[i], None)
            for i in range(len(img_refs_g)):
                img_refs_g[i], _ = self.transforms(img_refs_g[i], None)

        images = {}
        images["cur"] = img
        images["ref_l"] = img_refs_l
        images["ref_g"] = img_refs_g
        images["frame_category"] = frame_category
        images["seg_len"] = self.frame_seg_len[idx]
        images["pattern"] = self.pattern[idx]
        images["img_dir"] = self._img_dir
        images["transforms"] = self.transforms

        return images, target, idx
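A minimal sketch of one possible fix, assuming the simplest change is to stop reusing the names `img` and `filename` inside the global loop (the names `ref_g_filename` and `img_ref_g` are mine, not from the repository):

        img_refs_g = []
        if cfg.MODEL.VID.MEGA.GLOBAL.ENABLE:
            size = cfg.MODEL.VID.MEGA.GLOBAL.SIZE if frame_id == 0 else 1
            shuffled_index = self.shuffled_index[str(self.start_id[idx])]
            for id in range(size):
                # dedicated names keep the current frame's `img` and `filename`,
                # loaded at the top of _get_test, untouched
                ref_g_filename = self.pattern[idx] % shuffled_index[
                    (idx - self.start_id[idx] + cfg.MODEL.VID.MEGA.GLOBAL.SIZE - id - 1) % self.frame_seg_len[idx]]
                img_ref_g = Image.open(self._img_dir % ref_g_filename).convert("RGB")
                img_refs_g.append(img_ref_g)

An equivalent one-line workaround would be to assign `images["cur"]` (or save a copy of `img`) before the global loop runs. Either way, the problem only appears at test time with the MEGA configs, since the loop only executes when `cfg.MODEL.VID.MEGA.GLOBAL.ENABLE` is true.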