zylo117 / Yet-Another-EfficientDet-Pytorch

The pytorch re-implement of the official efficientdet with SOTA performance in real time and pretrained weights.
GNU Lesser General Public License v3.0

In the inference process of combining tiling images #740

Open JINWONMIN opened 1 year ago

JINWONMIN commented 1 year ago

First of all, thank you for providing this good open-source project.

While trying to combine the tiled images back together during inference, I ran into difficulties.


average original image (w, h) = 10000, 8500
average bbox (w, h) = 188, 166

99% of the objects occupy less than 0.3% of the original image area, so image tiling was performed as a preprocessing step. The tiles were overlapped, and duplicated annotation information was deleted.

# tiler code
import os


class SplitImage():
    def __init__(self):
        # images_df / annos_df are expected to be pandas DataFrames built from the COCO annotations
        self.images_df = []
        self.annos_df = []
        self.new_image_id = 1
        self.new_img_list = []
        self.new_anno_id = 0
        self.new_anno_list = []
        self.slice_bboxes = []

    def split(self, im, rows, cols, image_path, img_dict, overlap_height_ratio: float=0.2, overlap_width_ratio: float=0.2):
        save_img_dir = r'./data'
        os.makedirs(save_img_dir, exist_ok=True)

        im_width, im_height = im.size
        print('######## image size: ', im.size)
        slice_width = int(im_width / rows)
        slice_height = int(im_height / cols)

        """
        Given the height and width of an image, calculates how to divide the image into
        overlapping slices according to the height and width provided. These slices are returned
        as bounding boxes in xyxy format
        """
        check_list = []
        n = 0
        y_max = y_min = 0
        y_overlap = int(overlap_height_ratio * slice_height)
        x_overlap = int(overlap_width_ratio * slice_width)
        while y_max < im_height:
            x_min = x_max = 0
            y_max = y_min + slice_height
            while x_max < im_width:
                x_max = x_min + slice_width

                n += 1
                # clamp tiles that would run past the image border so that every
                # tile keeps the full slice_width x slice_height size
                if y_max > im_height or x_max > im_width:
                    xmax = min(im_width, x_max)
                    ymax = min(im_height, y_max)
                    xmin = max(0, xmax - slice_width)
                    ymin = max(0, ymax - slice_height)
                    slice_bbox = [xmin, ymin, xmax, ymax]
                else:
                    slice_bbox = [x_min, y_min, x_max, y_max]
                print('slice_bbox:', slice_bbox)

                new_img_dict = {}
                outp = im.crop(slice_bbox)
                name, ext = os.path.splitext(os.path.basename(image_path))
                new_name = name + "_" + str(n) + ext
                outp_path = os.path.join(save_img_dir, new_name)
                image_yn = True  # also save tiles that have no annotation info

                # new annotation: keep only boxes fully contained in this tile,
                # shifted into tile coordinates; boxes already assigned to an
                # earlier (overlapping) tile are skipped via check_list
                for anno_dict in self.annos_df[self.annos_df['image_id'] == img_dict["id"]].to_dict("records"):
                    new_anno_dict = {}
                    coord = anno_dict['bbox']
                    coord_tp = [coord[0], coord[1], coord[0] + coord[2], coord[1] + coord[3]]
                    if coord_tp[0] >= slice_bbox[0] and coord_tp[2] <= slice_bbox[2]:
                        if coord_tp[1] >= slice_bbox[1] and coord_tp[3] <= slice_bbox[3]:
                            # image_yn = True   # use this when image_yn is initialized to False above
                            if [coord[0], coord[1], coord[2], coord[3]] in check_list:
                                pass
                            else:
                                self.new_anno_id += 1
                                new_anno_dict["id"] = self.new_anno_id
                                new_anno_dict['image_id'] = self.new_image_id
                                new_anno_dict['category_id'] = anno_dict['category_id']
                                new_anno_dict['category_name'] = anno_dict['category_name']
                                new_anno_dict['class_name'] = anno_dict['class_name']
                                new_anno_dict['bbox'] = [round(coord[0]-slice_bbox[0], 1), round(coord[1]-slice_bbox[1], 1), coord[2], coord[3]]
                                new_anno_dict['area'] = anno_dict['area']
                                new_anno_dict['iscrowd'] = 0

                                check_list.append([coord[0], coord[1], coord[2], coord[3]])
                                self.new_anno_list.append(new_anno_dict)

                # new image: register the tile in the new COCO image list and save the crop
                if image_yn:
                    new_img_dict["id"] = self.new_image_id
                    new_img_dict["file_name"] = new_name
                    new_img_dict["width"] = outp.size[0]
                    new_img_dict["height"] = outp.size[1]
                    new_img_dict["slices"] = slice_bbox
                    new_img_dict["ori_width"] = im_width
                    new_img_dict["ori_height"] = im_height
                    new_img_dict["ori_file_name"] = os.path.basename(image_path)

                    self.new_img_list.append(new_img_dict)
                    self.new_image_id += 1
                    print("Exporting image tile: " + outp_path)
                    outp.save(outp_path)
                x_min = x_max - x_overlap
            y_min = y_max - y_overlap
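
For completeness, here is a minimal sketch of how this tiler could be driven. The annotation file path, the image folder, and the 4x4 grid are assumptions for illustration, not part of the original code:

# hypothetical driver for SplitImage.split(); file paths and the 4x4 grid are assumptions
import json
import os

import pandas as pd
from PIL import Image

with open('./annotations/train.json') as f:        # assumed COCO-style annotation file
    coco = json.load(f)

splitter = SplitImage()
splitter.images_df = pd.DataFrame(coco['images'])
splitter.annos_df = pd.DataFrame(coco['annotations'])

for img_dict in coco['images']:
    image_path = os.path.join('./images', img_dict['file_name'])   # assumed image folder
    with Image.open(image_path) as im:
        splitter.split(im, rows=4, cols=4, image_path=image_path, img_dict=img_dict)

# splitter.new_img_list / splitter.new_anno_list can then be written out as a new COCO json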

[Attached screenshots: screen captures from 2022-10-12 (192435, 192501, 192722)]


I want to run inference on these tiled images and then combine the results again, but I just can't figure out how to do it. When combining the separated images back into the original image, I want to delete the duplicated bboxes. Is there a way to apply this in the inference process, i.e. in eval_coco.py?
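
One common approach (not something this repo provides out of the box, so treat the sketch below as an assumption-laden illustration): keep the "slices" offset stored for each tile, run inference on every tile, shift each tile's detections back into original-image coordinates, concatenate them, and apply NMS (or another IoU-based deduplication) across the combined set so boxes detected twice in the overlap regions collapse into one. A minimal sketch assuming torchvision is available and per-tile detections come as xyxy boxes with scores and labels:

# hypothetical merge step; the tile_results format and iou_threshold are assumptions
import torch
from torchvision.ops import batched_nms

def merge_tile_detections(tile_results, iou_threshold=0.5):
    """
    tile_results: list of (slice_bbox, boxes, scores, labels) per tile, where
    slice_bbox is the [xmin, ymin, xmax, ymax] saved in "slices" and boxes are
    xyxy in tile coordinates. Returns deduplicated boxes in original-image coords.
    """
    all_boxes, all_scores, all_labels = [], [], []
    for slice_bbox, boxes, scores, labels in tile_results:
        if len(boxes) == 0:
            continue
        # shift tile-local boxes back into original-image coordinates
        offset = torch.tensor([slice_bbox[0], slice_bbox[1],
                               slice_bbox[0], slice_bbox[1]], dtype=torch.float32)
        all_boxes.append(torch.as_tensor(boxes, dtype=torch.float32) + offset)
        all_scores.append(torch.as_tensor(scores, dtype=torch.float32))
        all_labels.append(torch.as_tensor(labels, dtype=torch.int64))

    if not all_boxes:
        return torch.empty(0, 4), torch.empty(0), torch.empty(0, dtype=torch.int64)

    boxes = torch.cat(all_boxes)
    scores = torch.cat(all_scores)
    labels = torch.cat(all_labels)
    # class-aware NMS across all tiles removes duplicates from the overlap regions
    keep = batched_nms(boxes, scores, labels, iou_threshold)
    return boxes[keep], scores[keep], labels[keep]

The same merge could be applied inside the evaluation script just before the results are written out, so that COCO evaluation only sees the merged, deduplicated boxes mapped back to the original image ids.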