Closed linhaibo123 closed 4 years ago
作者修改的这份代码很有用,帮了我大忙,但是还有一个地方没有看明白。在 data_util.py 中有两个参数 n 和 m,n 设置为 6,m 设置为 0.5。看代码,在
def generate_rbox(im_size, text_polys, text_tags, training_mask, i, n, m):
    """
    Build the i-th shrunk-kernel score map and update the training mask.

    Every polygon is shrunk inward by an offset ``d_i`` and rasterised into
    the score map with value 1. The shrink ratio is
    ``r_i = 1 - (1 - m) * (n - i) / (n - 1)``, which equals ``m`` for
    ``i == 1`` (smallest kernel) and ``1`` for ``i == n`` (no shrinking).
    The offset is ``d_i = area * (1 - r_i ** 2) / perimeter``.

    :param im_size: (h, w) of the image
    :param text_polys: polygons; each one must support ``astype``
    :param text_tags: one flag per polygon; where the flag is truthy the
        shrunk region is set to 0 in ``training_mask``
    :param training_mask: mask that is modified in place and returned
    :param i: index of the kernel to generate, counted from 1
    :param n: total number of kernels
    :param m: shrink ratio of the smallest kernel
    :return: ``(score_map, training_mask)``
    """
    h, w = im_size
    score_map = np.zeros((h, w), dtype=np.uint8)
    # The ratio depends only on (i, n, m), so compute it once. With a single
    # kernel the formula would divide by zero; use the unshrunk polygon.
    r_i = 1.0 if n == 1 else 1 - (1 - m) * (n - i) / (n - 1)
    for poly, tag in zip(text_polys, text_tags):
        # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the
        # documented drop-in replacement.
        poly = poly.astype(int)
        perimeter = cv2.arcLength(poly, True)
        if perimeter == 0:
            # Degenerate polygon: no offset can be derived, nothing to draw.
            continue
        d_i = cv2.contourArea(poly) * (1 - r_i * r_i) / perimeter
        pco = pyclipper.PyclipperOffset()
        pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        # A negative delta offsets inward. The result may hold zero, one or
        # several polygons with different vertex counts, so keep them as a
        # list of int32 arrays instead of one (possibly ragged) array.
        shrunk_polys = [np.array(path, dtype=np.int32) for path in pco.Execute(-d_i)]
        if not shrunk_polys:
            continue
        cv2.fillPoly(score_map, shrunk_polys, 1)
        # NOTE(review): the previous docstring described the tag as "whether
        # the box takes part in training", yet truthy tags are the ones that
        # get masked out here. Behaviour is kept; confirm the tag convention
        # against the code that builds ``text_tags``.
        if tag:
            cv2.fillPoly(training_mask, shrunk_polys, 0)
    return score_map, training_mask


def augmentation(im: np.ndarray, text_polys: np.ndarray, scales: np.ndarray, degrees: int, input_size: int) -> tuple:
    """
    Apply random scale, horizontal flip and rotation to an image and its polygons.

    :param im: image to augment
    :param text_polys: polygons belonging to ``im``
    :param scales: candidate scale ratios handed to ``data_aug.random_scale``
    :param degrees: rotation limit handed to ``data_aug.random_rotate_img_bbox``
    :param input_size: unused here; cropping to the network input size is
        done by the caller
    :return: ``(im, text_polys)`` after augmentation
    """
    im, text_polys = data_aug.random_scale(im, text_polys, scales)
    # Flip and rotation are each applied with probability 0.5.
    if random.random() < 0.5:
        im, text_polys = data_aug.horizontal_flip(im, text_polys)
    if random.random() < 0.5:
        im, text_polys = data_aug.random_rotate_img_bbox(im, text_polys, degrees)
    return im, text_polys


def image_label(im_fn: str, text_polys: np.ndarray, text_tags: list, n: int, m: float, input_size: int,
                defrees: int = 10, scales: np.ndarray = np.array([0.5, 1, 2.0, 3.0])) -> tuple:
    """
    Load an image and build its training targets.

    The image is read as RGB, augmented, enlarged if its short edge is below
    ``input_size``, and then cropped together with the ``n`` score maps and
    the training mask by ``data_aug.random_crop_author``.

    :param im_fn: path of the image file
    :param text_polys: polygons annotated on the image
    :param text_tags: one flag per polygon, forwarded to ``generate_rbox``
    :param n: number of kernels (score maps) to generate
    :param m: shrink ratio of the smallest kernel
    :param input_size: side length requested for the crop
    :param defrees: rotation limit for augmentation (the misspelt name is
        kept so that keyword callers keep working)
    :param scales: candidate scale ratios for augmentation
    :return: ``(image, score_maps, training_mask)``; the score maps are
        transposed so that the kernel axis comes first
    """
    im = cv2.imread(im_fn)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    h, w, _ = im.shape
    # Validate the polygons against the image bounds.
    text_polys = check_and_validate_polys(text_polys, (h, w))
    im, text_polys = augmentation(im, text_polys, scales, defrees, input_size)

    h, w, _ = im.shape
    short_edge = min(h, w)
    if short_edge < input_size:
        # Guarantee short edge >= input_size so the crop always fits.
        scale = input_size / short_edge
        im = cv2.resize(im, dsize=None, fx=scale, fy=scale)
        # Not in place: ``*=`` with a float fails on integer arrays.
        text_polys = text_polys * scale

    h, w, _ = im.shape
    training_mask = np.ones((h, w), dtype=np.uint8)
    score_maps = []
    # Kernels s1 -> sn, from the smallest to the largest.
    for i in range(1, n + 1):
        score_map, training_mask = generate_rbox((h, w), text_polys, text_tags, training_mask, i, n, m)
        score_maps.append(score_map)
    score_maps = np.array(score_maps, dtype=np.float32)
    # Move the kernel axis last for the crop, then back to the front.
    imgs = data_aug.random_crop_author([im, score_maps.transpose((1, 2, 0)), training_mask],
                                       (input_size, input_size))
    return imgs[0], imgs[1].transpose((2, 0, 1)), imgs[2]
中有这么一段,
# Excerpt from the polygon loop in generate_rbox: shrink one text polygon
# and draw it into the score map of kernel i.
poly = poly.astype(int)  # np.int was removed in NumPy 1.24; int is the replacement
# r_i: shrink ratio of kernel i; equals m for i == 1 and 1 for i == n.
r_i = 1 - (1 - m) * (n - i) / (n - 1)
# d_i: inward offset, computed as area * (1 - r_i^2) / perimeter.
d_i = cv2.contourArea(poly) * (1 - r_i * r_i) / cv2.arcLength(poly, True)
pco = pyclipper.PyclipperOffset()
pco.AddPath(poly, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
shrinked_poly = np.array(pco.Execute(-d_i))  # negative delta offsets inward
cv2.fillPoly(score_map, shrinked_poly, 1)
这段应该是对poly进行了缩放,但是为什么要这么计算和设置呢?目的是什么呢?能帮忙解答下吗?谢谢。
请参考 PSENet 论文中关于标签生成(label generation)的部分。
作者修改的这份代码很有用,帮了我大忙,但是还有一个地方没有看明白。在 data_util.py 中有两个参数 n 和 m,n 设置为 6,m 设置为 0.5。看代码,在
中有这么一段,
这段应该是对poly进行了缩放,但是为什么要这么计算和设置呢?目的是什么呢?能帮忙解答下吗?谢谢。