Open apple2373 opened 5 years ago
P.S. but just changing the data type will cause opencv error..... here https://github.com/argman/EAST/blob/dca414de39a3a4915a019c9a02c1832a31cdd0ca/icdar.py#L481
We have to find a way to replace cv2.fillPoly
with something else.....
Here's my solution in the end.
def generate_rbox(im_size, polys, tags):
h, w = im_size
poly_mask = np.zeros((h, w), dtype=np.uint8)
score_map = np.zeros((h, w), dtype=np.uint8)
geo_map = np.zeros((h, w, 5), dtype=np.float32)
# mask used during traning, to ignore some hard areas
training_mask = np.ones((h, w), dtype=np.uint8)
for poly_idx, poly_tag in enumerate(zip(polys, tags)):
poly = poly_tag[0]
tag = poly_tag[1]
r = [None, None, None, None]
for i in range(4):
r[i] = min(np.linalg.norm(poly[i] - poly[(i + 1) % 4]),
np.linalg.norm(poly[i] - poly[(i - 1) % 4]))
# score map
shrinked_poly = shrink_poly(poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
cv2.fillPoly(score_map, shrinked_poly, 1)
cv2.fillPoly(poly_mask, shrinked_poly, 1)
# if the poly is too small, then ignore it during training
poly_h = min(np.linalg.norm(poly[0] - poly[3]), np.linalg.norm(poly[1] - poly[2]))
poly_w = min(np.linalg.norm(poly[0] - poly[1]), np.linalg.norm(poly[2] - poly[3]))
if min(poly_h, poly_w) < FLAGS.min_text_size:
cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
if tag:
cv2.fillPoly(training_mask, poly.astype(np.int32)[np.newaxis, :, :], 0)
xy_in_poly = np.argwhere(poly_mask == 1)
# if geometry == 'RBOX':
# 对任意两个顶点的组合生成一个平行四边形 - generate a parallelogram for any combination of two vertices
fitted_parallelograms = []
for i in range(4):
p0 = poly[i]
p1 = poly[(i + 1) % 4]
p2 = poly[(i + 2) % 4]
p3 = poly[(i + 3) % 4]
edge = fit_line([p0[0], p1[0]], [p0[1], p1[1]])
backward_edge = fit_line([p0[0], p3[0]], [p0[1], p3[1]])
forward_edge = fit_line([p1[0], p2[0]], [p1[1], p2[1]])
if point_dist_to_line(p0, p1, p2) > point_dist_to_line(p0, p1, p3):
# 平行线经过p2 - parallel lines through p2
if edge[1] == 0:
edge_opposite = [1, 0, -p2[0]]
else:
edge_opposite = [edge[0], -1, p2[1] - edge[0] * p2[0]]
else:
# 经过p3 - after p3
if edge[1] == 0:
edge_opposite = [1, 0, -p3[0]]
else:
edge_opposite = [edge[0], -1, p3[1] - edge[0] * p3[0]]
# move forward edge
new_p0 = p0
new_p1 = p1
new_p2 = p2
new_p3 = p3
new_p2 = line_cross_point(forward_edge, edge_opposite)
if point_dist_to_line(p1, new_p2, p0) > point_dist_to_line(p1, new_p2, p3):
# across p0
if forward_edge[1] == 0:
forward_opposite = [1, 0, -p0[0]]
else:
forward_opposite = [forward_edge[0], -1, p0[1] - forward_edge[0] * p0[0]]
else:
# across p3
if forward_edge[1] == 0:
forward_opposite = [1, 0, -p3[0]]
else:
forward_opposite = [forward_edge[0], -1, p3[1] - forward_edge[0] * p3[0]]
new_p0 = line_cross_point(forward_opposite, edge)
new_p3 = line_cross_point(forward_opposite, edge_opposite)
fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])
# or move backward edge
new_p0 = p0
new_p1 = p1
new_p2 = p2
new_p3 = p3
new_p3 = line_cross_point(backward_edge, edge_opposite)
if point_dist_to_line(p0, p3, p1) > point_dist_to_line(p0, p3, p2):
# across p1
if backward_edge[1] == 0:
backward_opposite = [1, 0, -p1[0]]
else:
backward_opposite = [backward_edge[0], -1, p1[1] - backward_edge[0] * p1[0]]
else:
# across p2
if backward_edge[1] == 0:
backward_opposite = [1, 0, -p2[0]]
else:
backward_opposite = [backward_edge[0], -1, p2[1] - backward_edge[0] * p2[0]]
new_p1 = line_cross_point(backward_opposite, edge)
new_p2 = line_cross_point(backward_opposite, edge_opposite)
fitted_parallelograms.append([new_p0, new_p1, new_p2, new_p3, new_p0])
areas = [Polygon(t).area for t in fitted_parallelograms]
parallelogram = np.array(fitted_parallelograms[np.argmin(areas)][:-1], dtype=np.float32)
# sort thie polygon
parallelogram_coord_sum = np.sum(parallelogram, axis=1)
min_coord_idx = np.argmin(parallelogram_coord_sum)
parallelogram = parallelogram[
[min_coord_idx, (min_coord_idx + 1) % 4, (min_coord_idx + 2) % 4, (min_coord_idx + 3) % 4]]
rectange = rectangle_from_parallelogram(parallelogram)
rectange, rotate_angle = sort_rectangle(rectange)
p0_rect, p1_rect, p2_rect, p3_rect = rectange
for y, x in xy_in_poly:
point = np.array([x, y], dtype=np.float32)
# top
geo_map[y, x, 0] = point_dist_to_line(p0_rect, p1_rect, point)
# right
geo_map[y, x, 1] = point_dist_to_line(p1_rect, p2_rect, point)
# down
geo_map[y, x, 2] = point_dist_to_line(p2_rect, p3_rect, point)
# left
geo_map[y, x, 3] = point_dist_to_line(p3_rect, p0_rect, point)
# angle
geo_map[y, x, 4] = rotate_angle
poly_mask.fill(0)
return score_map, geo_map, training_mask
@apple2373 hi, can you show some results on character detection?
My problem is not for scene text, but scanned classical Japanese papers. Here is one.
Well, don't expect EAST to work as much as this on your data. I can't find all visualizations except some cherry picked ones.
@apple2373 Thanks! Have you changed any parameters? I also use east to train character detection models,Although the model has trained 1000 steps, nothing has been detected.
As far as I remember, I don't change the parameters much. Instead, I divided the images into multiple crops as a preprocessing phase. This is not only to increase the number of training images but also to make EAST easier to detect because the original image is too big, and with the input size of EAST (512 or 1024? I forgot), it's too small (less than 10 pixels) to detect. Notice that the default settting ignores the box less than 10x10.
https://github.com/argman/EAST/blob/ab97939783901b7e22ff55e151964e159d1627b9/icdar.py#L24-L25
I think the following data type should be uint64 not uint8. https://github.com/argman/EAST/blob/dca414de39a3a4915a019c9a02c1832a31cdd0ca/icdar.py#L465 because https://github.com/argman/EAST/blob/dca414de39a3a4915a019c9a02c1832a31cdd0ca/icdar.py#L490 will be empty when poly_idx > 255
I was solving a very dense text detection problem in character base, and I noticed it.