I have converted the SAST pre-trained model into an inference model and then into a serving model, following these docs: https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/doc/doc_en/inference_en.md#2-curved-text-detection-model-total-text

After that, I added the SASTPostProcess class to image_reader.py in the serving package, as follows:

class SASTPostProcess(object):
    """Turn the raw output maps of a SAST detection model into text polygons.

    The instance is callable: it takes the dict of network output maps
    (keys ``'f_score'``, ``'f_border'``, ``'f_tvo'``, ``'f_tco'``) together
    with per-image shape information and returns, for every image, a dict
    holding the detected polygons under the key ``'points'``.
    """

    def __init__(self, params):
        """Read the post-processing configuration.

        Args:
            params: mapping with the keys ``'score_thresh'``,
                ``'nms_thresh'``, ``'sast_polygon'`` and ``'tcl_map_thresh'``.
                The historical misspelling ``'secore_thresh'`` is still
                accepted when ``'score_thresh'`` is absent.

        Raises:
            KeyError: if a required key is missing.
        """
        # Local import so that the block does not rely on a module-level one.
        import sys

        if 'score_thresh' in params:
            self.score_thresh = params['score_thresh']
        else:
            # Backward compatibility with the previously misspelled key.
            self.score_thresh = params['secore_thresh']
        self.nms_thresh = params['nms_thresh']
        self.sast_polygon = params['sast_polygon']
        if self.sast_polygon:
            self.sample_pts_num = 6
            self.expand_scale = 1.2
            self.shrink_ratio_of_width = 0.2
        else:
            self.sample_pts_num = 2
            self.expand_scale = 1.0
            self.shrink_ratio_of_width = 0.3
        self.tcl_map_thresh = params['tcl_map_thresh']

        # nms() selects its backend from this flag; it was previously read
        # without ever being assigned.
        self.is_python35 = (sys.version_info.major == 3 and
                            sys.version_info.minor == 5)

    def point_pair2poly(self, point_pair_list):
        """
        Transfer vertical point_pairs into poly point in clockwise.

        The first point of every pair is written front-to-back and the second
        point back-to-front, so the pairs end up as one closed outline.
        Returns an array reshaped to ``(-1, 2)``.
        """
        # construct poly
        point_num = len(point_pair_list) * 2
        point_list = [0] * point_num
        for idx, point_pair in enumerate(point_pair_list):
            point_list[idx] = point_pair[0]
            point_list[point_num - 1 - idx] = point_pair[1]
        return np.array(point_list).reshape(-1, 2)

    def shrink_quad_along_width(self,
                                quad,
                                begin_width_ratio=0.,
                                end_width_ratio=1.):
        """
        Generate shrink_quad_along_width.

        Interpolates along the edges quad[0]->quad[1] and quad[3]->quad[2] at
        the two given ratios and returns the resulting 4-point quad. Ratios
        outside [0, 1] extrapolate, i.e. they expand the quad.
        """
        ratio_pair = np.array(
            [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
        p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
        p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
        return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])

    def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
        """
        expand poly along width.

        The quads at both ends of the polygon are stretched outwards by
        ``shrink_ratio_of_width`` times their height/width ratio. ``poly`` is
        modified in place and also returned.
        """
        point_num = poly.shape[0]
        half = point_num // 2

        left_quad = np.array(
            [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
        # The 1e-6 term guards against a zero-length edge.
        left_ratio = (-shrink_ratio_of_width *
                      np.linalg.norm(left_quad[0] - left_quad[3]) /
                      (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6))
        left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio,
                                                        1.0)

        right_quad = np.array(
            [poly[half - 2], poly[half - 1], poly[half], poly[half + 1]],
            dtype=np.float32)
        right_ratio = (1.0 + shrink_ratio_of_width *
                       np.linalg.norm(right_quad[0] - right_quad[3]) /
                       (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6))
        right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0,
                                                         right_ratio)

        poly[0] = left_quad_expand[0]
        poly[-1] = left_quad_expand[-1]
        poly[half - 1] = right_quad_expand[1]
        poly[half] = right_quad_expand[2]
        return poly

    def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
        """Restore quad.

        Selects the pixels whose channel-0 value in ``tcl_map`` exceeds
        ``tcl_map_thresh`` and subtracts the matching ``tvo_map`` offsets from
        the tiled pixel coordinates.

        Returns:
            ``(scores, quads, xy_text)``: an ``(n, 1)`` score column, one row
            of 8 coordinates per selected pixel, and the ``(n, 2)`` pixel
            coordinates in (x, y) order.
        """
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
        xy_text = xy_text[:, ::-1]  # (n, 2), now in (x, y) order

        # Sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 1])]

        scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
        scores = scores[:, np.newaxis]

        # Restore
        point_num = int(tvo_map.shape[-1] / 2)
        assert point_num == 4
        tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :]
        xy_text_tile = np.tile(xy_text, (1, point_num))  # (n, point_num * 2)
        quads = xy_text_tile - tvo_map

        return scores, quads, xy_text

    def quad_area(self, quad):
        """
        compute area of a quad.

        The value is signed: its sign depends on the order of the vertices
        (detect_sast negates it before use).
        """
        edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
                (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
                (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
                (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])]
        return np.sum(edge) / 2.

    def nms(self, dets):
        """Suppress overlapping quads using ``self.nms_thresh``.

        Uses ``lanms`` when ``self.is_python35`` is set and the module-level
        ``nms_locality`` helper otherwise.
        """
        if self.is_python35:
            import lanms
            dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh)
        else:
            dets = nms_locality(dets, self.nms_thresh)
        return dets

    def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
        """
        Cluster pixels in tcl_map based on quads.

        Every pixel above the threshold is assigned to the quad whose mean
        vertex is closest to the pixel's predicted center (pixel coordinate
        minus its ``tco_map`` value).

        Returns:
            ``(instance_count, instance_label_map)`` where label 0 is the
            background and label ``k`` refers to ``quads[k - 1]``.
        """
        instance_count = quads.shape[0] + 1  # contain background
        instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
        if instance_count == 1:
            return instance_count, instance_label_map

        # predict text center
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
        xy_text = xy_text[:, ::-1]  # (n, 2)
        tco = tco_map[xy_text[:, 1], xy_text[:, 0], :]  # (n, 2)
        pred_tc = xy_text - tco

        # get gt text center
        gt_tc = np.mean(quads, axis=1)  # (m, 2)

        # Broadcasting yields the same (n, m) distance matrix as tiling both
        # operands, without materialising the tiled copies.
        dist_mat = np.linalg.norm(
            pred_tc[:, np.newaxis, :] - gt_tc[np.newaxis, :, :],
            axis=2)  # (n, m)
        xy_text_assign = np.argmin(dist_mat, axis=1) + 1  # (n,)

        instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign
        return instance_count, instance_label_map

    def estimate_sample_pts_num(self, quad, xy_text):
        """
        Estimate sample points number.

        Approximates the arc length of the (already ordered) ``xy_text``
        points and divides it by the mean length of the quad edges 0-3 and
        1-2. The result is never below 2.
        """
        eh = (np.linalg.norm(quad[0] - quad[3]) +
              np.linalg.norm(quad[1] - quad[2])) / 2.0
        ew = (np.linalg.norm(quad[0] - quad[1]) +
              np.linalg.norm(quad[2] - quad[3])) / 2.0
        dense_sample_pts_num = max(2, int(ew))
        dense_xy_center_line = xy_text[np.linspace(
            0,
            xy_text.shape[0] - 1,
            dense_sample_pts_num,
            endpoint=True,
            dtype=np.float32).astype(np.int32)]

        dense_xy_center_line_diff = dense_xy_center_line[
            1:] - dense_xy_center_line[:-1]
        estimate_arc_len = np.sum(
            np.linalg.norm(
                dense_xy_center_line_diff, axis=1))

        sample_pts_num = max(2, int(estimate_arc_len / eh))
        return sample_pts_num

    def detect_sast(self,
                    tcl_map,
                    tvo_map,
                    tbo_map,
                    tco_map,
                    ratio_w,
                    ratio_h,
                    src_w,
                    src_h,
                    shrink_ratio_of_width=0.3,
                    tcl_map_thresh=0.5,
                    offset_expand=1.0,
                    out_strid=4.0):
        """
        first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys

        Steps: restore candidate quads, run NMS, assign pixels to the
        surviving quads, then for every instance sample points along its
        pixels, convert them to border point pairs via ``tbo_map`` and build
        a polygon clipped to ``[0, src_w] x [0, src_h]``.

        Returns:
            A list of polygons (arrays with one (x, y) row per vertex); an
            empty list when NMS leaves no quad.
        """
        # restore quad
        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh,
                                                   tvo_map)
        dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
        dets = self.nms(dets)
        if dets.shape[0] == 0:
            return []
        quads = dets[:, :-1].reshape(-1, 4, 2)

        # Compute quad area (negated signed area)
        quad_areas = [-self.quad_area(quad) for quad in quads]

        # instance segmentation
        instance_count, instance_label_map = self.cluster_by_quads_tco(
            tcl_map, tcl_map_thresh, quads, tco_map)

        # restore single poly with tcl instance.
        poly_list = []
        for instance_idx in range(1, instance_count):
            xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
            quad = quads[instance_idx - 1]
            q_area = quad_areas[instance_idx - 1]
            if q_area < 5:
                continue

            # filter quads with a very short side
            len1 = float(np.linalg.norm(quad[0] - quad[1]))
            len2 = float(np.linalg.norm(quad[1] - quad[2]))
            if min(len1, len2) < 3:
                continue

            # filter small CC
            if xy_text.shape[0] <= 0:
                continue

            # filter low confidence instance
            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
            if np.sum(xy_text_scores) / q_area < 0.1:
                continue

            # sort xy_text by its projection onto the left->right axis
            left_center_pt = np.array(
                [[(quad[0, 0] + quad[-1, 0]) / 2.0,
                  (quad[0, 1] + quad[-1, 1]) / 2.0]])  # (1, 2)
            right_center_pt = np.array(
                [[(quad[1, 0] + quad[2, 0]) / 2.0,
                  (quad[1, 1] + quad[2, 1]) / 2.0]])  # (1, 2)
            proj_unit_vec = (right_center_pt - left_center_pt) / (
                np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
            proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
            xy_text = xy_text[np.argsort(proj_value)]

            # Sample pts in tcl map; 0 means "estimate from the geometry"
            if self.sample_pts_num == 0:
                sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
            else:
                sample_pts_num = self.sample_pts_num
            xy_center_line = xy_text[np.linspace(
                0,
                xy_text.shape[0] - 1,
                sample_pts_num,
                endpoint=True,
                dtype=np.float32).astype(np.int32)]

            point_pair_list = []
            for x, y in xy_center_line:
                # get corresponding offset
                offset = tbo_map[y, x, :].reshape(2, 2)
                if offset_expand != 1.0:
                    offset_length = np.linalg.norm(
                        offset, axis=1, keepdims=True)
                    expand_length = np.clip(
                        offset_length * (offset_expand - 1),
                        a_min=0.5,
                        a_max=3.0)
                    offset_detal = offset / offset_length * expand_length
                    offset = offset + offset_detal
                # original point
                ori_yx = np.array([y, x], dtype=np.float32)
                # Flip (y, x) -> (x, y), then map from the output map back to
                # source-image coordinates.
                point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array(
                    [ratio_w, ratio_h]).reshape(-1, 2)
                point_pair_list.append(point_pair)

            # ndarray: (x, 2), expand poly along width
            detected_poly = self.point_pair2poly(point_pair_list)
            detected_poly = self.expand_poly_along_width(detected_poly,
                                                         shrink_ratio_of_width)
            detected_poly[:, 0] = np.clip(
                detected_poly[:, 0], a_min=0, a_max=src_w)
            detected_poly[:, 1] = np.clip(
                detected_poly[:, 1], a_min=0, a_max=src_h)
            poly_list.append(detected_poly)

        return poly_list

    def __call__(self, outs_dict, shape_list):
        """Post-process a batch of network outputs.

        Args:
            outs_dict: mapping with the keys ``'f_score'``, ``'f_border'``,
                ``'f_tvo'`` and ``'f_tco'``. Each value is indexed by image
                and then transposed with ``(1, 2, 0)``, so each per-image map
                must be 3-dimensional.
            shape_list: one ``(src_h, src_w, ratio_h, ratio_w)`` entry per
                image.

        Returns:
            A list with one ``{'points': ndarray}`` dict per image.
        """

        def as_numpy(value):
            # Tensor-like objects expose .numpy(); plain arrays pass through.
            # Duck typing avoids depending on the `paddle` name being
            # imported in this module.
            return value.numpy() if hasattr(value, 'numpy') else value

        score_list = as_numpy(outs_dict['f_score'])
        border_list = as_numpy(outs_dict['f_border'])
        tvo_list = as_numpy(outs_dict['f_tvo'])
        tco_list = as_numpy(outs_dict['f_tco'])

        img_num = len(shape_list)
        poly_lists = []
        for ino in range(img_num):
            p_score = score_list[ino].transpose((1, 2, 0))
            p_border = border_list[ino].transpose((1, 2, 0))
            p_tvo = tvo_list[ino].transpose((1, 2, 0))
            p_tco = tco_list[ino].transpose((1, 2, 0))
            src_h, src_w, ratio_h, ratio_w = shape_list[ino]

            poly_list = self.detect_sast(
                p_score,
                p_tvo,
                p_border,
                p_tco,
                ratio_w,
                ratio_h,
                src_w,
                src_h,
                shrink_ratio_of_width=self.shrink_ratio_of_width,
                tcl_map_thresh=self.tcl_map_thresh,
                offset_expand=self.expand_scale)
            poly_lists.append({'points': np.array(poly_list)})
        return poly_lists

    def __repr__(self):
        """Return the class name followed by the configured thresholds."""
        # Positional indices start at 0; the former {1}..{4} raised
        # IndexError because only four arguments are supplied.
        return self.__class__.__name__ + \
            " score_thresh: {0}, nms_thresh: {1}, sast_polygon: {2}, tcl_map_thresh: {3}".format(
                self.score_thresh, self.nms_thresh, self.sast_polygon, self.tcl_map_thresh)

And my web_service.py file looks like this:

`from paddle_serving_app.reader import DBPostProcess, SASTPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes

_LOGGER = logging.getLogger()

class DetOp(Op):
    """Pipeline op that runs SAST text detection on a base64-encoded image."""

    def init_op(self):
        """Build the image pre-processing chain and the post-processors."""
        self.det_preprocess = Sequential([
            DetResizeForTest(), Div(255),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            Transpose((2, 0, 1))
        ])
        self.filter_func = FilterBoxes(10, 10)
        self.post_func = SASTPostProcess({
            "score_thresh": 0.1,
            "nms_thresh": 0.2,
            "sast_polygon": False,
            "tcl_map_thresh": 0.5
        })
        # Previous DB detector configuration, kept for reference:
        # self.post_func = DBPostProcess({
        #     "thresh": 0.3,
        #     "box_thresh": 0.5,
        #     "max_candidates": 1000,
        #     "unclip_ratio": 1.5,
        #     "min_size": 3
        # })

    def preprocess(self, input_dicts, data_id, logid):
        """Decode the request image and prepare the detector feed.

        Returns:
            ``(feed_dict, False, None, "")`` where ``feed_dict["x"]`` is the
            pre-processed image with a leading batch axis.
        """
        (_, input_dict), = input_dicts.items()
        data = base64.b64decode(input_dict["image"].encode('utf8'))
        self.raw_im = data
        _LOGGER.debug("decoded image payload type: %s", type(data))
        # np.frombuffer is the supported way to view raw bytes as uint8
        # (np.fromstring is deprecated for binary input); RecOp uses it too.
        data = np.frombuffer(data, np.uint8)

        # Note: class variables(self.var) can only be used in process op mode
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        self.ori_h, self.ori_w, _ = im.shape
        det_img = self.det_preprocess(im)
        _, self.new_h, self.new_w = det_img.shape
        return {"x": det_img[np.newaxis, :].copy()}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, log_id):
        """Convert detector outputs into filtered text boxes.

        Returns:
            ``(out_dict, None, "")`` where ``out_dict`` carries the boxes
            under ``"dt_boxes"`` and the raw image bytes under ``"image"``.
        """
        # NOTE(review): all four SAST maps are taken from this single fetch
        # variable by index. If the exported model exposes them as separate
        # fetch variables, each map must be read from its own key - confirm
        # against the serving model's fetch list.
        det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]
        _LOGGER.debug("post process running")
        preds = {}
        preds['f_border'] = det_out[0]
        preds['f_score'] = det_out[1]
        preds['f_tco'] = det_out[2]
        preds['f_tvo'] = det_out[3]

        # SASTPostProcess.__call__ iterates over shape_list and unpacks every
        # entry as (src_h, src_w, ratio_h, ratio_w), so pass exactly one
        # 4-element entry for the single image instead of two bare ratios.
        ratio_h = float(self.new_h) / self.ori_h
        ratio_w = float(self.new_w) / self.ori_w
        shape_list = [[self.ori_h, self.ori_w, ratio_h, ratio_w]]
        post_result = self.post_func(preds, shape_list)
        dt_boxes = post_result[0]['points']
        # The former code referenced the undefined name `dt_boxes_list`.
        dt_boxes = self.filter_func(dt_boxes, [self.ori_h, self.ori_w])
        out_dict = {"dt_boxes": dt_boxes, "image": self.raw_im}

        return out_dict, None, ""

class RecOp(Op):
    """Pipeline op that crops the detected boxes and feeds them to the recognizer."""

    def init_op(self):
        """Create the OCR reader and the box helpers."""
        self.ocr_reader = OCRReader(
            char_dict_path="/home/yashwant/research/PaddleOCR/ppocr/utils/ppocr_keys_v1.txt")

        self.get_rotate_crop_image = GetRotateCropImage()
        self.sorted_boxes = SortedBoxes()

    def preprocess(self, input_dicts, data_id, log_id):
        """Crop every detected box and group the crops into mini-batches.

        Returns:
            ``(feed_list, False, None, "")`` where ``feed_list`` holds one
            ``{"x": ndarray}`` feed per mini-batch of at most
            ``max_batch_size`` crops; ``({}, True, None, "")`` when
            ``max_batch_size`` is 0.
        """
        (_, input_dict), = input_dicts.items()
        raw_im = input_dict["image"]
        data = np.frombuffer(raw_im, np.uint8)
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        dt_boxes = input_dict["dt_boxes"]
        dt_boxes = self.sorted_boxes(dt_boxes)
        feed_list = []
        # Running maximum; deliberately not reset between mini-batches, which
        # matches the previous behaviour.
        max_wh_ratio = 0
        ## Many mini-batchs, the type of feed_data is list.
        max_batch_size = 6  # len(dt_boxes)

        # If max_batch_size is 0, skipping predict stage
        if max_batch_size == 0:
            return {}, True, None, ""

        boxes_size = len(dt_boxes)
        # Full batches first, then one shorter batch for the remainder.
        for start in range(0, boxes_size, max_batch_size):
            end = min(start + max_batch_size, boxes_size)
            boxes_num_in_one_batch = end - start

            img_list = []
            for box_idx in range(start, end):
                boximg = self.get_rotate_crop_image(im, dt_boxes[box_idx])
                img_list.append(boximg)
                h, w = boximg.shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)

            # The last two axes of a normalized crop define the batch shape.
            _, dim_a, dim_b = self.ocr_reader.resize_norm_img(
                img_list[0], max_wh_ratio).shape

            imgs = np.zeros(
                (boxes_num_in_one_batch, 3, dim_a, dim_b)).astype('float32')
            for img_idx, img in enumerate(img_list):
                imgs[img_idx] = self.ocr_reader.resize_norm_img(
                    img, max_wh_ratio)
            feed_list.append({"x": imgs.copy()})

        return feed_list, False, None, ""

    def postprocess(self, input_dicts, fetch_data, log_id):
        """Decode recognizer outputs into a stringified list of results.

        ``fetch_data`` may be a single fetch dict or a list of them (one per
        mini-batch). Only the first element of every decoded result is kept.

        Returns:
            ``({"res": str(res_list)}, None, "")``.
        """
        res_list = []
        if isinstance(fetch_data, dict):
            if len(fetch_data) > 0:
                rec_batch_res = self.ocr_reader.postprocess(
                    fetch_data, with_score=True)
                res_list.extend(res[0] for res in rec_batch_res)
        elif isinstance(fetch_data, list):
            for one_batch in fetch_data:
                one_batch_res = self.ocr_reader.postprocess(
                    one_batch, with_score=True)
                res_list.extend(res[0] for res in one_batch_res)

        res = {"res": str(res_list)}
        return res, None, ""

class OcrService(WebService):
    """Web service whose pipeline chains detection into recognition."""

    def get_pipeline_response(self, read_op):
        """Wire ``read_op -> det -> rec`` and return the final op."""
        det_op = DetOp(name="det", input_ops=[read_op])
        rec_op = RecOp(name="rec", input_ops=[det_op])
        return rec_op

# Create the service, load the pipeline configuration and start serving.
uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
`

PaddlePaddle / PaddleOCR

How to Use serving to serve SAST DETECTION model? #4792

Note: class variables(self.var) can only be used in process op mode

4441