PaddlePaddle / PaddleOCR

Awesome multilingual OCR toolkits based on PaddlePaddle (practical ultra lightweight OCR system, support 80+ languages recognition, provide data annotation and synthesis tools, support training and deployment among server, mobile, embedded and IoT devices)
https://paddlepaddle.github.io/PaddleOCR/
Apache License 2.0
43.47k stars 7.75k forks source link

How Can i use pretrained models like SAST EAST in pdserving? #4441

Closed YC7225 closed 2 years ago

YC7225 commented 2 years ago

I tried to convert a pre-trained model into an inference model and then into a serving model, but I get an empty result with no errors, so I am not able to use it. I am using the default config file (for example, SAST with Total-Text data) and the export_model.py script to convert the pre-trained model to an inference model.

YC7225 commented 2 years ago

@littletomatodonkey could you please help me out in this?

littletomatodonkey commented 2 years ago

Hi, sorry, pdserving does not support EAST or SAST for now.

YC7225 commented 2 years ago

Do you know how I can make the changes needed for the EAST and SAST algorithms?

littletomatodonkey commented 2 years ago

You might modify preprocess and postprocess here for the east/sast model.

https://github.com/PaddlePaddle/PaddleOCR/blob/release%2F2.3/deploy/pdserving/ocr_reader.py

YC7225 commented 2 years ago

@ZeyuChen @bingooo @karlhorky @ufoym @jacquesqiao @littletomatodonkey I have modified it according to the ppocr code here, but no response comes back. Could you help me figure out how I can use it? This is my web_service.py:

`from paddle_serving_server.web_service import WebService, Op import pdb import logging import numpy as np import cv2 import base64

from paddle_serving_app.reader import OCRReader

from ocr_reader import OCRReader, DetResizeForTest from paddle_serving_app.reader import Sequential, ResizeByFactor from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import DBPostProcess, SASTPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes

_LOGGER = logging.getLogger()

class DetOp(Op):
    """Pipeline op that runs text detection on a base64-encoded image.

    The op decodes the request image, normalizes it for the detection model,
    and converts the model output into boxes with ``SASTPostProcess``.
    """

    def init_op(self):
        """Build the preprocessing chain and the box post-processors."""
        self.det_preprocess = Sequential([
            DetResizeForTest(),
            Div(255),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            Transpose((2, 0, 1)),
        ])
        self.filter_func = FilterBoxes(10, 10)
        self.post_func = SASTPostProcess({
            "score_thresh": 0.1,
            "nms_thresh": 0.2,
            "sast_polygon": False,
            "tcl_map_thresh": 0.5
        })
        # DB post-processing that this op used before the switch to SAST:
        # self.post_func = DBPostProcess({
        #     "thresh": 0.3, "box_thresh": 0.5, "max_candidates": 1000,
        #     "unclip_ratio": 1.5, "min_size": 3 })

    def preprocess(self, input_dicts, data_id, log_id):
        """Decode the request image and build the detection model feed.

        Args:
            input_dicts: mapping with exactly one entry whose value holds the
                base64 string under the key ``"image"``.
            data_id: request identifier (unused here).
            log_id: log identifier (unused here).

        Returns:
            A 4-tuple ``(feed_dict, False, None, "")`` where ``feed_dict``
            maps ``"x"`` to the preprocessed image with a leading batch axis.
        """
        (_, input_dict), = input_dicts.items()
        data = base64.b64decode(input_dict["image"].encode('utf8'))
        self.raw_im = data
        print(type(data))
        # np.frombuffer replaces the deprecated binary mode of np.fromstring
        # and matches what RecOp.preprocess already does.
        data = np.frombuffer(data, np.uint8)
        # Note: class variables(self.var) can only be used in process op mode
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        self.ori_h, self.ori_w, _ = im.shape
        det_img = self.det_preprocess(im)
        _, self.new_h, self.new_w = det_img.shape
        return {"x": det_img[np.newaxis, :].copy()}, False, None, ""

    def postprocess(self, input_dicts, fetch_dict, log_id):
        """Turn the detection output into filtered boxes for the next op.

        Args:
            input_dicts: outputs of the upstream ops (unused here).
            fetch_dict: model outputs keyed by fetch variable name.
            log_id: log identifier (unused here).

        Returns:
            A 3-tuple ``(out_dict, None, "")`` where ``out_dict`` carries the
            filtered boxes under ``"dt_boxes"`` and the raw image bytes under
            ``"image"``.
        """
        det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]
        print("post process running")
        print(det_out)
        # NOTE(review): a single fetched tensor is split into four maps by
        # its first axis here; confirm that the exported SAST serving model
        # really packs border/score/tco/tvo into this one output.
        preds = {}
        preds['f_border'] = det_out[0]
        preds['f_score'] = det_out[1]
        preds['f_tco'] = det_out[2]
        preds['f_tvo'] = det_out[3]
        ratio_h = float(self.new_h) / self.ori_h
        ratio_w = float(self.new_w) / self.ori_w
        # SASTPostProcess.__call__ unpacks one
        # (src_h, src_w, ratio_h, ratio_w) entry per image, so pass a
        # one-element list instead of the bare [ratio_h, ratio_w] pair.
        shape_list = [[self.ori_h, self.ori_w, ratio_h, ratio_w]]
        post_result = self.post_func(preds, shape_list)
        dt_boxes = post_result[0]['points']
        # The original referenced an undefined name (dt_boxes_list) here.
        dt_boxes = self.filter_func(dt_boxes, [self.ori_h, self.ori_w])
        out_dict = {"dt_boxes": dt_boxes, "image": self.raw_im}

        return out_dict, None, ""

class RecOp(Op):
    """Pipeline op that crops the detected boxes and feeds the recognizer."""

    def init_op(self):
        """Create the OCR reader and the crop / box-sorting helpers."""
        self.ocr_reader = OCRReader(
            char_dict_path="/home/yashwant/research/PaddleOCR/ppocr/utils/ppocr_keys_v1.txt")

        self.get_rotate_crop_image = GetRotateCropImage()
        self.sorted_boxes = SortedBoxes()

    def preprocess(self, input_dicts, data_id, log_id):
        """Crop every detected box and pack the crops into mini-batches.

        Args:
            input_dicts: mapping with exactly one entry whose value holds the
                raw image bytes under ``"image"`` and the boxes under
                ``"dt_boxes"``.
            data_id: request identifier (unused here).
            log_id: log identifier (unused here).

        Returns:
            A 4-tuple ``(feed_list, False, None, "")`` where ``feed_list``
            holds one ``{"x": batch}`` dict per mini-batch; it is empty when
            there are no boxes.
        """
        (_, input_dict), = input_dicts.items()
        raw_im = input_dict["image"]
        data = np.frombuffer(raw_im, np.uint8)
        im = cv2.imdecode(data, cv2.IMREAD_COLOR)
        dt_boxes = self.sorted_boxes(input_dict["dt_boxes"])
        feed_list = []
        # Deliberately not reset per batch: later batches are resized with
        # the largest width/height ratio seen so far, as in the original.
        max_wh_ratio = 0
        # Many mini-batches, so the feed data is a list.
        max_batch_size = 6  # len(dt_boxes)

        # If max_batch_size is 0, skip the predict stage.
        if max_batch_size == 0:
            return {}, True, None, ""

        boxes_size = len(dt_boxes)
        for start in range(0, boxes_size, max_batch_size):
            # The final batch holds the remainder when the box count is not
            # a multiple of max_batch_size.
            end = min(start + max_batch_size, boxes_size)
            img_list = []
            for box_idx in range(start, end):
                boximg = self.get_rotate_crop_image(im, dt_boxes[box_idx])
                img_list.append(boximg)
                h, w = boximg.shape[0:2]
                max_wh_ratio = max(max_wh_ratio, w * 1.0 / h)

            # Use the first crop only to learn the normalized image size.
            _, dim1, dim2 = self.ocr_reader.resize_norm_img(
                img_list[0], max_wh_ratio).shape
            imgs = np.zeros((len(img_list), 3, dim1, dim2)).astype('float32')
            for img_idx, img in enumerate(img_list):
                imgs[img_idx] = self.ocr_reader.resize_norm_img(
                    img, max_wh_ratio)
            feed_list.append({"x": imgs.copy()})

        return feed_list, False, None, ""

    def postprocess(self, input_dicts, fetch_data, log_id):
        """Decode the recognizer output into a stringified list of results.

        Args:
            input_dicts: outputs of the upstream ops (unused here).
            fetch_data: either one fetch dict or a list of fetch dicts (one
                per mini-batch); any other type yields an empty result.
            log_id: log identifier (unused here).

        Returns:
            A 3-tuple ``(res, None, "")`` where ``res["res"]`` is ``str()`` of
            the list built from the first element of every decoded result.
        """
        if isinstance(fetch_data, dict):
            # An empty dict means there is nothing to decode.
            batches = [fetch_data] if len(fetch_data) > 0 else []
        elif isinstance(fetch_data, list):
            batches = fetch_data
        else:
            batches = []

        res_list = []
        for one_batch in batches:
            one_batch_res = self.ocr_reader.postprocess(
                one_batch, with_score=True)
            res_list.extend(res[0] for res in one_batch_res)

        res = {"res": str(res_list)}
        return res, None, ""

class OcrService(WebService): def get_pipeline_response(self, read_op): det_op = DetOp(name="det", input_ops=[read_op]) rec_op = RecOp(name="rec", input_ops=[det_op]) return rec_op

uci_service = OcrService(name="ocr") uci_service.prepare_pipeline_config("config.yml") uci_service.run_service()`

This is the code of my SASTPostProcess file:

`class SASTPostProcess(object):

def __init__(self, params):
    """Store the post-processing thresholds and derive the sampling setup.

    Args:
        params: mapping with the keys ``'score_thresh'``, ``'nms_thresh'``,
            ``'sast_polygon'`` and ``'tcl_map_thresh'``.

    Raises:
        KeyError: if one of the keys above is missing from ``params``.
    """
    import sys  # local import so the block does not rely on a file-level one

    # The key was misspelled as 'secore_thresh', which raised KeyError for
    # callers passing the documented 'score_thresh'.
    self.score_thresh = params['score_thresh']
    self.nms_thresh = params['nms_thresh']
    self.sast_polygon = params['sast_polygon']
    if self.sast_polygon:
        self.sample_pts_num = 6
        self.expand_scale = 1.2
        self.shrink_ratio_of_width = 0.2
    else:
        self.sample_pts_num = 2
        self.expand_scale = 1.0
        self.shrink_ratio_of_width = 0.3
    self.tcl_map_thresh = params['tcl_map_thresh']

    # nms() reads this flag to choose between lanms and nms_locality; it was
    # never set, so nms() raised AttributeError.
    # NOTE(review): presumably lanms is only usable on Python 3.5 - confirm.
    self.is_python35 = sys.version_info[:2] == (3, 5)

def point_pair2poly(self, point_pair_list):
    """
    Convert vertical point pairs into a polygon listed in clockwise order.

    The first point of every pair is emitted in input order, followed by the
    second point of every pair in reverse order; the result is an (N, 2)
    array.
    """
    forward_side = [pair[0] for pair in point_pair_list]
    backward_side = [pair[1] for pair in reversed(point_pair_list)]
    return np.array(forward_side + backward_side).reshape(-1, 2)

def shrink_quad_along_width(self,
                            quad,
                            begin_width_ratio=0.,
                            end_width_ratio=1.):
    """
    Cut a quad between two fractional positions along its 0->1 / 3->2 edges.

    Both ratios are applied to the edge quad[0]->quad[1] and to the edge
    quad[3]->quad[2]; the four resulting points are returned in the same
    corner order as the input quad.
    """
    ratios = np.array(
        [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
    # Row 0 is the point at begin_width_ratio, row 1 the one at end_width_ratio.
    edge_01 = quad[0] + (quad[1] - quad[0]) * ratios
    edge_32 = quad[3] + (quad[2] - quad[3]) * ratios
    begin_01, end_01 = edge_01[0], edge_01[1]
    begin_32, end_32 = edge_32[0], edge_32[1]
    return np.array([begin_01, end_01, end_32, begin_32])

def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
    """
    Expand a polygon along its width by moving its two end edges outwards.

    Args:
        poly: array of polygon points; its first/last points form one end
            and the two points around the middle index form the other end.
            It is modified in place.
        shrink_ratio_of_width: factor applied to each end quad's
            (side 0->3 length / side 0->1 length) ratio to get the expansion.

    Returns:
        The same ``poly`` array with its four end points replaced.
    """
    point_num = poly.shape[0]

    # Quad at the start of the polygon; a negative begin ratio pushes its
    # 0/3 side outwards.
    left_quad = np.array(
        [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
    # The multiplication signs below were missing in the pasted code.
    left_ratio = -shrink_ratio_of_width * \
        np.linalg.norm(left_quad[0] - left_quad[3]) / \
        (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
    left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio,
                                                    1.0)

    # Quad around the middle index; an end ratio above 1 pushes its 1/2 side
    # outwards.
    right_quad = np.array(
        [
            poly[point_num // 2 - 2], poly[point_num // 2 - 1],
            poly[point_num // 2], poly[point_num // 2 + 1]
        ],
        dtype=np.float32)
    right_ratio = 1.0 + \
        shrink_ratio_of_width * \
        np.linalg.norm(right_quad[0] - right_quad[3]) / \
        (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
    right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0,
                                                     right_ratio)

    poly[0] = left_quad_expand[0]
    poly[-1] = left_quad_expand[-1]
    poly[point_num // 2 - 1] = right_quad_expand[1]
    poly[point_num // 2] = right_quad_expand[2]
    return poly

def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
    """
    Build one quad candidate for every pixel above the threshold.

    Returns a tuple (scores, quads, xy_text): the channel-0 value of each
    selected pixel as an (n, 1) column, the pixel coordinates tiled four
    times minus the tvo_map values at that pixel, and the (x, y) coordinates
    of the selected pixels ordered by y.
    """
    selected_yx = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
    selected_xy = selected_yx[:, ::-1]  # (n, 2) as (x, y)

    # Order the pixels by their y coordinate.
    selected_xy = selected_xy[np.argsort(selected_xy[:, 1])]
    cols = selected_xy[:, 0]
    rows = selected_xy[:, 1]

    scores = tcl_map[rows, cols, 0][:, np.newaxis]

    # tvo_map carries an (x, y) offset per quad vertex in its last axis.
    point_num = int(tvo_map.shape[-1] / 2)
    assert point_num == 4
    vertex_offsets = tvo_map[rows, cols, :]
    quads = np.tile(selected_xy, (1, point_num)) - vertex_offsets

    return scores, quads, selected_xy

def quad_area(self, quad):
    """
    Compute the signed area of a quad from its four (x, y) vertices.

    Each edge contributes (x_next - x_cur) * (y_next + y_cur); half of the
    sum is returned, so the sign depends on the vertex winding order.
    """
    # The multiplication signs below were missing in the pasted code.
    edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
            (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
            (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
            (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])]
    return np.sum(edge) / 2.

def nms(self, dets):
    """
    Merge overlapping detections using self.nms_thresh.

    Uses lanms.merge_quadrangle_n9 when self.is_python35 is true and
    nms_locality otherwise; returns whatever the chosen routine returns.
    """
    if not self.is_python35:
        return nms_locality(dets, self.nms_thresh)

    # Imported lazily so lanms is only required on the branch that uses it.
    import lanms
    return lanms.merge_quadrangle_n9(dets, self.nms_thresh)

def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
    """
    Assign every pixel above the threshold to its nearest quad.

    Returns (instance_count, instance_label_map): the number of quads plus
    one for the background, and an int32 map in which label k (k >= 1) marks
    pixels assigned to quads[k - 1] and 0 marks everything else.
    """
    quad_count = quads.shape[0]
    instance_count = quad_count + 1  # background takes label 0
    label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
    if quad_count == 0:
        return instance_count, label_map

    # Centre predicted by each selected pixel: its (x, y) minus tco_map there.
    selected_yx = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
    selected_xy = selected_yx[:, ::-1]  # (n, 2)
    cols = selected_xy[:, 0]
    rows = selected_xy[:, 1]
    predicted_centers = selected_xy - tco_map[rows, cols, :]  # (n, 2)

    # Reference centre of each quad: the mean of its vertices.
    quad_centers = np.mean(quads, axis=1)  # (m, 2)

    # Pairwise distances via broadcasting, shape (n, m).
    deltas = predicted_centers[:, np.newaxis, :] - \
        quad_centers[np.newaxis, :, :]
    distances = np.linalg.norm(deltas, axis=2)
    nearest_label = np.argmin(distances, axis=1) + 1  # (n,)

    label_map[rows, cols] = nearest_label
    return instance_count, label_map

def estimate_sample_pts_num(self, quad, xy_text):
    """
    Estimate how many points to sample along a text instance.

    Args:
        quad: four (x, y) vertices; the 0-3 and 1-2 sides are averaged into
            one length (eh) and the 0-1 and 2-3 sides into another (ew).
        xy_text: (n, 2) array of ordered points belonging to the instance.

    Returns:
        max(2, int(arc_length / eh)), where arc_length is the length of the
        polyline through max(2, int(ew)) evenly spaced entries of xy_text.
    """
    eh = (np.linalg.norm(quad[0] - quad[3]) +
          np.linalg.norm(quad[1] - quad[2])) / 2.0
    ew = (np.linalg.norm(quad[0] - quad[1]) +
          np.linalg.norm(quad[2] - quad[3])) / 2.0

    # Densely sample the ordered points, using the averaged 0-1/2-3 side
    # length as the sample count.
    dense_sample_pts_num = max(2, int(ew))
    dense_xy_center_line = xy_text[np.linspace(
        0,
        xy_text.shape[0] - 1,
        dense_sample_pts_num,
        endpoint=True,
        dtype=np.float32).astype(np.int32)]

    # Sum the segment lengths between consecutive dense samples.
    dense_xy_center_line_diff = dense_xy_center_line[
        1:] - dense_xy_center_line[:-1]
    estimate_arc_len = np.sum(
        np.linalg.norm(
            dense_xy_center_line_diff, axis=1))

    sample_pts_num = max(2, int(estimate_arc_len / eh))
    return sample_pts_num

def detect_sast(self,
                tcl_map,
                tvo_map,
                tbo_map,
                tco_map,
                ratio_w,
                ratio_h,
                src_w,
                src_h,
                shrink_ratio_of_width=0.3,
                tcl_map_thresh=0.5,
                offset_expand=1.0,
                out_strid=4.0):
    """
    Restore text polygons from the four prediction maps of one image.

    Steps: build quad candidates from tcl_map/tvo_map, merge them with
    self.nms, assign the thresholded tcl_map pixels to the surviving quads,
    then for each quad sample points among its pixels, offset them with
    tbo_map to get point pairs, and assemble/expand the polygon.

    Args:
        tcl_map: (H, W, C) map; channel 0 is compared with tcl_map_thresh.
        tvo_map: (H, W, 8) map passed to restore_quad.
        tbo_map: (H, W, 4) map; each pixel is reshaped to two 2-element
            offsets that are added to the (y, x) pixel position.
        tco_map: (H, W, 2) map passed to cluster_by_quads_tco.
        ratio_w, ratio_h: divisors applied to the x and y coordinates of
            the restored points.
        src_w, src_h: upper clip bounds for the x and y coordinates.
        shrink_ratio_of_width: forwarded to expand_poly_along_width.
        tcl_map_thresh: threshold on channel 0 of tcl_map.
        offset_expand: when not 1.0, each tbo offset is lengthened.
        out_strid: multiplier applied to map coordinates before the ratio
            division.

    Returns:
        A list of (N, 2) polygon arrays; an empty list when NMS keeps
        nothing.
    """
    # restore quad
    scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh,
                                               tvo_map)
    # Each row is 8 quad coordinates followed by the score.
    dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
    dets = self.nms(dets)
    if dets.shape[0] == 0:
        return []
    quads = dets[:, :-1].reshape(-1, 4, 2)
    # Compute quad area (negated: quad_area is signed)
    quad_areas = []
    for quad in quads:
        quad_areas.append(-self.quad_area(quad))

    # instance segmentation
    # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
    instance_count, instance_label_map = self.cluster_by_quads_tco(
        tcl_map, tcl_map_thresh, quads, tco_map)

    # restore single poly with tcl instance.
    poly_list = []
    for instance_idx in range(1, instance_count):
        # Pixels labelled with this instance, as (x, y); label k maps to quads[k - 1].
        xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
        quad = quads[instance_idx - 1]
        q_area = quad_areas[instance_idx - 1]
        # filter quads with a small (or negative) area
        if q_area < 5:
            continue

        # filter quads whose shorter of the 0-1 / 1-2 sides is below 3
        len1 = float(np.linalg.norm(quad[0] - quad[1]))
        len2 = float(np.linalg.norm(quad[1] - quad[2]))
        min_len = min(len1, len2)
        if min_len < 3:
            continue

        # filter small CC
        if xy_text.shape[0] <= 0:
            continue

        # filter low confidence instance
        xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
        if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
            # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
            continue

        # sort xy_text by projection onto the axis joining the midpoints of
        # the quad's 0-3 side and 1-2 side
        left_center_pt = np.array(
            [[(quad[0, 0] + quad[-1, 0]) / 2.0,
              (quad[0, 1] + quad[-1, 1]) / 2.0]])  # (1, 2)
        right_center_pt = np.array(
            [[(quad[1, 0] + quad[2, 0]) / 2.0,
              (quad[1, 1] + quad[2, 1]) / 2.0]])  # (1, 2)
        proj_unit_vec = (right_center_pt - left_center_pt) / \
                        (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
        proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
        xy_text = xy_text[np.argsort(proj_value)]

        # Sample pts in tcl map; a configured count of 0 means "estimate it"
        if self.sample_pts_num == 0:
            sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
        else:
               sample_pts_num = self.sample_pts_num
        # Pick evenly spaced entries of the sorted pixels.
        xy_center_line = xy_text[np.linspace(
            0,
            xy_text.shape[0] - 1,
            sample_pts_num,
            endpoint=True,
            dtype=np.float32).astype(np.int32)]

        point_pair_list = []
        for x, y in xy_center_line:
            # get corresponding offset
            offset = tbo_map[y, x, :].reshape(2, 2)
            if offset_expand != 1.0:
                # Lengthen each offset by a clipped amount (0.5 to 3.0).
                offset_length = np.linalg.norm(
                    offset, axis=1, keepdims=True)
                expand_length = np.clip(
                    offset_length * (offset_expand - 1),
                    a_min=0.5,
                    a_max=3.0)
                offset_detal = offset / offset_length * expand_length
                offset = offset + offset_detal
            # original point, in (y, x) order to match the offsets
            ori_yx = np.array([y, x], dtype=np.float32)
            # Flip to (x, y), scale by out_strid, then divide by the ratios.
            point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array(
                [ratio_w, ratio_h]).reshape(-1, 2)
            point_pair_list.append(point_pair)

        # ndarray: (x, 2), expand poly along width
        detected_poly = self.point_pair2poly(point_pair_list)
        detected_poly = self.expand_poly_along_width(detected_poly,
                                                     shrink_ratio_of_width)
        # Clip the polygon to [0, src_w] x [0, src_h].
        detected_poly[:, 0] = np.clip(
            detected_poly[:, 0], a_min=0, a_max=src_w)
        detected_poly[:, 1] = np.clip(
            detected_poly[:, 1], a_min=0, a_max=src_h)
        poly_list.append(detected_poly)

    return poly_list

def __call__(self, outs_dict, shape_list):
    """
    Run detect_sast on every image of a batch.

    outs_dict must provide 'f_score', 'f_border', 'f_tvo' and 'f_tco';
    shape_list holds one (src_h, src_w, ratio_h, ratio_w) entry per image.
    Returns a list with one {'points': array} dict per image.
    """
    batch_maps = [
        outs_dict[key] for key in ('f_score', 'f_border', 'f_tvo', 'f_tco')
    ]
    # Only the score output is type-checked; when it is a paddle tensor all
    # four outputs are converted to numpy arrays.
    if isinstance(batch_maps[0], paddle.Tensor):
        batch_maps = [one_map.numpy() for one_map in batch_maps]

    poly_lists = []
    for ino in range(len(shape_list)):
        # Move the leading axis of each per-image map to the end.
        p_score, p_border, p_tvo, p_tco = (
            one_map[ino].transpose((1, 2, 0)) for one_map in batch_maps)
        src_h, src_w, ratio_h, ratio_w = shape_list[ino]

        image_polys = self.detect_sast(
            p_score,
            p_tvo,
            p_border,
            p_tco,
            ratio_w,
            ratio_h,
            src_w,
            src_h,
            shrink_ratio_of_width=self.shrink_ratio_of_width,
            tcl_map_thresh=self.tcl_map_thresh,
            offset_expand=self.expand_scale)
        poly_lists.append({'points': np.array(image_polys)})
    return poly_lists

def __repr__(self):
    """Return the class name followed by the four configured thresholds."""
    # The placeholders were numbered {1}..{4}; with four positional
    # arguments (indices 0-3) that raised IndexError.
    return self.__class__.__name__ + \
        " score_thresh: {0}, nms_thresh: {1}, sast_polygon: {2}, tcl_map_thresh: {3}".format(
            self.score_thresh, self.nms_thresh, self.sast_polygon, self.tcl_map_thresh)

`

I am not getting any response. I also added some print calls to my web_service.py file, but they are never executed either. What could be the issue?