FatalError: Termination signal is detected by the operating system #13976

Open yl2982 opened 1 week ago

yl2982 commented 1 week ago

I use a text-detection model from the PaddleOCR framework and a text-recognition model from ModelScope, and I deploy them as an OCR model service with FastAPI on Kubernetes (k8s) using GPU resources. After sending concurrent requests to the model service for a while, I encountered the following problem:

C++ Traceback (most recent call last):

0 uv_run

Error Message Summary:

FatalError: Termination signal is detected by the operating system. [TimeInfo: Aborted at 1728638155 (unix time) try "date -d @1728638155" if you are using GNU date ] [SignalInfo: SIGTERM (@0x0) received by PID 1 (TID 0x7f384e3eb740) from PID 0 ]

  Exit Code:    143
  Started:      Fri, 11 Oct 2024 17:08:33 +0800
  Finished:     Fri, 11 Oct 2024 17:15:56 +0800

🏃‍♂️ Environment (运行环境)

CUDA Version: 12.0; paddleocr: (version not given); paddlepaddle-gpu: 2.4.2.post117; modelscope: 1.7.1

🌰 Minimal Reproducible Example (最小可复现问题的Demo)

# Load the detection and recognition models from the configured model directory
# and store them on the instance as `_detect_model` and `_recog_model`.
# NOTE(review): this snippet appears to have lost lines when it was pasted --
# neither the PaddleOCR(...) call nor the pipeline(...) call is closed -- so it
# does not parse as shown. The code lines are kept exactly as posted.
async def load(self) -> bool:
    # The model path lives under model/${model_version} and is defined by the
    # parameters in model-settings.json.
    model_dir = self.settings.parameters.uri

    # TODO: load the models
    # Detection side: PaddleOCR pointed at model directories below `model_dir`.
    self._detect_model = PaddleOCR(
        det_model_dir=os.path.join(model_dir, "ch_PP-OCRv4_det_server_infer"),
        rec_model_dir=os.path.join(model_dir, "ch_PP-OCRv3/ch_PP-OCRv3_rec_infer/"),
        # NOTE(review): the next line looks like the arguments of a
        # `cls_model_dir=os.path.join(` keyword whose opening line (and the
        # closing parentheses) are missing -- confirm against the real code.
            model_dir, "ch_PP-OCRv3/ch_ppocr_mobile_v2.0_cls_infer/"
    # Recognition model
    self._recog_model = pipeline(
        # NOTE(review): the task/model keyword that wraps this path and the
        # closing parenthesis are missing here as well -- confirm.
            model_dir, "cv_convnextTiny_ocr-recognition-document_damo"

    # Always reports success once both attributes have been assigned.
    return True

async def predict(self, payload: InferenceRequest) -> InferenceResponse:
    """Run OCR for one request: decode, detect text boxes, recognise, encode.

    :param payload: inference request; its input is decoded with
        ``Base64RequestCodec`` into the encoded image bytes.
    :return: response built by ``encode_response`` from a single JSON string
        (``ensure_ascii=False``) of the recognition result, using
        ``StringRequestCodec``.

    A ``cv2.error`` raised while recognising is logged as a warning and the
    request is answered with an empty ``OcrOutput`` instead of failing.
    """
    # NOTE(review): text_detection / text_recognition are plain (non-async)
    # calls, so they run synchronously inside this coroutine and the event
    # loop cannot do anything else until they return. Presumably this is
    # relevant to the SIGTERM seen under concurrent load (e.g. probes timing
    # out) -- verify before changing the execution model.

    # Decode the request payload into raw image bytes.
    inference_input: bytes = self.decode_request(
        inference_request=payload, default_codec=Base64RequestCodec
    )

    img_array: np.ndarray = self.transfer_img(inference_input)

    # Model inference: detection runs outside the try block, so only errors
    # from the recognition stage are downgraded to an empty result.
    detect_res = self.text_detection(img_array)
    try:
        recog_res = self.text_recognition(
            inference_input=img_array, detect_res=detect_res
        )
    except cv2.error as e:
        logger.warning(f'cv2 error: {e}')
        recog_res = OcrOutput()

    # Encode the model output for the response.
    inference_output = self.encode_response(
        [json.dumps(recog_res.dict(), ensure_ascii=False)], StringRequestCodec
    )

    return inference_output

# Recognise the text inside every detected box and return the collected items
# wrapped in an OcrOutput.
# NOTE(review): this snippet lost lines when it was pasted (docstring quotes,
# the call that builds each result item, and its closing brackets), so it does
# not parse as shown. The code lines are kept exactly as posted.
def text_recognition(
    self, inference_input: np.ndarray, detect_res: np.ndarray
) -> OcrOutput:
    # The two `:param:` lines below look like the body of a docstring whose
    # quotes were lost. In English:
    #   inference_input: RGB NumPy matrix converted from the original image
    #   detect_res: output of the recognition model
    #               (NOTE(review): presumably the detection model's boxes are
    #               meant, since predict passes text_detection's result here)
    :param: inference_input: 原始图像转化的RGB Numpy矩阵
    :param: detect_res: 识别模型的输出结果
    output = []
    # One iteration per row of detect_res.
    for i in range(detect_res.shape[0]):
        # Put the box's points into a consistent order, crop that region out
        # of the image, and run the recognition model on the crop.
        pts = self.order_point(detect_res[i])
        image_crop = self.crop_image(inference_input, pts)
        result = self._recog_model(image_crop)
        # Flatten the ordered points into plain Python floats; indices 0-7 are
        # read below as four coordinate pairs.
        locations = [float(e) for e in list(pts.reshape(-1))]
        # NOTE(review): the call that consumes the arguments below (presumably
        # something like output.append(<item>(...)) that also uses `result`)
        # is missing from the snippet -- restore it from the real code.
                # 1-based index of the box, as a string.
                order=str(i + 1),
                    [locations[0], locations[1]],
                    [locations[2], locations[3]],
                    [locations[4], locations[5]],
                    [locations[6], locations[7]],
    return OcrOutput(output=output)

def text_detection(self, inference_input: np.ndarray) -> np.ndarray:
    """Detect text regions and return them as flattened boxes.

    :param inference_input: image array handed to the detection model (the
        original note describes it as an RGB NumPy matrix converted from the
        source image).
    :return: array with one row per detected box, each row being that box's
        points flattened into a single list; rows are in the reverse of the
        order the detector returned them. An empty array is returned when
        nothing was detected.
    """
    output = self._detect_model.ocr(inference_input, rec=False)

    # Only the first entry of the result is used. It can be missing or None
    # when no text is found (e.g. a result of ``[None]``), which previously
    # made the loop raise; treat that the same as "no boxes".
    boxes = output[0] if output else None
    if boxes is None:
        boxes = []

    # Flatten each box's list of points into one flat list of coordinates.
    flattened = [[value for point in box for value in point] for box in boxes]

    # Per the original author's note, reversing yields the boxes in forward
    # rather than reversed order.
    return np.array(flattened[::-1])

def transfer_img(img: bytes) -> np.ndarray:
    """Decode encoded image bytes into a colour pixel array with OpenCV.

    :param img: the encoded image file contents.
    :return: whatever ``cv2.imdecode`` produces for ``cv2.IMREAD_COLOR``.
    """
    raw_buffer = np.frombuffer(img, dtype=np.uint8)
    decoded = cv2.imdecode(raw_buffer, cv2.IMREAD_COLOR)
    return decoded

def crop_image(img: np.ndarray, position: np.ndarray):
    """Cut a four-cornered region out of ``img`` and warp it to a rectangle.

    :param img: source image passed to ``cv2.warpPerspective``.
    :param position: array of the region's four ``[x, y]`` corner points, in
        any order.
    :return: the perspective-warped crop. Its width is the distance between
        the midpoints of the left and right edges, its height the distance
        between the midpoints of the top and bottom edges (both truncated to
        integers for the output size).
    """

    def distance(x1, y1, x2, y2):
        # Euclidean distance between (x1, y1) and (x2, y2).
        return math.sqrt(pow(x1 - x2, 2) + pow(y1 - y2, 2))

    position = position.tolist()

    # Order the four points by ascending x. The original exchange sort is
    # kept as-is so that points with equal x end up in the same slots.
    for i in range(4):
        for j in range(i + 1, 4):
            if position[i][0] > position[j][0]:
                position[i], position[j] = position[j], position[i]

    # Within the two left-most points, put the smaller y first; likewise for
    # the two right-most points.
    if position[0][1] > position[1][1]:
        position[0], position[1] = position[1], position[0]
    if position[2][1] > position[3][1]:
        position[2], position[3] = position[3], position[2]

    # Names assume image coordinates (y grows downwards), so "top" means the
    # smaller y value.
    x1, y1 = position[0][0], position[0][1]  # left, smaller y  ("top-left")
    x2, y2 = position[2][0], position[2][1]  # right, smaller y ("top-right")
    x3, y3 = position[3][0], position[3][1]  # right, larger y  ("bottom-right")
    x4, y4 = position[1][0], position[1][1]  # left, larger y   ("bottom-left")

    corners = np.array(
        [[x1, y1], [x2, y2], [x4, y4], [x3, y3]], dtype=np.float32
    )

    img_width = distance((x1 + x4) / 2, (y1 + y4) / 2, (x2 + x3) / 2, (y2 + y3) / 2)
    img_height = distance(
        (x1 + x2) / 2, (y1 + y2) / 2, (x4 + x3) / 2, (y4 + y3) / 2
    )

    # Destination corners, listed in the same order as `corners`.
    corners_trans = np.array(
        [
            [0, 0],
            [img_width - 1, 0],
            [0, img_height - 1],
            [img_width - 1, img_height - 1],
        ],
        dtype=np.float32,
    )

    transform = cv2.getPerspectiveTransform(corners, corners_trans)
    dst = cv2.warpPerspective(img, transform, (int(img_width), int(img_height)))
    return dst

def order_point(coor: np.ndarray):
    """Arrange a box's four points in a consistent rotational order.

    The input is reshaped to four ``[x, y]`` rows, which are sorted by their
    ``arctan2`` angle around the centroid (ascending). If the first point
    after sorting lies to the right of the centroid, the sequence is rotated
    by one step so that the last point comes first.

    :param coor: eight coordinates (any shape that reshapes to ``4 x 2``).
    :return: ``4 x 2`` ``float32`` array of the ordered points.
    """
    corners = np.array(coor).reshape([4, 2])
    center = np.sum(corners, 0) / corners.shape[0]

    # Angle of every point as seen from the centroid.
    offsets = corners - center
    angles = np.arctan2(offsets[:, 1], offsets[:, 0])
    ordered = corners[np.argsort(angles)]

    # Rotate by one position when the sequence starts right of the centroid.
    if ordered[0][0] > center[0]:
        ordered = np.roll(ordered, 1, axis=0)

    return ordered.astype("float32")
Liyulingyue commented 6 days ago

可以更进一步提供报错描述/复现代码吗? 比如说,部署后,是否调用成功过?