Open Nagisakurumi opened 3 years ago
你好,请问具体使用的 PaddleHub 版本号是多少?用到的 module 和具体执行的脚本能顺便提供一下吗?
paddlepaddle-gpu 2.0.0rc1.post110。虽然用的是 GPU 版本,但是实际运行的是 CPU 模式。
from paddleocr import PaddleOCR import os import paddlehub as hub from paddlehub.common.logger import logger from paddlehub.module.module import moduleinfo, runnable, serving import datetime import sys import base64 import cv2 as cv import numpy as np from PIL import Image
def image_to_base64(image_path):
    """Read an image file and return it as base64 text of a JPEG encoding.

    The file is opened with PIL, copied into a NumPy array, re-encoded as
    JPEG with OpenCV, and the resulting bytes are base64-encoded.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The base64 representation of the JPEG data as an ASCII ``str``.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been copied into the array.
    with Image.open(image_path) as img:
        image_np = np.array(img)
    # NOTE(review): PIL presumably yields RGB channel order while OpenCV
    # encoders assume BGR, so red/blue may be swapped in the JPEG - confirm
    # against the consumer before changing.
    jpeg_buffer = cv.imencode('.jpg', image_np)[1]
    # b64encode returns bytes; decode them rather than slicing the "b'...'"
    # repr, which only works by accident of how bytes are printed.
    return base64.b64encode(jpeg_buffer).decode('ascii')
def appendPath(path):
    """Return ``path`` joined onto the current working directory.

    Args:
        path: Path fragment to append to the working directory.

    Returns:
        The result of ``os.path.join(os.getcwd(), path)``.
    """
    return os.path.join(os.getcwd(), path)
def parse_args():
    """Build the fixed option set used to construct the OCR engine.

    No command line is parsed; every value is hard-coded here.

    Returns:
        An ``argparse.Namespace`` whose attributes are the options below.
        The recognition dictionary path is resolved with ``appendPath``.
    """
    from argparse import Namespace

    options = {
        # Runtime options.
        'use_gpu': False,
        'ir_optim': True,
        'use_tensorrt': False,
        'gpu_mem': 8000,
        # Input location.
        'image_dir': '',
        # det_* options.
        'det_algorithm': 'DB',
        'det_model_dir': None,
        'det_limit_side_len': 960,
        'det_limit_type': 'max',
        'det_db_thresh': 0.3,
        'det_db_box_thresh': 0.5,
        'det_db_unclip_ratio': 2.0,
        'det_east_score_thresh': 0.8,
        'det_east_cover_thresh': 0.1,
        'det_east_nms_thresh': 0.2,
        # rec_* options.
        'rec_algorithm': 'CRNN',
        'rec_model_dir': None,
        'rec_image_shape': "3, 32, 320",
        'rec_char_type': 'ch',
        'rec_batch_num': 30,
        'max_text_length': 25,
        'rec_char_dict_path': appendPath('ppocr_keys_v1.txt'),
        'use_space_char': True,
        'drop_score': 0.5,
        # cls_* options.
        'cls_model_dir': None,
        'cls_image_shape': "3, 48, 192",
        'label_list': ['0', '180'],
        'cls_batch_num': 30,
        'cls_thresh': 0.9,
        # Remaining switches.
        'enable_mkldnn': False,
        'use_zero_copy_run': False,
        'use_pdserving': False,
        'lang': 'ch',
        'det': True,
        'rec': True,
        'use_angle_cls': False,
    }
    return Namespace(**options)
@moduleinfo(
    name="searchAll",
    version="1.0.0",
    summary="ocr system service",
    author="paddle-dev",
    author_email="paddle-dev@baidu.com",
    type="cv/text_recognition")
class OcrSystem(hub.Module):
    """PaddleHub module that serves text search results from a PaddleOCR engine."""

    def _initialize(self, use_gpu=False, enable_mkldnn=False):
        """Create the OCR engine used by every request.

        Args:
            use_gpu: Value for the engine's ``use_gpu`` option.
            enable_mkldnn: Value for the engine's ``enable_mkldnn`` option.
        """
        config = parse_args()
        # Honor the caller's choices; previously both arguments were ignored
        # and the hard-coded defaults from parse_args() always won.
        config.use_gpu = use_gpu
        config.enable_mkldnn = enable_mkldnn
        self.ocr_engine = PaddleOCR(**vars(config))

    def base64_to_image(self, base64_code):
        """Decode base64 text into an OpenCV image array.

        Args:
            base64_code: Base64 representation of an encoded image file.

        Returns:
            The decoded image as a 3-channel array, or ``None`` when OpenCV
            cannot decode the bytes.
        """
        raw_bytes = base64.b64decode(base64_code)
        # frombuffer replaces the deprecated binary mode of np.fromstring.
        encoded = np.frombuffer(raw_bytes, np.uint8)
        # imdecode expects an imread flag, not a colour-conversion code;
        # IMREAD_COLOR always yields a 3-channel BGR image.
        return cv.imdecode(encoded, cv.IMREAD_COLOR)

    def convertToRect(self, points):
        """Return the axis-aligned bounding rectangle of ``points``.

        Args:
            points: Sequence of ``(x, y)`` pairs (the corners of a text box).

        Returns:
            A dict with integer ``x``, ``y``, ``width`` and ``height`` keys.
        """
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        left, top = min(xs), min(ys)
        return {
            'x': int(left),
            'y': int(top),
            'width': int(max(xs) - left),
            'height': int(max(ys) - top),
        }

    def searchAllProcess(self, data):
        """Run OCR on a base64 image and collect every recognised text box.

        Args:
            data: Base64 text of the image to analyse.

        Returns:
            A list of ``{'text': ..., 'rect': ...}`` dicts, empty when the
            engine reports nothing.

        Raises:
            ValueError: If ``data`` does not decode to an image.
        """
        img = self.base64_to_image(data)
        if img is None:
            # Fail with a clear message instead of handing None to the engine.
            raise ValueError("input data is not a decodable image")
        results = self.ocr_engine.ocr(img)
        # Covers both an empty list and a None result.
        if not results:
            return []
        return [
            {'text': item[1][0], 'rect': self.convertToRect(item[0])}
            for item in results
        ]

    @serving
    def searchAll(self, data, **kwargs):
        """Serving entry point: find all text in a base64-encoded image.

        Args:
            data: Base64 text of the image; ``None`` or ``''`` yields ``[]``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The list produced by ``searchAllProcess``, or ``[]`` when the
            input is empty or processing fails (the error is logged).
        """
        # Broad catch is deliberate: this is the service boundary and a bad
        # request must produce an empty answer, not crash the worker.
        try:
            start = datetime.datetime.now()
            if data is None or data == '':
                return []
            ret = self.searchAllProcess(data)
            elapsed = datetime.datetime.now() - start
            # total_seconds() keeps whole seconds; .microseconds alone is
            # only the sub-second part and under-reports slow requests.
            print("服务响应时间为: {} ms".format(elapsed.total_seconds() * 1000.0))
            return ret
        except Exception as e:
            logger.info(e)
            return []
脚本就是一直发 jpg的图片
是用hub serving起了推理的服务,然后另一边一直发送图片去预测吗?
是的,但是频率没有很高。
1. 系统:CentOS 7
2. Anaconda,Python 3.7
3. 环境:paddleocr、paddlehub
4. 6 核 i9 CPU
5. 参数设置了 8 个线程
6. 刚开始运行会比较快,但是运行时间久了就会变慢,这时候重启也没用。