Closed ignore1999 closed 2 months ago
可以提供推理效果不一致的图像吗?我们本地复现一下
可以提供推理效果不一致的图像吗?我们本地复现一下
我也遇到了同样问题,亟待解决,谢谢! 本地推理: 检测=ch_PP-OCRv4_server_det 识别=ch_PP-OCRv4_server_rec 原始参数、代码推理,无任何微调。
【官网demo】 【本地推理】
【官网demo】 【本地推理】 检测为空
没问题呀,用超轻量模型也是准确的。
paddleocr --image_dir 348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg
[2024/07/18 08:39:59] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir='348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg', page_num=0, det_algorithm='DB', det_model_dir='/home/greatx/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/greatx/.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_char_dict_path='/home/greatx/repos/PaddleOCR/ppocr/utils/ppocr_keys_v1.txt', use_space_char=True, vis_font_path='./doc/fonts/simfang.ttf', drop_score=0.5, e2e_algorithm='PGNet', e2e_model_dir=None, e2e_limit_side_len=768, e2e_limit_type='max', e2e_pgnet_score_thresh=0.5, e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_pgnet_valid_set='totaltext', e2e_pgnet_mode='fast', use_angle_cls=False, cls_model_dir='/home/greatx/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer', cls_image_shape='3, 48, 192', label_list=['0', '180'], cls_batch_num=6, cls_thresh=0.9, enable_mkldnn=False, cpu_threads=10, use_pdserving=False, warmup=False, sr_model_dir=None, sr_image_shape='3, 32, 128', sr_batch_num=1, draw_img_save_dir='./inference_results', save_crop_res=False, crop_res_save_dir='./output', use_mp=False, total_process_num=1, process_id=0, benchmark=False, save_log_path='./log_output/', show_log=True, use_onnx=False, 
return_word_box=False, output='./output', table_max_len=488, table_algorithm='TableAttn', table_model_dir=None, merge_no_span_structure=True, table_char_dict_path=None, layout_model_dir=None, layout_dict_path=None, layout_score_threshold=0.5, layout_nms_threshold=0.5, kie_algorithm='LayoutXLM', ser_model_dir=None, re_model_dir=None, use_visual_backbone=True, ser_dict_path='../train_data/XFUND/class_list_xfun.txt', ocr_order_method=None, mode='structure', image_orientation=False, layout=True, table=True, ocr=True, recovery=False, use_pdf2docx_api=False, invert=False, binarize=False, alphacolor=(255, 255, 255), lang='ch', det=True, rec=True, type='ocr', savefile=False, ocr_version='PP-OCRv4', structure_version='PP-StructureV2')
[2024/07/18 08:39:59] ppocr WARNING: The first GPU is used for inference by default, GPU ID: 0
[2024/07/18 08:40:00] ppocr WARNING: The first GPU is used for inference by default, GPU ID: 0
[2024/07/18 08:40:00] ppocr INFO: **********348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg**********
[2024/07/18 08:40:00] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.3688657283782959
[2024/07/18 08:40:01] ppocr DEBUG: rec_res num : 1, elapsed : 0.11798381805419922
[2024/07/18 08:40:01] ppocr INFO: [[[57.0, 54.0], [567.0, 54.0], [567.0, 89.0], [57.0, 89.0]], ('上水库主副坝面板.趾板混凝土缺陷处理工程(1/5)', 0.9800083637237549)]
@Minghao2812 你这个看着像是显卡的问题,是不是显卡不兼容。
高精度模型,效果也是可以。@ignore1999
paddleocr --image_dir 348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg --det_model_dir=pretrained_models/ch_PP-OCRv4_det_server_infer --rec_model_dir=pretrained_models/ch_PP-OCRv4_rec_server_infer
[2024/07/18 08:45:59] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir='348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg', page_num=0, det_algorithm='DB', det_model_dir='pretrained_models/ch_PP-OCRv4_det_server_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='pretrained_models/ch_PP-OCRv4_rec_server_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_char_dict_path='/home/greatx/repos/PaddleOCR/ppocr/utils/ppocr_keys_v1.txt', use_space_char=True, vis_font_path='./doc/fonts/simfang.ttf', drop_score=0.5, e2e_algorithm='PGNet', e2e_model_dir=None, e2e_limit_side_len=768, e2e_limit_type='max', e2e_pgnet_score_thresh=0.5, e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_pgnet_valid_set='totaltext', e2e_pgnet_mode='fast', use_angle_cls=False, cls_model_dir='/home/greatx/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer', cls_image_shape='3, 48, 192', label_list=['0', '180'], cls_batch_num=6, cls_thresh=0.9, enable_mkldnn=False, cpu_threads=10, use_pdserving=False, warmup=False, sr_model_dir=None, sr_image_shape='3, 32, 128', sr_batch_num=1, draw_img_save_dir='./inference_results', save_crop_res=False, crop_res_save_dir='./output', use_mp=False, total_process_num=1, process_id=0, benchmark=False, save_log_path='./log_output/', show_log=True, use_onnx=False, return_word_box=False, 
output='./output', table_max_len=488, table_algorithm='TableAttn', table_model_dir=None, merge_no_span_structure=True, table_char_dict_path=None, layout_model_dir=None, layout_dict_path=None, layout_score_threshold=0.5, layout_nms_threshold=0.5, kie_algorithm='LayoutXLM', ser_model_dir=None, re_model_dir=None, use_visual_backbone=True, ser_dict_path='../train_data/XFUND/class_list_xfun.txt', ocr_order_method=None, mode='structure', image_orientation=False, layout=True, table=True, ocr=True, recovery=False, use_pdf2docx_api=False, invert=False, binarize=False, alphacolor=(255, 255, 255), lang='ch', det=True, rec=True, type='ocr', savefile=False, ocr_version='PP-OCRv4', structure_version='PP-StructureV2')
[2024/07/18 08:45:59] ppocr WARNING: The first GPU is used for inference by default, GPU ID: 0
[2024/07/18 08:45:59] ppocr WARNING: The first GPU is used for inference by default, GPU ID: 0
[2024/07/18 08:46:00] ppocr INFO: **********348621372-cdaaf8a5-0373-452d-a618-0c85e2ef2197.jpg**********
[2024/07/18 08:46:00] ppocr DEBUG: dt_boxes num : 1, elapsed : 0.19745516777038574
[2024/07/18 08:46:00] ppocr DEBUG: rec_res num : 1, elapsed : 0.046478271484375
[2024/07/18 08:46:00] ppocr INFO: [[[54.0, 49.0], [571.0, 50.0], [571.0, 89.0], [54.0, 88.0]], ('上水库主副坝面板.趾板混凝土缺陷处理工程(1/5)', 0.9718229174613953)]
@Minghao2812 你这个看着像是显卡的问题,是不是显卡不兼容。
我在推理时的确遇到了这个报警: W0718 17:14:21.101558 50737 gpu_resources.cc:299] WARNING: device: . The installed Paddle is compiled with CUDNN 8.2, but CUDNN version in your machine is 8.1, which may cause serious incompatible bug. Please recompile or reinstall Paddle with compatible CUDNN version.
1)是CUDNN版本引起的吗? 2)paddle版本我是按照训练脚本里建议的2.6.1.post112,不知道为什么还会报warning? 3)如果机器cudnn不改,安装哪个paddle合适呢? @GreatV
@Minghao2812 试试高精度模型 https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/models_list.md warning 一般没关系。
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Example: run detection + recognition with the high-precision (server)
# PP-OCRv4 models on a single image, print every recognized line, and save
# a visualization of the result.
#
# The language used by PaddleOCR is switched through the `lang` parameter,
# for example `ch`, `en`, `fr`, `german`, `korean`, `japan`.
ocr = PaddleOCR(
    use_angle_cls=True,
    lang="ch",
    det_model_dir="pretrained_models/ch_PP-OCRv4_det_server_infer",
    rec_model_dir="pretrained_models/ch_PP-OCRv4_rec_server_infer",
)  # need to run only once to download and load model into memory
img_path = './Snipaste_2024-07-18_21-00-13.png'
result = ocr.ocr(img_path, cls=True)

# Print each recognized line as (box, (text, score)).
for res in result:
    # NOTE(review): an entry appears to be None when no text box was detected
    # (the "dt_boxes num : 0" case); skip it instead of failing on iteration.
    if res is None:
        continue
    for line in res:
        print(line)

# Show the result: draw boxes, texts and scores of the first entry and save it.
result = result[0]
if result:
    image = Image.open(img_path).convert('RGB')
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./doc/fonts/simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
@Minghao2812 现在推荐安装 paddle 3.0 beta
https://github.com/PaddlePaddle/PaddleOCR/blob/main/doc/doc_ch/models_list.md
@GreatV 好的,谢谢,我试一下先。看起来只需要使用paddleocr即可?不需要使用infer_det.py推理吗?
@Minghao2812 两种都可以,取决于具体需求。
@GreatV 好的,上面的代码正在运行,看起来需要等一会。我发的错误结果,就是用infer_det.py得到的。配置文件没有修改参数,只修改了数据路径,是怎么回事呢?配置文件全文如下: Global: debug: false use_gpu: true epoch_num: 5 log_smooth_window: 20 print_batch_step: 10 save_model_dir: ./output/rec_ch_ppocr_v4_hgnet save_epoch_step: 10 eval_batch_step: [0, 2000] cal_metric_during_train: true pretrained_model: ./pretrained_models/ch_PP-OCRv4_rec_server_train/best_accuracy checkpoints: save_inference_dir: ./inference/rec_ch_ppocr_v4_hgnet use_visualdl: true infer_img: doc/imgs_words/ch/word_1.jpg character_dict_path: ppocr/utils/ppocr_keys_v1.txt max_text_length: &max_text_length 25 infer_mode: false use_space_char: true distributed: true save_res_path: ./test_data/rec_pred.txt
Optimizer: name: Adam beta1: 0.9 beta2: 0.999 lr: name: Cosine learning_rate: 0.001 warmup_epoch: 5 regularizer: name: L2 factor: 3.0e-05
Architecture: model_type: rec algorithm: SVTR_HGNet Transform: Backbone: name: PPHGNet_small Head: name: MultiHead head_list:
Loss: name: MultiLoss loss_config_list:
PostProcess:
name: CTCLabelDecode
Metric: name: RecMetric main_indicator: acc
Train: dataset: name: MultiScaleDataSet ds_width: false data_dir: ./train_data/rec/ ext_op_transform_idx: 1 label_file_list:
@GreatV 我好像发现问题了。我用你帮忙提供的代码运行,出现如下报错,看起来是result = ocr.ocr(img_path, cls=True)没有结果。这是为什么呢?
[2024/07/18 21:29:48] ppocr DEBUG: dt_boxes num : 0, elapsed : 404.3944854736328 [2024/07/18 21:29:48] ppocr DEBUG: cls num : 0, elapsed : 0 [2024/07/18 21:29:48] ppocr DEBUG: rec_res num : 0, elapsed : 1.6689300537109375e-06
@Minghao2812 换一个电脑试试,看看是不是环境的问题。
@Minghao2812 你用的gpu是什么型号的,paddle对旧的gpu不兼容。
gpu=A100 cuda=11.2 cudnn=8.1 python=3.8
指定一下 use_gpu=False,看看会不会有结果。
use_gpu=False,同样报错: [2024/07/18 21:41:58] ppocr DEBUG: dt_boxes num : 0, elapsed : 0.4653017520904541 [2024/07/18 21:41:58] ppocr DEBUG: cls num : 0, elapsed : 0 [2024/07/18 21:41:58] ppocr DEBUG: rec_res num : 0, elapsed : 1.1920928955078125e-06
在我的mac上这两张都没问题,我用的是paddleocr main分支、paddle 2.6.1。
这里是我截取的两张图
谢谢,我在本地再运行一下试试。看起来与机器环境有关。
我在mac上用CPU推理,这几张图得到了结果
但是唯独这张图,依然没有结果
不可理解 :) @GreatV 请问有什么建议吗?
@Minghao2812
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Example: run detection + recognition with the default models on a single
# image, print every recognized line, and save a visualization of the result.
#
# Alternative configuration using the high-precision (server) PP-OCRv4 models,
# kept for comparison with the default models used below:
# ocr = PaddleOCR(use_angle_cls=True, lang="ch",
#                 det_model_dir="./pretrained_models/ch_PP-OCRv4_det_server_infer",
#                 rec_model_dir="./pretrained_models/ch_PP-OCRv4_rec_server_infer")
ocr = PaddleOCR(use_angle_cls=True, lang="ch")
img_path = './350243239-132da577-a298-43c5-97bb-8dfc8b205db8.png'
result = ocr.ocr(img_path, cls=True)

# Print each recognized line as (box, (text, score)).
for res in result:
    # NOTE(review): an entry appears to be None when no text box was detected
    # (the "dt_boxes num : 0" case); skip it instead of failing on iteration.
    if res is None:
        continue
    for line in res:
        print(line)

# Draw boxes, texts and scores of the first entry and save the visualization.
result = result[0]
if result:
    image = Image.open(img_path).convert('RGB')
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./doc/fonts/simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')
我这使用默认的超轻量模型是检测不出来,使用高精度模型可以检测和识别出来
同样的情况,官网通用OCR效果很好,本地效果很差
直接参考的前面提供的代码,参数如下 [2024/07/23 14:14:46] ppocr DEBUG: Namespace(alpha=1.0, alphacolor=(255, 255, 255), benchmark=False, beta=1.0, binarize=False, cls_batch_num=6, cls_image_shape='3, 48, 192', cls_model_dir='/Users/zyb/.paddleocr/whl/cls/ch_ppocr_mobile_v2.0_cls_infer', cls_thresh=0.9, cpu_threads=10, crop_res_save_dir='./output', det=True, det_algorithm='DB', det_box_type='quad', det_db_box_thresh=0.6, det_db_score_mode='fast', det_db_thresh=0.3, det_db_unclip_ratio=1.5, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_east_score_thresh=0.8, det_limit_side_len=960, det_limit_type='max', det_model_dir='./pretrained_models/ch_PP-OCRv4_det_server_infer', det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, det_pse_thresh=0, det_sast_nms_thresh=0.2, det_sast_score_thresh=0.5, draw_img_save_dir='./inference_results', drop_score=0.5, e2e_algorithm='PGNet', e2e_char_dict_path='./ppocr/utils/ic15_dict.txt', e2e_limit_side_len=768, e2e_limit_type='max', e2e_model_dir=None, e2e_pgnet_mode='fast', e2e_pgnet_score_thresh=0.5, e2e_pgnet_valid_set='totaltext', enable_mkldnn=False, fourier_degree=5, gpu_id=0, gpu_mem=500, help='==SUPPRESS==', image_dir=None, image_orientation=False, invert=False, ir_optim=True, kie_algorithm='LayoutXLM', label_list=['0', '180'], lang='ch', layout=True, layout_dict_path=None, layout_model_dir=None, layout_nms_threshold=0.5, layout_score_threshold=0.5, max_batch_size=10, max_text_length=25, merge_no_span_structure=True, min_subgraph_size=15, mode='structure', ocr=True, ocr_order_method=None, ocr_version='PP-OCRv4', output='./output', page_num=0, precision='fp32', process_id=0, re_model_dir=None, rec=True, rec_algorithm='SVTR_LCNet', rec_batch_num=6, rec_char_dict_path='/Users/zyb/miniconda3/envs/py38/lib/python3.8/site-packages/paddleocr/ppocr/utils/ppocr_keys_v1.txt', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_model_dir='./pretrained_models/ch_PP-OCRv4_rec_server_infer', recovery=False, save_crop_res=False, 
save_log_path='./log_output/', scales=[8, 16, 32], ser_dict_path='../train_data/XFUND/class_list_xfun.txt', ser_model_dir=None, show_log=True, sr_batch_num=1, sr_image_shape='3, 32, 128', sr_model_dir=None, structure_version='PP-StructureV2', table=True, table_algorithm='TableAttn', table_char_dict_path=None, table_max_len=488, table_model_dir=None, total_process_num=1, type='ocr', use_angle_cls=True, use_dilation=False, use_gpu=False, use_mp=False, use_npu=False, use_onnx=False, use_pdf2docx_api=False, use_pdserving=False, use_space_char=True, use_tensorrt=False, use_visual_backbone=True, use_xpu=False, vis_font_path='./doc/fonts/simfang.ttf', warmup=False) [2024/07/23 14:14:46] ppocr DEBUG: dt_boxes num : 4, elapsed : 0.05576610565185547 [2024/07/23 14:14:46] ppocr DEBUG: cls num : 4, elapsed : 0.0359950065612793 [2024/07/23 14:14:47] ppocr DEBUG: rec_res num : 4, elapsed : 0.4000670909881592 [[[29.0, 95.0], [44.0, 95.0], [44.0, 109.0], [29.0, 109.0]], ('D.', 0.8692728281021118)] [[[30.0, 169.0], [39.0, 169.0], [39.0, 178.0], [30.0, 178.0]], ('B', 0.9885401129722595)] [[[97.0, 174.0], [107.0, 174.0], [107.0, 185.0], [97.0, 185.0]], ('E', 0.9936330318450928)]
paddlepaddle==2.6.1
感谢 @GreatV 及时的帮助,我的问题已解决,解法供大家参考:
ocr = PaddleOCR(use_angle_cls=True, lang="ch", det_model_dir="pretrained_models/ch_PP-OCRv4_det_server_infer", rec_model_dir="pretrained_models/ch_PP-OCRv4_rec_server_infer")
paddlepaddle==2.5.2 paddleocr==2.7.0.3
问题描述 / Problem Description
飞桨官网的通用OCR体验(https://aistudio.baidu.com/community/app/91660/webUI),识别效果比本地运行 python3 tools/infer/predict_det.py 推理的效果好很多。 据悉官网版本用的模型并不特殊,能否提供官网体验版本服务化部署的代码,以及前后处理和 HTTP 请求中图片编解码方式的相关代码,以便定位问题原因?
运行环境 / Runtime Environment
复现代码 / Reproduction Code
python3 tools/infer/predict_det.py