Open bottleofwater11 opened 6 days ago
换一份pdf报错如下:直接下载https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar放到对应位置仍然报错需要下载,huggingface的whl文件替换仍然报错 [10/10 06:32:17 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /opt/models/Layout/model_final.pth ... [10/10 06:32:17 fvcore.common.checkpoint]: [Checkpointer] Loading from /opt/models/Layout/model_final.pth ... download https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar to /root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/ch_PP-OCRv4_det_infer.tar 2024-10-10 06:32:18.904 | ERROR | app:pdf_parse_main:133 - ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) Traceback (most recent call last):
File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 789, in urlopen response = self._make_request( │ └ <function HTTPConnectionPool._make_request at 0x7f6cad79ea70> └ <urllib3.connectionpool.HTTPSConnectionPool object at 0x7f67daf79930> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 490, in _make_request raise new_e └ ConnectionResetError(104, 'Connection reset by peer') File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 466, in _make_request self._validate_conn(conn) │ │ └ <urllib3.connection.HTTPSConnection object at 0x7f67daf78f10> │ └ <function HTTPSConnectionPool._validate_conn at 0x7f6cad79eef0> └ <urllib3.connectionpool.HTTPSConnectionPool object at 0x7f67daf79930> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1095, in _validate_conn conn.connect() │ └ <function HTTPSConnection.connect at 0x7f6d51aeb250> └ <urllib3.connection.HTTPSConnection object at 0x7f67daf78f10> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connection.py", line 652, in connect sock_and_verified = _ssl_wrap_socket_and_match_hostname( └ <function _ssl_wrap_socket_and_match_hostname at 0x7f6d51ae9d80> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connection.py", line 805, in _ssl_wrap_socket_and_match_hostname ssl_sock = ssl_wrap_socket( └ <function ssl_wrap_socket at 0x7f6d51ac2290> File "/opt/mineruvenv/lib/python3.10/site-packages/urllib3/util/ssl.py", line 465, in ssl_wrap_socket ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname) │ │ │ │ └ 'paddleocr.bj.bcebos.com' │ │ │ └ False │ │ └ <ssl.SSLContext object at 0x7f6cad087640> │ └ <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> └ <function _ssl_wrap_socket_impl at 0x7f6d51ac2440> File "/opt/mineruvenv/lib/python3.10/site-packages/urllib3/util/ssl.py", line 509, in _ssl_wrap_socket_impl return 
ssl_context.wrap_socket(sock, server_hostname=server_hostname) │ │ │ └ 'paddleocr.bj.bcebos.com' │ │ └ <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> │ └ <function SSLContext.wrap_socket at 0x7f6d537d9e10> └ <ssl.SSLContext object at 0x7f6cad087640> File "/usr/lib/python3.10/ssl.py", line 513, in wrap_socket return self.sslsocket_class._create( │ │ └ <classmethod(<function SSLSocket._create at 0x7f6d537db9a0>)> │ └ <class 'ssl.SSLSocket'> └ <ssl.SSLContext object at 0x7f6cad087640> File "/usr/lib/python3.10/ssl.py", line 1104, in _create self.do_handshake() │ └ <function SSLSocket.do_handshake at 0x7f6d537e0ca0> └ <ssl.SSLSocket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> File "/usr/lib/python3.10/ssl.py", line 1375, in do_handshake self._sslobj.do_handshake() │ └ None └ <ssl.SSLSocket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6>
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/mineru_venv/lib/python3.10/site-packages/requests/adapters.py", line 667, in send resp = conn.urlopen( │ └ <function HTTPConnectionPool.urlopen at 0x7f6cad79ecb0> └ <urllib3.connectionpool.HTTPSConnectionPool object at 0x7f67daf79930> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 843, in urlopen retries = retries.increment( │ └ <function Retry.increment at 0x7f6d51a9ba30> └ Retry(total=0, connect=None, read=False, redirect=None, status=None) File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/util/retry.py", line 474, in increment raise reraise(type(error), error, _stacktrace) │ │ │ └ <traceback object at 0x7f67d9805bc0> │ │ └ ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) │ └ ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) └ <function reraise at 0x7f6d51a99fc0> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/util/util.py", line 38, in reraise raise value.with_traceback(tb) │ └ None └ None File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 789, in urlopen response = self._make_request( │ └ <function HTTPConnectionPool._make_request at 0x7f6cad79ea70> └ <urllib3.connectionpool.HTTPSConnectionPool object at 0x7f67daf79930> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 490, in _make_request raise new_e └ ConnectionResetError(104, 'Connection reset by peer') File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 466, in _make_request self._validate_conn(conn) │ │ └ <urllib3.connection.HTTPSConnection object at 0x7f67daf78f10> │ └ <function HTTPSConnectionPool._validate_conn at 0x7f6cad79eef0> └ <urllib3.connectionpool.HTTPSConnectionPool object at 0x7f67daf79930> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1095, in _validate_conn conn.connect() │ └ <function 
HTTPSConnection.connect at 0x7f6d51aeb250> └ <urllib3.connection.HTTPSConnection object at 0x7f67daf78f10> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connection.py", line 652, in connect sock_and_verified = _ssl_wrap_socket_and_match_hostname( └ <function _ssl_wrap_socket_and_match_hostname at 0x7f6d51ae9d80> File "/opt/mineru_venv/lib/python3.10/site-packages/urllib3/connection.py", line 805, in _ssl_wrap_socket_and_match_hostname ssl_sock = ssl_wrap_socket( └ <function ssl_wrap_socket at 0x7f6d51ac2290> File "/opt/mineruvenv/lib/python3.10/site-packages/urllib3/util/ssl.py", line 465, in ssl_wrap_socket ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname) │ │ │ │ └ 'paddleocr.bj.bcebos.com' │ │ │ └ False │ │ └ <ssl.SSLContext object at 0x7f6cad087640> │ └ <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> └ <function _ssl_wrap_socket_impl at 0x7f6d51ac2440> File "/opt/mineruvenv/lib/python3.10/site-packages/urllib3/util/ssl.py", line 509, in _ssl_wrap_socket_impl return ssl_context.wrap_socket(sock, server_hostname=server_hostname) │ │ │ └ 'paddleocr.bj.bcebos.com' │ │ └ <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> │ └ <function SSLContext.wrap_socket at 0x7f6d537d9e10> └ <ssl.SSLContext object at 0x7f6cad087640> File "/usr/lib/python3.10/ssl.py", line 513, in wrap_socket return self.sslsocket_class._create( │ │ └ <classmethod(<function SSLSocket._create at 0x7f6d537db9a0>)> │ └ <class 'ssl.SSLSocket'> └ <ssl.SSLContext object at 0x7f6cad087640> File "/usr/lib/python3.10/ssl.py", line 1104, in _create self.do_handshake() │ └ <function SSLSocket.do_handshake at 0x7f6d537e0ca0> └ <ssl.SSLSocket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6> File "/usr/lib/python3.10/ssl.py", line 1375, in do_handshake self._sslobj.do_handshake() │ └ None └ <ssl.SSLSocket [closed] fd=-1, 
family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6>
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/mineru_venv/bin/uvicorn", line 8, in
File "/root/app.py", line 115, in pdf_parse_main pipe.pipe_analyze() # Parse │ └ <function UNIPipe.pipe_analyze at 0x7f6c03f83130> └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7f6a2cfd3a60>
File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/pipe/UNIPipe.py", line 31, in pipe_analyze self.model_list = doc_analyze(self.pdf_bytes, ocr=True) │ │ │ │ └ b'%PDF-1.7\r\n%\xb5\xb5\xb5\xb5\r\n1 0 obj\r\n<</Type/Catalog/Pages 2 0 R/Lang(zh-CN) /StructTreeRoot 12 0 R/MarkInfo<</Marke... │ │ │ └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7f6a2cfd3a60> │ │ └ <function doc_analyze at 0x7f6cad1dfbe0> │ └ [] └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7f6a2cfd3a60> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 109, in doc_analyze custom_model = model_manager.get_model(ocr, show_log) │ │ │ └ False │ │ └ True │ └ <function ModelSingleton.get_model at 0x7f6cad1dfb50> └ <magic_pdf.model.doc_analyze_by_custom_model.ModelSingleton object at 0x7f6c03f73be0> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 63, in get_model self._models[key] = custom_model_init(ocr=ocr, show_log=show_log) │ │ │ │ │ └ False │ │ │ │ └ True │ │ │ └ <function custom_model_init at 0x7f6cad1dfa30> │ │ └ (True, False) │ └ {(False, False): <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7f6c03f73c10>} └ <magic_pdf.model.doc_analyze_by_custom_model.ModelSingleton object at 0x7f6c03f73be0> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 93, in custom_model_init custom_model = CustomPEKModel(model_input) │ └ {'ocr': True, 'show_log': False, 'models_dir': '/opt/models', 'device': 'cuda', 'tableconfig': {'model': 'TableMaster', 'is... 
└ <class 'magic_pdf.model.pdf_extract_kit.CustomPEKModel'> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 152, in init self.ocr_model = ModifiedPaddleOCR(show_log=show_log) │ │ └ False │ └ <class 'magic_pdf.model.pek_sub_modules.self_modify.ModifiedPaddleOCR'> └ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7f6a2cf41b40> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/paddleocr.py", line 599, in init maybe_download(params.det_model_dir, det_url) │ │ │ └ 'https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar' │ │ └ '/root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer' │ └ Namespace(help='==SUPPRESS==', use_gpu=True, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_si... └ <function maybe_download at 0x7f6a2d284700> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppocr/utils/network.py", line 55, in maybe_download download_with_progressbar(url, tmp_path) │ │ └ '/root/.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer/ch_PP-OCRv4_det_infer.tar' │ └ 'https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar' └ <function download_with_progressbar at 0x7f6a2d284670> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppocr/utils/network.py", line 28, in download_with_progressbar response = requests.get(url, stream=True) │ │ └ 'https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar' │ └ <function get at 0x7f6caceec790> └ <module 'requests' from '/opt/mineru_venv/lib/python3.10/site-packages/requests/init.py'> File "/opt/mineru_venv/lib/python3.10/site-packages/requests/api.py", line 73, in get return request("get", url, params=params, kwargs) │ │ │ └ {'stream': True} │ │ └ None │ └ 'https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar' └ <function request at 0x7f6cad0de560> File "/opt/mineru_venv/lib/python3.10/site-packages/requests/api.py", line 59, in request return 
session.request(method=method, url=url, kwargs) │ │ │ │ └ {'params': None, 'stream': True} │ │ │ └ 'https://paddleocr.bj.bcebos.com/PP-OCRv4/chinese/ch_PP-OCRv4_det_infer.tar' │ │ └ 'get' │ └ <function Session.request at 0x7f6cacecbeb0> └ <requests.sessions.Session object at 0x7f67dadf05b0> File "/opt/mineru_venv/lib/python3.10/site-packages/requests/sessions.py", line 589, in request resp = self.send(prep, send_kwargs) │ │ │ └ {'timeout': None, 'allow_redirects': True, 'proxies': OrderedDict(), 'stream': True, 'verify': True, 'cert': None} │ │ └ <PreparedRequest [GET]> │ └ <function Session.send at 0x7f6caceec3a0> └ <requests.sessions.Session object at 0x7f67dadf05b0> File "/opt/mineru_venv/lib/python3.10/site-packages/requests/sessions.py", line 703, in send r = adapter.send(request, **kwargs) │ │ │ └ {'timeout': None, 'proxies': OrderedDict(), 'stream': True, 'verify': True, 'cert': None} │ │ └ <PreparedRequest [GET]> │ └ <function HTTPAdapter.send at 0x7f6cacecb7f0> └ <requests.adapters.HTTPAdapter object at 0x7f67dadf2470> File "/opt/mineru_venv/lib/python3.10/site-packages/requests/adapters.py", line 682, in send raise ConnectionError(err, request=request) │ └ <PreparedRequest [GET]> └ <class 'requests.exceptions.ConnectionError'>
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer')) INFO: 172.1.100.100:64781 - "POST /pdf_parse?parse_method=auto&is_json_md_dump=true&output_dir=output HTTP/1.1" 500 Internal Server Error
https://huggingface.co/spaces/opendatalab/MinerU/tree/main/paddleocr
下载这个目录,拷贝到用户目录
cp -r paddleocr ~/.paddleocr
https://huggingface.co/spaces/opendatalab/MinerU/tree/main/paddleocr 下载这个目录,拷贝到用户目录
cp -r paddleocr ~/.paddleocr
我试过了,看之前的issue有过这个解决办法
whl目录里要解压缩,不能是tar包的形式
whl目录里要解压缩,不能是tar包的形式
我直接huggingface一个一个文件下载的, 现在我测试两个pdf,都是word导出的pdf,一个报错是 另一个报错为
把paddleocr目录中的whl复制到.paddleocr中,不能在.paddleocr中嵌套一个paddleocr目录
一样的报错
这个报错应该不会出现了吧
还是出现了,下面这张图就是我出现这个报错的pdf,是我用Word随便插入的一个表格导出的
这个报错应该不会出现了吧
需要看一下我的pdf嘛
这个报错应该不会出现了吧
需要看一下我的pdf嘛
这个报错和pdf没关系的啊,就是机器没有网络,paddleocr没有做好本地部署,可以找个有网的机器或者网上找找paddleocr的本地部署教程。
Description of the bug | 错误描述
sys.platform linux Python 3.10.14 (main, Apr 27 2024, 21:17:55) [GCC 13.2.0] numpy 1.26.4 detectron2 0.6 @/opt/mineru_venv/lib/python3.10/site-packages/detectron2 Compiler GCC 11.4 CUDA compiler not available DETECTRON2_ENV_MODULE
PyTorch 2.3.1+cu121 @/opt/mineru_venv/lib/python3.10/site-packages/torch
PyTorch debug build False
torch._C._GLIBCXX_USE_CXX11_ABI False
GPU available Yes
GPU 0 NVIDIA H20 (arch=9.0)
Driver version 550.54.14
CUDA_HOME None - invalid!
Pillow 10.4.0
torchvision 0.18.1+cu121 @/opt/mineru_venv/lib/python3.10/site-packages/torchvision
torchvision arch flags /opt/mineru_venv/lib/python3.10/site-packages/torchvision/_C.so
fvcore 0.1.5.post20221221
iopath 0.1.9
cv2 4.6.0
PyTorch built with:
[10/10 06:01:21 detectron2]: Command line arguments: {'config_file': '/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml', 'resume': False, 'eval_only': False, 'num_gpus': 1, 'num_machines': 1, 'machine_rank': 0, 'dist_url': 'tcp://127.0.0.1:57823', 'opts': ['MODEL.WEIGHTS', '/opt/models/Layout/model_final.pth']} [10/10 06:01:21 detectron2]: Contents of args.config_file=/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/resources/model_config/layoutlmv3/layoutlmv3_base_inference.yaml: AUG: DETR: true CACHE_DIR: ~/cache/huggingface CUDNN_BENCHMARK: false DATALOADER: ASPECT_RATIO_GROUPING: true FILTER_EMPTY_ANNOTATIONS: false NUM_WORKERS: 4 REPEAT_THRESHOLD: 0.0 SAMPLER_TRAIN: TrainingSampler DATASETS: PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000 PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000 PROPOSAL_FILES_TEST: [] PROPOSAL_FILES_TRAIN: [] TEST:
[10/10 06:01:23 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from /opt/models/Layout/model_final.pth ... [10/10 06:01:23 fvcore.common.checkpoint]: [Checkpointer] Loading from /opt/models/Layout/model_final.pth ... 2024-10-10 06:01:27.362 | INFO | magic_pdf.model.pdf_extract_kit:init:159 - DocAnalysis init done! 2024-10-10 06:01:27.362 | INFO | magic_pdf.model.doc_analyze_by_custom_model:custom_model_init:98 - model init cost: 24.853662252426147 2024-10-10 06:01:27.878 | INFO | magic_pdf.model.pdf_extract_kit:call:170 - layout detection cost: 0.49
0: 1888x1344 10 embeddings, 78.2ms Speed: 10.4ms preprocess, 78.2ms inference, 1.0ms postprocess per image at shape (1, 3, 1888, 1344) 2024-10-10 06:01:28.800 | INFO | magic_pdf.model.pdf_extract_kit:call:200 - formula nums: 10, mfr time: 0.36 2024-10-10 06:01:28.806 | INFO | magic_pdf.model.pdf_extract_kit:call:291 - ------------------table recognition processing begins----------------- 2024-10-10 06:01:28.917 | ERROR | app:pdf_parse_main:133 - axis 2 is out of bounds for array of dimension 1 Traceback (most recent call last):
File "/opt/mineru_venv/bin/uvicorn", line 8, in
sys.exit(main())
│ │ └
│ └
└ <module 'sys' (built-in)>
File "/opt/mineru_venv/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
│ │ │ └ {}
│ │ └ ()
│ └ <function BaseCommand.main at 0x7f6d52ebdb40>
└
File "/opt/mineru_venv/lib/python3.10/site-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
│ │ └ <click.core.Context object at 0x7f6d539e3fa0>
│ └ <function Command.invoke at 0x7f6d52ebe5f0>
└
File "/opt/mineru_venv/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, ctx.params)
│ │ │ │ │ └ {'host': '0.0.0.0', 'port': 8000, 'app': 'app:app', 'uds': None, 'fd': None, 'reload': False, 'reload_dirs': (), 'reload_incl...
│ │ │ │ └ <click.core.Context object at 0x7f6d539e3fa0>
│ │ │ └ <function main at 0x7f6d52b98c10>
│ │ └
│ └ <function Context.invoke at 0x7f6d52ebd360>
└ <click.core.Context object at 0x7f6d539e3fa0>
File "/opt/mineru_venv/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
│ └ {'host': '0.0.0.0', 'port': 8000, 'app': 'app:app', 'uds': None, 'fd': None, 'reload': False, 'reload_dirs': (), 'reload_incl...
└ ()
File "/opt/mineru_venv/lib/python3.10/site-packages/uvicorn/main.py", line 410, in main
run(
└ <function run at 0x7f6d52d7edd0>
File "/opt/mineru_venv/lib/python3.10/site-packages/uvicorn/main.py", line 577, in run
server.run()
│ └ <function Server.run at 0x7f6d52d7e710>
└ <uvicorn.server.Server object at 0x7f6d52b81fc0>
File "/opt/mineru_venv/lib/python3.10/site-packages/uvicorn/server.py", line 65, in run
return asyncio.run(self.serve(sockets=sockets))
│ │ │ │ └ None
│ │ │ └ <function Server.serve at 0x7f6d52d7e7a0>
│ │ └ <uvicorn.server.Server object at 0x7f6d52b81fc0>
│ └ <function run at 0x7f6d538470a0>
└ <module 'asyncio' from '/usr/lib/python3.10/asyncio/__init__.py'>
File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
│ │ └ <coroutine object Server.serve at 0x7f6d52b55a10>
│ └ <function BaseEventLoop.run_until_complete at 0x7f6d52ee49d0>
└ <_UnixSelectorEventLoop running=True closed=False debug=False>
File "/usr/lib/python3.10/asyncio/base_events.py", line 636, in run_until_complete
self.run_forever()
│ └ <function BaseEventLoop.run_forever at 0x7f6d52ee4940>
└ <_UnixSelectorEventLoop running=True closed=False debug=False>
File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
self._run_once()
│ └ <function BaseEventLoop._run_once at 0x7f6d52ee6440>
└ <_UnixSelectorEventLoop running=True closed=False debug=False>
File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
handle._run()
│ └ <function Handle._run at 0x7f6d53041e10>
└ <Handle Task.task_wakeup()>
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
self._context.run(self._callback, self._args)
│ │ │ │ │ └ <member '_args' of 'Handle' objects>
│ │ │ │ └ <Handle Task.task_wakeup()>
│ │ │ └ <member '_callback' of 'Handle' objects>
│ │ └ <Handle Task.task_wakeup()>
│ └ <member '_context' of 'Handle' objects>
└ <Handle Task.task_wakeup()>
File "/opt/mineru_venv/lib/python3.10/site-packages/uvicorn/protocols/http/h11_impl.py", line 406, in run_asgi
result = await app( # type: ignore[func-returns-value]
└ <uvicorn.middleware.proxy_headers.ProxyHeadersMiddleware object at 0x7f6d52b810f0>
File "/opt/mineru_venv/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 70, in call
return await self.app(scope, receive, send)
│ │ │ │ └ <bound method RequestResponseCycle.send of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <fastapi.applications.FastAPI object at 0x7f6d52bf5f90>
└ <uvicorn.middleware.proxy_headers.ProxyHeadersMiddleware object at 0x7f6d52b810f0>
File "/opt/mineru_venv/lib/python3.10/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
│ │ └ <bound method RequestResponseCycle.send of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
└ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/applications.py", line 113, in call
await self.middleware_stack(scope, receive, send)
│ │ │ │ └ <bound method RequestResponseCycle.send of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <starlette.middleware.errors.ServerErrorMiddleware object at 0x7f6c03f71750>
└ <fastapi.applications.FastAPI object at 0x7f6d52bf5f90>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/middleware/errors.py", line 165, in call
await self.app(scope, receive, _send)
│ │ │ │ └ <function ServerErrorMiddleware.call.._send at 0x7f6c03f83910>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <starlette.middleware.exceptions.ExceptionMiddleware object at 0x7f6c03f71720>
└ <starlette.middleware.errors.ServerErrorMiddleware object at 0x7f6c03f71750>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 62, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
│ │ │ │ │ │ └ <function ServerErrorMiddleware.call.._send at 0x7f6c03f83910>
│ │ │ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ │ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ │ │ └ <starlette.requests.Request object at 0x7f6c03f73700>
│ │ └ <fastapi.routing.APIRouter object at 0x7f6c03f71150>
│ └ <starlette.middleware.exceptions.ExceptionMiddleware object at 0x7f6c03f71720>
└ <function wrap_app_handling_exceptions at 0x7f6d51d77910>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 51, in wrapped_app
await app(scope, receive, sender)
│ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f839a0>
│ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
└ <fastapi.routing.APIRouter object at 0x7f6c03f71150>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/routing.py", line 715, in call
await self.middleware_stack(scope, receive, send)
│ │ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f839a0>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <bound method Router.app of <fastapi.routing.APIRouter object at 0x7f6c03f71150>>
└ <fastapi.routing.APIRouter object at 0x7f6c03f71150>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
│ │ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f839a0>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <function Route.handle at 0x7f6d51dacb80>
└ APIRoute(path='/pdf_parse', name='pdf_parse_main', methods=['POST'])
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
│ │ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f839a0>
│ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ └ <function request_response..app at 0x7f6c03f949d0>
└ APIRoute(path='/pdf_parse', name='pdf_parse_main', methods=['POST'])
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
│ │ │ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f839a0>
│ │ │ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ │ │ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
│ │ └ <starlette.requests.Request object at 0x7f6c03f73880>
│ └ <function request_response..app..app at 0x7f6c03f83880>
└ <function wrap_app_handling_exceptions at 0x7f6d51d77910>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/_exception_handler.py", line 51, in wrapped_app
await app(scope, receive, sender)
│ │ │ └ <function wrap_app_handling_exceptions..wrapped_app..sender at 0x7f6c03f83be0>
│ │ └ <bound method RequestResponseCycle.receive of <uvicorn.protocols.http.h11_impl.RequestResponseCycle object at 0x7f6c03f72bf0>>
│ └ {'type': 'http', 'asgi': {'version': '3.0', 'spec_version': '2.4'}, 'http_version': '1.1', 'server': ('172.17.0.4', 8000), 'c...
└ <function request_response..app..app at 0x7f6c03f83880>
File "/opt/mineru_venv/lib/python3.10/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
│ └ <starlette.requests.Request object at 0x7f6c03f73880>
└ <function get_request_handler..app at 0x7f6c03f94940>
File "/opt/mineru_venv/lib/python3.10/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
└ <function run_endpoint_function at 0x7f6d51dae680>
File "/opt/mineru_venv/lib/python3.10/site-packages/fastapi/routing.py", line 212, in run_endpoint_function
return await dependant.call(**values)
│ │ └ {'parse_method': 'auto', 'model_json_path': None, 'is_json_md_dump': True, 'output_dir': 'output', 'pdf_file': UploadFile(fil...
│ └ <function pdf_parse_main at 0x7f6c03f837f0>
└ Dependant(path_params=[], query_params=[ModelField(field_info=Query(auto), name='parse_method', mode='validation'), ModelFiel...
File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/pipe/UNIPipe.py", line 29, in pipe_analyze self.model_list = doc_analyze(self.pdf_bytes, ocr=False) │ │ │ │ └ b'%PDF-1.7\r\n%\xb5\xb5\xb5\xb5\r\n1 0 obj\r\n<</Type/Catalog/Pages 2 0 R/Lang(zh-CN) /StructTreeRoot 36 0 R/MarkInfo<</Marke... │ │ │ └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7f6c03f732e0> │ │ └ <function doc_analyze at 0x7f6cad1dfbe0> │ └ [] └ <magic_pdf.pipe.UNIPipe.UNIPipe object at 0x7f6c03f732e0> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 119, in doc_analyze result = custom_model(img) │ └ array([[[255, 255, 255], │ [255, 255, 255], │ [255, 255, 255], │ ..., │ [255, 255, 255], │ [255... └ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7f6c03f73c10> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 298, in call html_code = self.table_model.img2html(new_image) │ │ │ └ <PIL.Image.Image image mode=RGB size=1156x137 at 0x7F6A2CFD1720> │ │ └ <function ppTableModel.img2html at 0x7f6a2d2ddb40> │ └ <magic_pdf.model.ppTableModel.ppTableModel object at 0x7f6a2c45fcd0> └ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7f6c03f73c10> File "/opt/mineru_venv/lib/python3.10/site-packages/magic_pdf/model/ppTableModel.py", line 40, in img2html predres, = self.table_sys(image) │ │ └ array([[[255, 255, 255], │ │ [255, 255, 255], │ │ [255, 255, 255], │ │ ..., │ │ [255, 255, 255], │ │ [255... │ └ <paddleocr.ppstructure.table.predict_table.TableSystem object at 0x7f6a2c45f9d0> └ <magic_pdf.model.ppTableModel.ppTableModel object at 0x7f6a2c45fcd0> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppstructure/table/predict_table.py", line 86, in call structure_res, elapse = self._structure(copy.deepcopy(img)) │ │ │ │ └ array([[[255, 255, 255], │ │ │ │ [255, 255, 255], │ │ │ │ [255, 255, 255], │ │ │ │ ..., │ │ │ │ [255, 255, 255], │ │ │ │ [255... 
│ │ │ └ <function deepcopy at 0x7f6d52f8eb90> │ │ └ <module 'copy' from '/usr/lib/python3.10/copy.py'> │ └ <function TableSystem._structure at 0x7f6a2d2dd630> └ <paddleocr.ppstructure.table.predict_table.TableSystem object at 0x7f6a2c45f9d0> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppstructure/table/predict_table.py", line 109, in _structure structure_res, elapse = self.table_structurer(copy.deepcopy(img)) │ │ │ │ └ array([[[255, 255, 255], │ │ │ │ [255, 255, 255], │ │ │ │ [255, 255, 255], │ │ │ │ ..., │ │ │ │ [255, 255, 255], │ │ │ │ [255... │ │ │ └ <function deepcopy at 0x7f6d52f8eb90> │ │ └ <module 'copy' from '/usr/lib/python3.10/copy.py'> │ └ <ppstructure.table.predict_structure.TableStructurer object at 0x7f67d978d390> └ <paddleocr.ppstructure.table.predict_table.TableSystem object at 0x7f6a2c45f9d0> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppstructure/table/predict_structure.py", line 147, in call post_result = self.postprocess_op(preds, [shape_list]) │ │ │ └ array([[ 137, 1156, 0.41522, 0.41522, 480, 480]]) │ │ └ {'structure_probs': array([], dtype=float32), 'loc_preds': array([], dtype=float32)} │ └ <ppocr.postprocess.table_postprocess.TableMasterLabelDecode object at 0x7f67d991f8e0> └ <ppstructure.table.predict_structure.TableStructurer object at 0x7f67d978d390> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppocr/postprocess/table_postprocess.py", line 56, in call result = self.decode(structure_probs, bbox_preds, shape_list) │ │ │ │ └ array([[ 137, 1156, 0.41522, 0.41522, 480, 480]]) │ │ │ └ array([], dtype=float32) │ │ └ array([], dtype=float32) │ └ <function TableLabelDecode.decode at 0x7f6a2f795990> └ <ppocr.postprocess.table_postprocess.TableMasterLabelDecode object at 0x7f67d991f8e0> File "/opt/mineru_venv/lib/python3.10/site-packages/paddleocr/ppocr/postprocess/table_postprocess.py", line 69, in decode structure_idx = structure_probs.argmax(axis=2) │ └ <method 'argmax' of 'numpy.ndarray' 
objects> └ array([], dtype=float32)
numpy.exceptions.AxisError: axis 2 is out of bounds for array of dimension 1 INFO: 172.1.100.92:56884 - "POST /pdf_parse?parse_method=auto&is_json_md_dump=true&output_dir=output HTTP/1.1" 500 Internal Server Error
How to reproduce the bug | 如何复现
启用cuda和table能力。 使用auto模式启动magic-pdf。
Operating system | 操作系统
Linux
Python version | Python 版本
3.10
Software version | 软件版本 (magic-pdf --version)
0.7.x
Device mode | 设备模式
cuda