Open Volkopat opened 6 months ago
same here, have you figured out what's wrong with that?
Downgrading the gradio version helped me. I don't have access to my laptop at the moment, but you may find the exact version somewhere in the issues section.
This is so strange, I don't even have Gradio in the environment where I run the marker.
Oh, my bad — the gradio part is what I was working on. As far as I remember, this was a problem with one of the packages in Poetry. Take a look at my pyproject.toml:
```toml
[tool.poetry]
name = "marker-pdf"
version = "0.2.9"
description = "Convert PDF to markdown with high speed and accuracy."
authors = ["Vik Paruchuri <github@vikas.sh>"]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
packages = [
    {include = "marker"}
]
include = [
    "convert.py",
    "convert_single.py",
    "chunk_convert.sh",
    "chunk_convert.py",
]

[tool.poetry.dependencies]
python = ">=3.9,<3.13,!=3.9.7"
scikit-learn = "^1.3.2"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
transformers = "^4.36.2"
numpy = "^1.26.1"
python-dotenv = "^1.0.0"
torch = "^2.2.2" # Issue with torch 2.3.0 and vision models - https://github.com/pytorch/pytorch/issues/121834
ray = "^2.20.0"
tqdm = "^4.66.1"
tabulate = "^0.9.0"
ftfy = "^6.1.1"
texify = "^0.1.9"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.4.8"
filetype = "^1.2.0"
regex = "^2024.4.28"
pdftext = "^0.3.8"
grpcio = "^1.63.0"
gradio = "^3.36.0"
ocrmypdf = "^14.0.2"

[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"

[tool.poetry.scripts]
marker = "convert:main"
marker_single = "convert_single:main"
marker_chunk_convert = "chunk_convert:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
```
Detecting bboxes: 100%|██████████| 2/2 [00:05<00:00, 2.96s/it]
Finding reading order: 100%|██████████| 1/1 [00:04<00:00, 4.89s/it]
Traceback (most recent call last):
File "/app/convert_single.py", line 35, in <module>
main()
File "/app/convert_single.py", line 26, in main
full_text, images, out_meta = convert_single_pdf(fname, model_lst, max_pages=args.max_pages, langs=langs, batch_multiplier=args.batch_multiplier)
File "/app/marker/convert.py", line 115, in convert_single_pdf
table_count = format_tables(pages)
File "/app/marker/tables/table.py", line 138, in format_tables
table_rows = get_table_pdftext(page, table_box)
File "/app/marker/tables/table.py", line 103, in get_table_pdftext
table_rows = assign_cells_to_columns(page, table_box, table_rows)
File "/app/marker/tables/cells.py", line 56, in assign_cells_to_columns
separators = find_column_separators(page, table_box, round_factor=round_factor)
File "/app/marker/tables/cells.py", line 31, in find_column_separators
line_boxes = [p.bbox for p in page.text_lines.bboxes]
AttributeError: 'NoneType' object has no attribute 'bboxes'
Traceback (most recent call last):
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/gradio/routes.py", line 534, in predict
output = await route_utils.call_process_api(
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/gradio/route_utils.py", line 226, in call_process_api
output = await app.get_blocks().process_api(
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/gradio/blocks.py", line 1550, in process_api
result = await self.call_function(
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/gradio/blocks.py", line 1185, in call_function
prediction = await anyio.to_thread.run_sync(
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 851, in run
result = context.run(func, *args)
File "/root/.cache/pypoetry/virtualenvs/marker-pdf-9TtSrW0h-py3.9/lib/python3.9/site-packages/gradio/utils.py", line 661, in wrapper
response = f(*args, **kwargs)
File "/app/run.py", line 21, in _fProcessDocument
subprocess.run(["python", "convert_single.py", input_path, output_path, "--batch_multiplier", workers], check=True)
File "/usr/lib/python3.9/subprocess.py", line 528, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['python', 'convert_single.py', '/tmp/gradio/c6f053961a397c3a97eb85201dfeabffc5a9331c/Managing the Design Process-Chpt 6.pdf', '/tmp/gradio/c6f053961a397c3a97eb85201dfeabffc5a9331c/Chapter6/output.md', '--batch_multiplier', '4']' returned non-zero exit status 1.