atlanhq / camelot

Camelot: PDF Table Extraction for Humans
https://camelot-py.readthedocs.io
Other
3.61k stars 349 forks source link

Get error while reading PDF using LATTICE, but works fine with STREAM. #463

Open siddharthchauhan opened 2 years ago

siddharthchauhan commented 2 years ago

import camelot

tables = camelot.read_pdf('04-08 Amended Prot 5.pdf',pages='all')


TypeError Traceback (most recent call last)

in
----> 1 tables = camelot.read_pdf('04-08 Amended Prot 5.pdf',pages='46')
      2 tables

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\camelot\io.py in read_pdf(filepath, pages, password, flavor, suppress_stdout, layout_kwargs, **kwargs)
    111 p = PDFHandler(filepath, pages=pages, password=password)
    112 kwargs = remove_extra(kwargs, flavor=flavor)
--> 113 tables = p.parse(
    114 flavor=flavor,
    115 suppress_stdout=suppress_stdout,

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\camelot\handlers.py in parse(self, flavor, suppress_stdout, layout_kwargs, **kwargs)
    174 parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs)
    175 for p in pages:
--> 176 t = parser.extract_tables(
    177 p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs
    178 )

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\camelot\parsers\lattice.py in extract_tables(self, filename, suppress_stdout, layout_kwargs)
    419 return []
    420
--> 421 self.backend.convert(self.filename, self.imagename)
    422
    423 self._generate_table_bbox()

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\camelot\backends\ghostscript_backend.py in convert(self, pdf_path, png_path, resolution)
     45 pdf_path,
     46 ]
---> 47 ghostscript.Ghostscript(*gs_command)

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\ghostscript\__init__.py in Ghostscript(*args)
    136 if __instance__ is None:
    137 __instance__ = gs.new_instance()
--> 138 return __Ghostscript(__instance__, args)
    139
    140 def cleanup():

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\ghostscript\__init__.py in __init__(self, instance, args)
     67 self._initialized = False
     68 self._instance = instance
---> 69 rc = gs.init_with_args(instance, args)
     70 if rc == gs.e_Info:
     71 raise PleaseDisplayUsage

C:\anaconda\envs\MaxisIT Projects\lib\site-packages\ghostscript\_gsprint.py in init_with_args(instance, argv)
    260 """
    261 ArgArray = c_char_p * len(argv)
--> 262 c_argv = ArgArray(*argv)
    263 rc = libgs.gsapi_init_with_args(instance, len(argv), c_argv)
    264 if rc not in (0, e_Quit, e_Info):

TypeError: bytes or integer address expected instead of str instance