TypeError: can't multiply sequence by non-int of type 'float'
Full error trace log
File "/home/siddhant/Desktop/deployment_extra/chatbot/dobby-be-preprocess/src/utils/convertors.py", line 162, in pdf_to_text
plain_text = extract_pdf_text(data_to_use)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/siddhant/Desktop/deployment_extra/chatbot/dobby-be-preprocess/src/utils/convertors.py", line 86, in extract_pdf_text
raise exc
File "/home/siddhant/Desktop/deployment_extra/chatbot/dobby-be-preprocess/src/utils/convertors.py", line 79, in extract_pdf_text
for page_layout in extract_pages(data_to_use):
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/high_level.py", line 212, in extract_pages
interpreter.process_page(page)
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/pdfinterp.py", line 997, in process_page
self.render_contents(page.resources, page.contents, ctm=ctm)
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/pdfinterp.py", line 1016, in render_contents
self.execute(list_value(streams))
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/pdfinterp.py", line 1045, in execute
func()
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/pdfinterp.py", line 575, in do_s
self.do_S()
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/pdfinterp.py", line 568, in do_S
self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/converter.py", line 127, in paint_path
self.paint_path(gstate, stroke, fill, evenodd, subpath)
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/converter.py", line 140, in paint_path
pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/converter.py", line 140, in <listcomp>
pts = [apply_matrix_pt(self.ctm, pt) for pt in raw_pts]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/siddhant/anaconda3/envs/dobby-be-preprocess/lib/python3.11/site-packages/pdfminer/utils.py", line 266, in apply_matrix_pt
return a * x + c * y + e, b * x + d * y + f
~~^~~
TypeError: can't multiply sequence by non-int of type 'float'
Code
def extract_pdf_text(data_to_use: str | BytesIO) -> list[str]:
    """Return the text of a PDF as one string per page.

    Each page layout yielded by ``extract_pages`` is reduced to a single
    string: the ``get_text()`` output of every ``LTTextContainer`` element
    on that page, joined with a single space. Elements of any other type
    are skipped.

    Args:
        data_to_use: Passed unchanged to ``extract_pages``; presumably a
            file path or an in-memory PDF stream (confirm against caller).

    Returns:
        A list with one entry per page layout, in the order the pages
        are yielded.

    Note:
        No exception is handled here, so any error raised while
        ``extract_pages`` processes a page propagates to the caller.
    """
    return [
        " ".join(
            item.get_text()
            for item in layout
            if isinstance(item, LTTextContainer)
        )
        for layout in extract_pages(data_to_use)
    ]
Getting the following error
Full error trace log
Code
pdfminer.six version: 20240706
The error above occurs only with some PDF files.