Name: PyMuPDF
Version: 1.18.14
with fitz.open(stream=pdf_data) as doc:
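要加上 filetype 参数(PyMuPDF 1.18.14 通过 stream 打开文档时必须指定 filetype,否则会抛出 ValueError: need filetype for opening a stream):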
def extract_texts(self, filetype, pdf_data: bytes, force_ocr: bool) -> Tuple[Dict, List]:
texts, need_ocr_idxs = {}, []
with fitz.open(filetype=filetype, stream=pdf_data) as doc:
def __call__(
self,
content: Union[str, Path, bytes],
force_ocr: bool = False,
) -> List[List[Union[str, str, str]]]:
try:
file_type = self.which_type(content)
except (FileExistsError, TypeError) as e:
raise PDFExtracterError("The input content is empty.") from e
if file_type != "pdf":
raise PDFExtracterError("The file type is not PDF format.")
try:
pdf_data = self.load_pdf(content)
except PDFExtracterError as e:
warnings.warn(str(e))
return self.empyt_list
txts_dict, need_ocr_idxs = self.extract_texts(file_type, pdf_data, force_ocr)
page_img_dict = self.read_pdf_with_image(file_type, pdf_data, need_ocr_idxs)
Traceback (most recent call last):
File "demo.py", line 9, in <module>
texts = pdf_extracter(pdf_path, force_ocr=True)
File "E:\OCR\RapidOCRPDF\rapidocr_pdf\main.py", line 49, in __call__
txts_dict, need_ocr_idxs = self.extract_texts(pdf_data, force_ocr)
File "E:\OCR\RapidOCRPDF\rapidocr_pdf\main.py", line 74, in extract_texts
with fitz.open(stream=pdf_data) as doc:
File "D:\Python\Python37\lib\site-packages\fitz\fitz.py", line 3576, in __init__
raise ValueError("need filetype for opening a stream")
ValueError: need filetype for opening a stream
Name: PyMuPDF
Version: 1.18.14
with fitz.open(stream=pdf_data) as doc:
要加上 filetype 参数
def extract_texts(self, filetype, pdf_data: bytes, force_ocr: bool) -> Tuple[Dict, List]:
    texts, need_ocr_idxs = {}, []
    with fitz.open(filetype=filetype, stream=pdf_data) as doc:
Traceback (most recent call last):
File "demo.py", line 9, in <module>
texts = pdf_extracter(pdf_path, force_ocr=True)
File "E:\OCR\RapidOCRPDF\rapidocr_pdf\main.py", line 49, in __call__
txts_dict, need_ocr_idxs = self.extract_texts(pdf_data, force_ocr)
File "E:\OCR\RapidOCRPDF\rapidocr_pdf\main.py", line 74, in extract_texts
with fitz.open(stream=pdf_data) as doc:
File "D:\Python\Python37\lib\site-packages\fitz\fitz.py", line 3576, in __init__
raise ValueError("need filetype for opening a stream")
ValueError: need filetype for opening a stream