Convert png, pdf, odt to jpg

Extrait de code de `extract_text_and_ocr` qui peut aider :

    # Walk the requested pages of the PDF: reuse cached results, keep the
    # embedded text layer when it is substantial enough, and otherwise render
    # the page to an image (the start of the OCR path).
    with pymupdf.open(pdf_path) as pdf_document:
        # "all" is expanded to every page number, 1-based.
        if selected_pages == "all":
            selected_pages = tuple(range(1, pdf_document.page_count + 1))
        assert isinstance(selected_pages, Iterable), f"{selected_pages=}"
        for page_num in selected_pages:
            # Pages already present in the cache are copied over and skipped.
            if page_num in current_cache:
                result[page_num] = current_cache[page_num]
                continue
            text = pdf_document.get_page_text(page_num - 1)  # page_num is 1-based; the index passed here is 0-based
            # OCR is used only when the cleaned text is at most 30 characters
            # long and OCR has not been disabled with ocr == "no_ocr".
            use_OCR = len(clean_pdf_text(text)[0].strip()) <= 30 and ocr != "no_ocr" # [0] keeps only the text
            if not use_OCR:
                # Enough embedded text: store it as-is for this page.
                page_content = text
                result[page_num] = page_content
            else:
                page = pdf_document.load_page(page_num - 1)  # page_num is 1-based; the index passed here is 0-based
                # Render the page at 800 dpi and wrap the raw pixel buffer in a PIL image.
                # NOTE(review): "RGB" assumes the pixmap has 3 channels and no alpha — confirm.
                image = page.get_pixmap(dpi=800)
                image_pil = Image.frombytes("RGB", [image.width, image.height], image.samples)

IA-Generative / ocr-api

Convert png, pdf, odt to jpg #1