chinapandaman / PyPDFForm

:fire: The Python library for PDF forms.
https://chinapandaman.github.io/PyPDFForm/
MIT License
389 stars 15 forks source link

PPF-726: Controlling Form Widget Layer #726

Open neotmhy opened 5 days ago

neotmhy commented 5 days ago

PyPDFForm=1.4.33

Controlling Form Widget Layer

Is there a way to control the layer an added widget occupies? I want preexisting overlapping graphics to appear on top, but the widgets are on top instead.

PDForm.py

import logging
import os
import sys
from collections import defaultdict
from PyPDFForm import PdfWrapper

# Setup logging: everything from DEBUG upwards is written to pdform.log,
# which is truncated at the start of every run (filemode 'w').
logging.basicConfig(
    filename='pdform.log',
    filemode='w',
    level=logging.DEBUG,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
# Module-level logger shared by the PDForm class and the command-line entry point.
logger = logging.getLogger(__name__)

class PDForm:
    """Add form widgets to a PDF from a comma-separated field specification.

    Each spec line has the form
    ``page[-endPage],fieldType,fieldName,x,y,width,height`` with coordinates
    measured from the top-left corner of the page. The result is written to
    ``<input base name>_form.pdf`` in the current working directory.
    """

    # Page cap used by read_form_spec() when the real page count is not supplied.
    DEFAULT_MAX_PAGES = 100

    def __init__(self, input_pdf, form_file, form_field_x_offset=0, form_field_y_offset=8, page_height=792):
        """Store the configuration and derive the output file name.

        Args:
            input_pdf: Path of the PDF that receives the widgets.
            form_file: Path of the field specification file.
            form_field_x_offset: Horizontal correction added to every widget's x.
            form_field_y_offset: Vertical correction added to every widget's y
                after the top-left to bottom-left conversion.
            page_height: Page height used for that conversion; applied to every
                page (the default 792 is presumably US Letter in points).
        """
        self.input_pdf = input_pdf
        self.form_file = form_file
        self.form_field_x_offset = form_field_x_offset
        self.form_field_y_offset = form_field_y_offset
        self.page_height = page_height
        self.output_pdf = self.generate_output_filename()
        self.single_line_max_height = 35  # Example constant, adjust as needed

    def generate_output_filename(self):
        """Return ``<base name of input_pdf>_form.pdf`` (no directory part)."""
        base_name = os.path.splitext(os.path.basename(self.input_pdf))[0]
        return f"{base_name}_form.pdf"

    def parse_page_range(self, page_range, total_pages):
        """Expand ``"N"`` or ``"N-M"`` into the 1-based page numbers it selects.

        Pages outside ``1..total_pages`` are dropped rather than clamped, so a
        field can never be moved onto a page it was not specified for.

        Args:
            page_range: Text such as ``"7"`` or ``"5-94"``.
            total_pages: Highest valid page number.

        Returns:
            A ``range`` for the ``N-M`` form, a list for the single-page form;
            empty when nothing valid is selected or the text is malformed
            (a malformed value is also logged as an error).
        """
        try:
            if '-' in page_range:
                start, end = map(int, page_range.split('-'))
                return range(max(start, 1), min(end, total_pages) + 1)
            page = int(page_range)
        except ValueError as e:
            logger.error(f"Invalid page range format: {page_range}. Error: {e}")
            return []
        # A single page beyond the document selects nothing; it must not be
        # silently redirected to the last page.
        return [page] if 1 <= page <= total_pages else []

    def read_form_spec(self, total_pages=None):
        """Parse the spec file into ``{page_num: [(type, name, x, y, w, h), ...]}``.

        Blank lines and lines starting with ``#`` are ignored. Malformed lines
        are logged and skipped. Whitespace around each value is stripped.

        Args:
            total_pages: Number of pages in the target PDF. When omitted,
                ``DEFAULT_MAX_PAGES`` is used as the upper bound.

        Returns:
            A ``defaultdict(list)`` keyed by 1-based page number.

        Raises:
            FileNotFoundError: If ``form_file`` does not exist.
        """
        if total_pages is None:
            total_pages = self.DEFAULT_MAX_PAGES

        form_spec = defaultdict(list)
        with open(self.form_file, 'r') as f:
            for line in f:
                entry = line.strip()
                if not entry or entry.startswith('#'):
                    continue

                parts = [part.strip() for part in entry.split(',')]
                if len(parts) != 7:
                    logger.error(f"Line does not have 7 parts: {entry}")
                    continue

                page_range, field_type, field_name, *geometry = parts
                try:
                    x, y, width, height = map(float, geometry)
                except ValueError as e:
                    logger.error(f"Error parsing line: {line}. Error: {e}")
                    continue

                page_nums = self.parse_page_range(page_range, total_pages)
                if not page_nums:
                    logger.warning(
                        f"Page range {page_range} selects no pages within 1-{total_pages}; skipping line: {entry}"
                    )
                    continue
                for page_num in page_nums:
                    form_spec[page_num].append((field_type, field_name, x, y, width, height))
        return form_spec

    def process_pdf(self):
        """Create every widget from the spec file and write the output PDF.

        Failures are logged rather than raised: a widget that cannot be
        created is skipped, and any other error aborts the run without
        writing the output file.
        """
        logger.info(f"Starting to process PDF: {self.input_pdf}")
        if not os.path.isfile(self.input_pdf):
            logger.error(f"Input PDF not found: {self.input_pdf}")
            return

        try:
            # Step 1: Open the input PDF and count its pages
            pdf_wrapper = PdfWrapper(self.input_pdf)
            total_pages = len(pdf_wrapper.pages)
            logger.info(f"Total pages in input PDF: {total_pages}")

            # Step 2: Read form spec file once, bounded by the real page count
            form_spec = self.read_form_spec(total_pages)

            # Step 3: Create widgets page by page, in ascending page order
            for page_num in sorted(form_spec):
                if page_num < 1 or page_num > total_pages:
                    logger.error(f"Invalid page number: {page_num}")
                    continue
                logger.debug(f"Processing page {page_num}")
                for field_type, field_name, x, y, width, height in form_spec[page_num]:
                    self._create_field_widget(
                        pdf_wrapper, page_num, field_type, field_name, x, y, width, height
                    )

            # Step 4: Save the final PDF
            logger.debug(f"Saving final merged PDF to {self.output_pdf}")
            with open(self.output_pdf, "wb") as final_output_pdf_file:
                final_output_pdf_file.write(pdf_wrapper.read())

            logger.info(f"PDF processing completed. Output saved to {self.output_pdf}")
        except FileNotFoundError as e:
            logger.error(f"Form file not found: {self.form_file}. Error: {e}")
        except Exception as e:
            # logger.exception keeps the traceback so the failure can be diagnosed.
            logger.exception(f"An error occurred while processing the PDF. Error: {e}")

    def _create_field_widget(self, pdf_wrapper, page_num, field_type, field_name, x, y, width, height):
        """Validate one spec entry, convert its coordinates and add the widget."""
        # Make field_name unique for repeated multi-page data
        unique_field_name = f"{field_name}_{page_num}"

        if x < 0 or y < 0 or width <= 0 or height <= 0:
            logger.error(f"Invalid widget dimensions: x={x}, y={y}, width={width}, height={height}")
            return

        # Override in case of any observed form field misalignment
        x += self.form_field_x_offset
        # Transform y-coordinate (top left origin vs bottom left origin)
        y = self.page_height - y - height
        # Override in case of any observed form field misalignment
        y += self.form_field_y_offset
        # Set text fields to multiline if taller than single line
        multiline = height > self.single_line_max_height

        logger.debug(f"Creating widget: {unique_field_name}, type: {field_type}, on page {page_num} at ({x}, {y}) with size ({width}, {height}), multiline: {multiline}")

        try:
            pdf_wrapper.create_widget(
                widget_type=field_type,
                name=unique_field_name,
                page_number=page_num,
                x=x,
                y=y,
                width=width,
                height=height,
                max_length=0,                   # optional
                font="Helvetica",               # optional, default "Helvetica"
                font_size=10,                   # optional, default 12
                font_color=(0, 0, 0),           # optional, default (0, 0, 0)
                bg_color=(250, 250, 254),       # optional
                border_color=(0, 0, 0),         # optional
                border_width=0,                 # optional
                alignment=0,                    # optional, 0=left, 1=center, 2=right
                multiline=multiline             # optional, calculated based on height
            )
        except ValueError as e:
            logger.error(f"Error creating widget for field {unique_field_name} on page {page_num}. Error: {e}")
        except IndexError as e:
            logger.error(f"Index error creating widget for field {unique_field_name} on page {page_num}. Error: {e}")
        except Exception as e:
            logger.error(f"Unexpected error creating widget for field {unique_field_name} on page {page_num}. Error: {e}")
        else:
            logger.debug(f"Added field: {field_type} - {unique_field_name} on page {page_num} multiline: {multiline}")

# Command-line entry point: expects exactly two arguments, the input PDF
# followed by the form specification file.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        logger.error("Incorrect number of arguments provided")
        print('Usage: python script.py <input_pdf> <input_form_file>')
        sys.exit(1)

    input_pdf, form_file = cli_args

    logger.info(f"Starting PDForm process with input PDF: {input_pdf} and form file: {form_file}")
    pd_form = PDForm(input_pdf, form_file)
    pd_form.process_pdf()

### Example form spec file, named "THB - 90-Day Reflection Journal - Current_spec.txt"
This is for a 96-page document in which the same fields are required on pages 5-94.
Each line has the format:
page[-endPage],fieldType,fieldName,x,y,width,height

5-94,text,today,403,59,136,22
5-94,text,feeling,403,107,136,22
5-94,text,grateful1,85,218,465,22
5-94,text,grateful2,85,250,465,22
5-94,text,grateful3,85,282,465,22
5-94,text,reflections,64,406,485,153
5-94,text,wishes,64,604,485,146

### Example command line

$ python PDForm.py "THB - 90-Day Reflection Journal - Current.pdf" "THB - 90-Day Reflection Journal - Current_spec.txt"

Input / Output PDFs

Input PDF: https://drive.google.com/file/d/1co3VmLRQVCYjxwuzwdwiL7FerlgkFqIg/view?usp=sharing
Output PDF: https://drive.google.com/file/d/1yLG3C3_GCc0K9gMWjE9GnJYQ4_8UY1vA/view?usp=sharing

Screenshots

Input: PDForm widget layer behind graphics Output: PDForm widget layer on top of graphics

chinapandaman commented 2 days ago

Hey, thanks for posting.

Unfortunately there's currently no such layer control functionality supported by the library. In your case I have two suggestions:

1) If, after creating the widgets, you fill the form using PdfWrapper, you don't need to worry about this issue: filling naturally removes the widgets and leaves only the text on your PDF.
2) If you fill using FormWrapper: I just released v1.4.35, and an alpha band is now supported for bg_color. You can set a combination of bg_color=(250, 250, 254, 0) and border_width=0 to make a widget transparent. If you then set flatten=True when filling, you will get the same filled PDF as if you had filled it using PdfWrapper.

Let me know if you have more questions.