jmjjg / cloudooo

XML-RPC document conversion server ( Mirror of https://lab.nexedi.com/nexedi/cloudooo ; Please submit patches and issues there )
Other
0 stars 0 forks source link

Update pypdf to pypdf2 #3

Open jmjjg opened 1 year ago

jmjjg commented 1 year ago

Version 1.27.5 seems to be compatible with both Python 2 and Python 3.

Use the migration guide.

Run tests (python2 -m unittest discover ?)

jmjjg commented 1 year ago

Quick compatibility test outside of cloudooo

#!/usr/bin/env python

# pip install pypdf2==1.27.5
# apt install python3-pip
# pip3 install pypdf2==1.27.5
# python2 ./foo.py
# python3 ./foo.py

# from pyPdf import PdfFileWriter, PdfFileReader
# from pyPdf.generic import NameObject, createStringObject

from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import NameObject, createStringObject

def setMetadata(document, metadata, output_path="/opt/cloudooo/output.pdf"):
    """Write a copy of a PDF whose Info dictionary carries new metadata.

    Every page of ``document`` is added to a new PDF, the new PDF's Info
    dictionary is updated from ``metadata``, and the result is written to
    ``output_path``.

    Keyword arguments:
    document -- path of the PDF file to read.
    metadata -- dictionary mapping Info entry names (without the leading
                "/") to string values. "ModificationDate" is accepted as an
                alias of "ModDate", and a list of "Keywords" is joined with
                single spaces. The dictionary itself is not modified.
    output_path -- path of the PDF file to write.

    Returns None.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate

    # Work on a copy so the caller's dictionary is left untouched.
    metadata = dict(metadata)

    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata["ModDate"] = modification_date
    keywords = metadata.get("Keywords")
    if isinstance(keywords, list):
        metadata["Keywords"] = " ".join(keywords)

    # str.capitalize() lowercases everything after the first character, which
    # would turn the camel-case entry names into "/Moddate" and
    # "/Creationdate"; map those two back to their proper spelling.
    camel_case_names = {"Moddate": "ModDate", "Creationdate": "CreationDate"}
    args = {}
    for key, value in metadata.items():
        name = key.capitalize()
        name = camel_case_names.get(name, name)
        args[NameObject("/" + name)] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    # PyPDF2 reads objects from the input stream on demand, so the input file
    # has to stay open until the writer has finished writing.
    with open(document, "rb") as input_stream:
        input_pdf = PdfFileReader(input_stream)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        with open(output_path, "wb") as output_stream:
            output_pdf.write(output_stream)

if __name__ == "__main__":
    # Smoke test: run the conversion only when executed as a script, so that
    # importing this module has no side effect on the filesystem.
    setMetadata("/opt/cloudooo/annexe.pdf", {"Title": "Test CBU"})