Hopding / pdf-lib

Create and modify PDF documents in any JavaScript environment
https://pdf-lib.js.org
MIT License
6.9k stars 657 forks source link

PDF with filled form fields asks to save in Acrobat Reader DC #185

Closed gfb107 closed 4 years ago

gfb107 commented 5 years ago

I've taken this approach to filling form fields, and it mostly works. I had to make quite a few changes for version 1.0.1, but got it figured out.

However, I find that if I open the filled PDF in Adobe Acrobat Reader DC, it renders as I expect, but when closing, I am prompted to save the changes I have made, which I haven't done explicitly.

The same thing happens with the filled PDF example: filled.pdf

Is there something more I can do when filling in the form to 'fix' the PDF so this won't happen?

msantic commented 5 years ago

Hey @gfb107, could you please share these few changes you made for version 1.0.1?

I'm getting this error: (node:49908) UnhandledPromiseRejectionWarning: TypeError: pdfDoc.catalog.getMaybe is not a function

gfb107 commented 5 years ago

Here you go.

const { PDFDocument, PDFName, PDFString, PDFNumber, PDFBool } = require( 'pdf-lib' );
const fs = require( 'fs' );

/**
 * Fills the top-level AcroForm fields of a PDF from a plain object and writes
 * the result to disk.
 *
 * For every entry of the form's `Fields` array whose `T` (field name) is an own
 * key of `data` with a non-null value, the value is stored in `V` as a PDF
 * string and `Ff` is set to 1 (bit 1 of the PDF field flags, i.e. read-only).
 * `NeedAppearances` is switched on so the viewer generates the field
 * appearances itself.
 *
 * @param {string} inFileName - Path of the PDF to read.
 * @param {Object} data - Field name -> value. Only own properties are used;
 *   `null`/`undefined` values are skipped; anything else is converted with
 *   `String()` (so an empty string clears a field).
 * @param {string} outFileName - Path the filled PDF is written to.
 * @returns {Promise<void>} Resolves once the output file has been written.
 * @throws {Error} If the catalog has no `AcroForm` or the form has no `Fields`.
 */
const fillForm = async ( inFileName, data, outFileName ) => {
  const pdfDoc = await PDFDocument.load( fs.readFileSync( inFileName ) );

  const form = pdfDoc.context.lookup( pdfDoc.catalog.get( PDFName.of( 'AcroForm' )));
  if ( !form ) {
    throw new Error( 'PDF does not contain a form' );
  }

  form.set( PDFName.of( 'NeedAppearances' ), PDFBool.True );

  const fieldRefs = form.context.lookup( form.get( PDFName.of( 'Fields' )));
  if ( !fieldRefs ) {
    throw new Error( 'PDF form does not contain any fields' );
  }

  // Borrowed from Object.prototype so it also works when `data` has no
  // prototype or defines its own `hasOwnProperty` key.
  const hasOwn = Object.prototype.hasOwnProperty;

  fieldRefs.array.forEach( ref => {
    const field = form.context.lookup( ref );
    const name = field.get( PDFName.of( 'T' ));

    // Own-property check: a field called e.g. "toString" must not pick up the
    // method inherited from Object.prototype as its value.
    if ( !name || !hasOwn.call( data, name.value )) {
      return;
    }

    const newValue = data[ name.value ];
    if ( newValue === null || newValue === undefined ) {
      return;
    }

    field.set( PDFName.of( 'V' ), PDFString.of( String( newValue )));
    // Writes the constant 1, replacing whatever flags the field had before.
    field.set( PDFName.of( 'Ff' ), PDFNumber.of( 1 ));
  });

  fs.writeFileSync( outFileName, await pdfDoc.save());
};

// Example values, keyed by the names of the form fields they are meant for.
const data = {
  'CharacterName 2': 'Mario',
  Age: '24 years',
  Height: `5' 1"`,
  Weight: '196 lbs',
  Eyes: 'blue',
  Skin: 'white',
  Hair: 'brown',
};

// fillForm is async: report a failure explicitly and exit non-zero instead of
// leaving the rejected promise unhandled.
fillForm( 'template.pdf', data, 'filled.pdf' ).catch( err => {
  console.error( err );
  process.exitCode = 1;
});
msantic commented 5 years ago

Thanks @gfb107, but it seems like something is not right. There are no errors, but the generated filled.pdf document doesn't contain the filled form items.

Here is your code with sample template.pdf https://www.dropbox.com/s/hu3awvi850l98ew/example.zip?dl=0

to test:

npm i
node index.js
gfb107 commented 5 years ago

All I can tell you is it works with the sample template file from the original example: template.pdf

msantic commented 5 years ago

It seems I ended up with a corrupted PDF template form. I don't know why or how, but if I make lots of form changes and saves in Adobe Acrobat, the PDF file becomes corrupted.

Thanks for your help.

msantic commented 5 years ago

@gfb107 have you figured out how to flatten the PDF document once the form is filled?

gfb107 commented 5 years ago

I don't know what you mean by flatten, but I have done nothing more.

mohammedabualsoud commented 5 years ago

I'm not able to set the values of the checkboxes and radio buttons correctly. Is anyone aware of this?

Code:

const {
  PDFDocument,
  PDFName,
  PDFString,
  PDFNumber,
  PDFBool,
} = require( 'pdf-lib' );
const fs = require( 'fs' );

/**
 * Resolves the document's AcroForm and returns the objects listed in its
 * `Fields` array.
 *
 * Side effect: sets `NeedAppearances` to true on the form dictionary.
 *
 * @param {Object} pdfDoc - Loaded document exposing `context` and `catalog`.
 * @returns {Array} One looked-up object per entry of the `Fields` array.
 * @throws {Error} If the catalog has no `AcroForm` or the form has no `Fields`.
 */
const getAcroFields = (pdfDoc) => {
  const { catalog, context } = pdfDoc;

  const acroForm = context.lookup(catalog.get(PDFName.of('AcroForm')));
  if (!acroForm) {
    throw new Error('PDF does not contain a form');
  }
  acroForm.set(PDFName.of('NeedAppearances'), PDFBool.True);

  // Everything below is resolved through the form's own context.
  const resolve = (entry) => acroForm.context.lookup(entry);

  const fieldList = resolve(acroForm.get(PDFName.of('Fields')));
  if (!fieldList) {
    throw new Error('PDF form does not contain any fields');
  }

  return fieldList.array.map((ref) => resolve(ref));
};

/**
 * Fills the form fields of a PDF from a plain object and writes the saved
 * document to a stream.
 *
 * For each field returned by `getAcroFields` whose `T` (field name) is an own
 * key of `data`, the value is stored in `V` (strings as PDF strings, finite
 * numbers as PDF numbers, booleans as PDF booleans) and `Ff` is set to 1
 * (bit 1 of the PDF field flags, i.e. read-only). Values of any other type,
 * and non-finite numbers, leave the field untouched.
 *
 * NOTE(review): booleans are stored as PDF booleans; checkbox/radio fields may
 * expect a name object (an on-state name or "Off") instead — confirm.
 *
 * @param {string} input - Path of the PDF to read.
 * @param {Object} data - Field name -> string | number | boolean.
 * @param {Object} outputStream - Receives the saved bytes via `write()`.
 * @returns {Promise<void>} Resolves after the bytes were handed to the stream.
 */
const fillForm = async ( input, data, outputStream ) => {
  // Borrowed from Object.prototype so it also works when `data` has no
  // prototype or defines its own `hasOwnProperty` key.
  const hasOwn = Object.prototype.hasOwnProperty;

  // Maps a JS value to the PDF object stored in `V`; undefined = unsupported.
  const toPdfValue = (value) => {
    switch (typeof value) {
      case 'string':
        return PDFString.of(value);
      case 'number':
        // NaN / Infinity cannot be written as a valid PDF number.
        return Number.isFinite(value) ? PDFNumber.of(value) : undefined;
      case 'boolean':
        return value ? PDFBool.True : PDFBool.False;
      default:
        return undefined;
    }
  };

  const buffer = fs.readFileSync(input);
  const pdfDoc = await PDFDocument.load(buffer);

  const fields = getAcroFields(pdfDoc);

  fields.forEach((field) => {
    const name = field.get(PDFName.of('T'));
    if (!name || !hasOwn.call(data, name.value)) {
      return;
    }

    const pdfValue = toPdfValue(data[name.value]);
    if (pdfValue === undefined) {
      return;
    }

    field.set(PDFName.of('V'), pdfValue);
    // Writes the constant 1, replacing whatever flags the field had before.
    field.set(PDFName.of('Ff'), PDFNumber.of(1));
  });

  const pdfBytes = await pdfDoc.save();
  outputStream.write(pdfBytes);
};

/**
 * Lists the names of the form fields found in a PDF file.
 *
 * @param {string} input - Path of the PDF to read.
 * @returns {Promise<string[]>} One entry per field returned by
 *   `getAcroFields`: the value of its `T` entry, or '' when it has none.
 */
const extractFields = async (input) => {
  const pdfDoc = await PDFDocument.load(fs.readFileSync(input));

  return getAcroFields(pdfDoc).map((field) => {
    const title = field.get(PDFName.of('T'));
    return title ? title.value : '';
  });
};

// Public API of this module: the form filler and the field-name lister.
module.exports = { fillForm, extractFields };
astanet commented 5 years ago

@mohammedabualsoud Could you try the examples below? https://github.com/Hopding/pdf-lib/issues/109#issuecomment-538998881 https://github.com/Hopding/pdf-lib/issues/111#issuecomment-539001086 Hope these help.

msvargas commented 4 years ago

I'm not able to set the values of the checkboxes and radio buttons correctly. Is anyone aware of this?

Code:

const {
  PDFDocument,
  PDFName,
  PDFString,
  PDFNumber,
  PDFBool,
} = require( 'pdf-lib' );
const fs = require( 'fs' );

/**
 * Resolves the document's AcroForm and returns the objects listed in its
 * `Fields` array.
 *
 * Side effect: sets `NeedAppearances` to true on the form dictionary.
 *
 * @param {Object} pdfDoc - Loaded document exposing `context` and `catalog`.
 * @returns {Array} One looked-up object per entry of the `Fields` array.
 * @throws {Error} If the catalog has no `AcroForm` or the form has no `Fields`.
 */
const getAcroFields = (pdfDoc) => {
  const { catalog, context } = pdfDoc;

  const acroForm = context.lookup(catalog.get(PDFName.of('AcroForm')));
  if (!acroForm) {
    throw new Error('PDF does not contain a form');
  }
  acroForm.set(PDFName.of('NeedAppearances'), PDFBool.True);

  // Everything below is resolved through the form's own context.
  const resolve = (entry) => acroForm.context.lookup(entry);

  const fieldList = resolve(acroForm.get(PDFName.of('Fields')));
  if (!fieldList) {
    throw new Error('PDF form does not contain any fields');
  }

  return fieldList.array.map((ref) => resolve(ref));
};

/**
 * Fills the form fields of a PDF from a plain object and writes the saved
 * document to a stream.
 *
 * For each field returned by `getAcroFields` whose `T` (field name) is an own
 * key of `data`, the value is stored in `V` (strings as PDF strings, finite
 * numbers as PDF numbers, booleans as PDF booleans) and `Ff` is set to 1
 * (bit 1 of the PDF field flags, i.e. read-only). Values of any other type,
 * and non-finite numbers, leave the field untouched.
 *
 * NOTE(review): booleans are stored as PDF booleans; checkbox/radio fields may
 * expect a name object (an on-state name or "Off") instead — confirm.
 *
 * @param {string} input - Path of the PDF to read.
 * @param {Object} data - Field name -> string | number | boolean.
 * @param {Object} outputStream - Receives the saved bytes via `write()`.
 * @returns {Promise<void>} Resolves after the bytes were handed to the stream.
 */
const fillForm = async ( input, data, outputStream ) => {
  // Borrowed from Object.prototype so it also works when `data` has no
  // prototype or defines its own `hasOwnProperty` key.
  const hasOwn = Object.prototype.hasOwnProperty;

  // Maps a JS value to the PDF object stored in `V`; undefined = unsupported.
  const toPdfValue = (value) => {
    switch (typeof value) {
      case 'string':
        return PDFString.of(value);
      case 'number':
        // NaN / Infinity cannot be written as a valid PDF number.
        return Number.isFinite(value) ? PDFNumber.of(value) : undefined;
      case 'boolean':
        return value ? PDFBool.True : PDFBool.False;
      default:
        return undefined;
    }
  };

  const buffer = fs.readFileSync(input);
  const pdfDoc = await PDFDocument.load(buffer);

  const fields = getAcroFields(pdfDoc);

  fields.forEach((field) => {
    const name = field.get(PDFName.of('T'));
    if (!name || !hasOwn.call(data, name.value)) {
      return;
    }

    const pdfValue = toPdfValue(data[name.value]);
    if (pdfValue === undefined) {
      return;
    }

    field.set(PDFName.of('V'), pdfValue);
    // Writes the constant 1, replacing whatever flags the field had before.
    field.set(PDFName.of('Ff'), PDFNumber.of(1));
  });

  const pdfBytes = await pdfDoc.save();
  outputStream.write(pdfBytes);
};

/**
 * Lists the names of the form fields found in a PDF file.
 *
 * @param {string} input - Path of the PDF to read.
 * @returns {Promise<string[]>} One entry per field returned by
 *   `getAcroFields`: the value of its `T` entry, or '' when it has none.
 */
const extractFields = async (input) => {
  const pdfDoc = await PDFDocument.load(fs.readFileSync(input));

  return getAcroFields(pdfDoc).map((field) => {
    const title = field.get(PDFName.of('T'));
    return title ? title.value : '';
  });
};

// Public API of this module: the form filler and the field-name lister.
module.exports = { fillForm, extractFields };

Use this: const getCheckboxValue = value => !!value ? PDFName.of("S#ED") : PDFName.of("Off");

Hopding commented 4 years ago

Update (9/16/2020)

pdf-lib now has form creation and filling APIs that should handle creating appearance streams automagically. See the form filling JSFiddle for a working example. Additional information is available in the README and API docs. The PDFTextField.setText method is of particular relevance to this issue.

Original

Hello @gfb107!

Generally speaking, there are two approaches one can take when filling AcroForm fields:

  1. Explicitly state how you want PDF readers to render your fields.
  2. Allow PDF readers to decide how they want to render the fields.

Option 1 is more complicated and error prone. It requires you to generate appearance streams that describe specifically how and what the reader should render on the page for the AcroForm. Option 2 is certainly the simplest and most flexible approach from the perspective of the code filling the forms. This approach requires that you set acroForm.set(PDFName.of('NeedAppearances'), PDFBool.True). This tells the PDF reader to generate its own appearance streams behind the scenes.

The form filling implementation you are using is implementing option 2. This is a valid approach that is documented in the PDF specification, so there's nothing wrong with this. However, it does have the unfortunate side effect of causing Adobe Acrobat to display a prompt asking if you want to save "your changes".

You might wonder why it would do this, seeing as how you've not made any changes. Well, strictly speaking, they are not your changes. They are the reader's changes. It is asking if you want to update the PDF to include the appearance streams that Acrobat generated for itself, thereby setting NeedAppearances to false. If you do this, then other readers will no longer generate their own appearance streams. They'll use the ones that Acrobat put in your document.

IMHO this is a poor design decision on Adobe's part. I think it is unnecessary and only serves to confuse the user. I'm not aware of any other PDF reader that does this. But regardless, I don't think that anything can be done to change this behavior. If you really want to get rid of the message then you cannot use option 2. You'll have to implement option 1 and generate your own appearance streams.

I originally explained how to do this in https://github.com/Hopding/pdf-lib/issues/48#issuecomment-441111299. But that is a very old comment written for an old version of pdf-lib. I recently created an updated version of that example here: https://github.com/Hopding/pdf-lib/issues/205#issuecomment-568938999.

I hope this helps. Please let me know if you have any additional questions!

fabioselau077 commented 4 years ago

Hello @gfb107!

Generally speaking, there are two approaches one can take when filling AcroForm fields:

  1. Explicitly state how you want PDF readers to render your fields.
  2. Allow PDF readers to decide how they want to render the fields.

Option 1 is more complicated and error prone. It requires you to generate appearance streams that describe specifically how and what the reader should render on the page for the AcroForm. Option 2 is certainly the simplest and most flexible approach from the perspective of the code filling the forms. This approach requires that you set acroForm.set(PDFName.of('NeedAppearances'), PDFBool.True). This tells the PDF reader to generate its own appearance streams behind the scenes.

The form filling implementation you are using is implementing option 2. This is a valid approach that is documented in the PDF specification, so there's nothing wrong with this. However, it does have the unfortunate side effect of causing Adobe Acrobat to display a prompt asking if you want to save "your changes".

You might wonder why it would do this, seeing as how you've not made any changes. Well, strictly speaking, they are not your changes. They are the reader's changes. It is asking if you want to update the PDF to include the appearance streams that Acrobat generated for itself, thereby setting NeedAppearances to false. If you do this, then other readers will no longer generate their own appearance streams. They'll use the ones that Acrobat put in your document.

IMHO this is a poor design decision on Adobe's part. I think it is unnecessary and only serves to confuse the user. I'm not aware of any other PDF reader that does this. But regardless, I don't think that anything can be done to change this behavior. If you really want to get rid of the message then you cannot use option 2. You'll have to implement option 1 and generate your own appearance streams.

I originally explained how to do this in #48 (comment). But that is a very old comment written for an old version of pdf-lib. I recently created an updated version of that example here: #205 (comment).

I hope this helps. Please let me know if you have any additional questions!

The biggest problem is going through the entire PDF to find the word to replace, correct? One way to resolve this would be to narrow down the area that has to be searched. For example, to substitute {{name}} with the name "Fabio", you would pass not only the value but also the page (or even the line) to the method, so that at most it only goes through that page or that line. This would avoid having to scan the entire PDF. It would normally be used to replace variables in the PDF, such as {{surname}}, {{year}}, and so on.

notHaiLuu commented 4 years ago

Here you go.

const { PDFDocument, PDFName, PDFString, PDFNumber, PDFBool } = require( 'pdf-lib' );
const fs = require( 'fs' );

/**
 * Fills the top-level AcroForm fields of a PDF from a plain object and writes
 * the result to disk.
 *
 * For every entry of the form's `Fields` array whose `T` (field name) is an own
 * key of `data` with a non-null value, the value is stored in `V` as a PDF
 * string and `Ff` is set to 1 (bit 1 of the PDF field flags, i.e. read-only).
 * `NeedAppearances` is switched on so the viewer generates the field
 * appearances itself.
 *
 * @param {string} inFileName - Path of the PDF to read.
 * @param {Object} data - Field name -> value. Only own properties are used;
 *   `null`/`undefined` values are skipped; anything else is converted with
 *   `String()` (so an empty string clears a field).
 * @param {string} outFileName - Path the filled PDF is written to.
 * @returns {Promise<void>} Resolves once the output file has been written.
 * @throws {Error} If the catalog has no `AcroForm` or the form has no `Fields`.
 */
const fillForm = async ( inFileName, data, outFileName ) => {
  const pdfDoc = await PDFDocument.load( fs.readFileSync( inFileName ) );

  const form = pdfDoc.context.lookup( pdfDoc.catalog.get( PDFName.of( 'AcroForm' )));
  if ( !form ) {
    throw new Error( 'PDF does not contain a form' );
  }

  form.set( PDFName.of( 'NeedAppearances' ), PDFBool.True );

  const fieldRefs = form.context.lookup( form.get( PDFName.of( 'Fields' )));
  if ( !fieldRefs ) {
    throw new Error( 'PDF form does not contain any fields' );
  }

  // Borrowed from Object.prototype so it also works when `data` has no
  // prototype or defines its own `hasOwnProperty` key.
  const hasOwn = Object.prototype.hasOwnProperty;

  fieldRefs.array.forEach( ref => {
    const field = form.context.lookup( ref );
    const name = field.get( PDFName.of( 'T' ));

    // Own-property check: a field called e.g. "toString" must not pick up the
    // method inherited from Object.prototype as its value.
    if ( !name || !hasOwn.call( data, name.value )) {
      return;
    }

    const newValue = data[ name.value ];
    if ( newValue === null || newValue === undefined ) {
      return;
    }

    field.set( PDFName.of( 'V' ), PDFString.of( String( newValue )));
    // Writes the constant 1, replacing whatever flags the field had before.
    field.set( PDFName.of( 'Ff' ), PDFNumber.of( 1 ));
  });

  fs.writeFileSync( outFileName, await pdfDoc.save());
};

// Example values, keyed by the names of the form fields they are meant for.
const data = {
  'CharacterName 2': 'Mario',
  Age: '24 years',
  Height: `5' 1"`,
  Weight: '196 lbs',
  Eyes: 'blue',
  Skin: 'white',
  Hair: 'brown',
};

// fillForm is async: report a failure explicitly and exit non-zero instead of
// leaving the rejected promise unhandled.
fillForm( 'template.pdf', data, 'filled.pdf' ).catch( err => {
  console.error( err );
  process.exitCode = 1;
});

You saved my life @punisher97. Works like a charm!!!

Hopding commented 4 years ago

pdf-lib now has form creation and filling APIs that should handle creating appearance streams automagically. See the form filling JSFiddle for a working example. Additional information is available in the README and API docs. The PDFTextField.setText method is of particular relevance to this issue.