galkahana / HummusJS

Node.js module for high performance creation, modification and parsing of PDF files and streams
http://www.pdfhummus.com
Other
1.14k stars 169 forks source link

Question: Is it possible to replace text in the (mediabox) of the pdf? #445

Open andresRiosC opened 4 years ago

andresRiosC commented 4 years ago

Hi,

I'm trying to replace text in the attached PDF, whose content is inside the (mediabox).

I am running the following script on the attached file.

SCRIPT

var hummus = require('hummus');

/**
 * Converts a value accepted by `Buffer.from` (normally a string) into a plain
 * array of byte values.
 *
 * Strings are encoded with `Buffer.from`'s default encoding (UTF-8), so a
 * non-ASCII character may produce more than one array element.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} One element (0-255) per encoded byte; `[]` for `''`.
 */
function strToByteArray(str) {
  // Buffer.from is a factory function, not a constructor: call it without
  // `new`. Array.from copies the bytes into a plain (non-typed) array.
  return Array.from(Buffer.from(str));
}

/**
 * Replaces text inside the content stream of one page and writes the modified
 * PDF to a new file.
 *
 * The page's `/Contents` stream is read in full, `String.prototype.replace`
 * is applied to it, and the result is written back as an unfiltered stream
 * under the same object ID.
 *
 * NOTE(review): the replacement operates on the raw content-stream bytes. Text
 * that the PDF stores split across several operators, or encoded through a
 * font-specific encoding, will presumably not match a plain pattern - verify
 * against the actual stream contents.
 *
 * @param {string} sourceFile - Path of the PDF to read.
 * @param {string} targetFile - Path the modified PDF is written to.
 * @param {number} pageNumber - Page index passed to the parser's `parsePage`.
 * @param {string|RegExp} findText - Pattern handed to `String.prototype.replace`.
 * @param {string|Function} replaceText - Replacement handed to `String.prototype.replace`.
 * @throws {TypeError} If the page's `/Contents` entry is missing or is not a
 *   single indirect reference (e.g. it is an array of content streams).
 */
function replaceText(sourceFile, targetFile, pageNumber, findText, replaceText) {
    const writer = hummus.createWriterToModify(sourceFile, {
        modifiedFilePath: targetFile
    });
    const sourceParser = writer.createPDFCopyingContextForModifiedFile().getSourceDocumentParser();
    const pageDictionary = sourceParser.parsePage(pageNumber).getDictionary();

    // Only a single indirect /Contents reference can be overwritten in place.
    // Fail with a descriptive message instead of "getObjectID is not a function".
    const contentsEntry = pageDictionary.toJSObject().Contents;
    if (!contentsEntry || typeof contentsEntry.getObjectID !== 'function') {
        throw new TypeError(
            'Page ' + pageNumber + ' has no /Contents entry that is a single indirect reference; ' +
            'cannot replace its content stream'
        );
    }
    const textObjectId = contentsEntry.getObjectID();
    const textStream = sourceParser.queryDictionaryObject(pageDictionary, 'Contents');

    // Read the whole stream into one flat array of byte values.
    const data = [];
    const readStream = sourceParser.startReadingFromStream(textStream);
    while (readStream.notEnded()) {
        Array.prototype.push.apply(data, readStream.read(10000));
    }

    // latin1 maps every byte 0-255 to exactly one character and back, so bytes
    // that are not touched by the replacement are written back unchanged.
    // (A UTF-8 round trip would rewrite any byte sequence that is not valid UTF-8.)
    const original = Buffer.from(data).toString('latin1');
    const updated = original.replace(findText, replaceText);
    const updatedBytes = Array.from(Buffer.from(updated, 'latin1'));

    // Overwrite the original content-stream object with the edited bytes.
    const objectsContext = writer.getObjectsContext();
    objectsContext.startModifiedIndirectObject(textObjectId);
    const stream = objectsContext.startUnfilteredPDFStream();
    stream.getWriteStream().write(updatedBytes);
    objectsContext.endPDFStream(stream);
    objectsContext.endIndirectObject();
    writer.end();
}

// Example run: for page index 0 of ./senapliegos.pdf, replace every match of
// "SENA" (global regex) with "SERVICIO NACIONAL" and write the result to ./output.pdf.
replaceText('./senapliegos.pdf', './output.pdf', 0, /(SENA)/g, 'SERVICIO NACIONAL');

senapliegos.pdf