galkahana / HummusJS

Node.js module for high performance creation, modification and parsing of PDF files and streams
http://www.pdfhummus.com
Other
1.14k stars 169 forks source link

createWriterToModify() is very slow #368

Open Dieken opened 5 years ago

Dieken commented 5 years ago
const express = require('express');
const fs = require('fs');
const hummus = require('hummus');
const stream = require('stream');

// Load the template PDF into memory once, at module load; render() reads this buffer.
const template = fs.readFileSync(__dirname + '/a.pdf');

/**
 * Writes the text '18512345678' onto page index 0 of the in-memory template
 * PDF, with output directed at a hummus.PDFStreamForResponse wrapping `res`,
 * and logs the elapsed milliseconds of each stage to the console.
 *
 * @param {object} res - Response object handed to hummus.PDFStreamForResponse.
 */
function render(res) {
    // One timestamp is recorded before the first stage and after every stage.
    const stamps = [];
    const mark = () => {
        stamps.push(Date.now());
    };

    mark();
    const source = new hummus.PDFRStreamForBuffer(template);
    const sink = new hummus.PDFStreamForResponse(res);
    const pdfWriter = hummus.createWriterToModify(source, sink);
    mark();

    const pageModifier = new hummus.PDFPageModifier(pdfWriter, 0, true);
    mark();

    const context = pageModifier.startContext().getContext();
    mark();

    // The font lookup is timed together with the text drawing itself.
    const textOptions = {
        font: pdfWriter.getFontForFile('/System/Library/Fonts/PingFang.ttc'),
        size: 14,
        colorspace: 'gray',
        color: 0x00,
    };
    context.writeText('18512345678', 215, 580, textOptions);
    mark();

    pageModifier.endContext().writePage();
    mark();

    pdfWriter.end();
    mark();

    // Each label is paired with the gap between two consecutive timestamps.
    const labels = ["t2-t1=", "t3-t2=", "t4-t3=", "t5-t4=", "t6-t5=", "t7-t6="];
    labels.forEach((label, i) => {
        console.log(label + (stamps[i + 1] - stamps[i]));
    });
    console.log();
}

const app = express();

// GET / : send a 200 status with a PDF content type, let render() write the
// document into the response, then close the response.
app.get('/', (_req, res) => {
    const headers = { 'Content-Type': 'application/pdf' };
    res.writeHead(200, headers);
    render(res);
    res.end();
});

app.listen(3000);

The a.pdf is 1.7MB.

## this takes about 800ms
$ time curl -s -v -o b.pdf localhost:3000/

Output from the service:

$ node c.js
t2-t1=697
t3-t2=0
t4-t3=10
t5-t4=4
t6-t5=54
t7-t6=47

t2-t1=669
t3-t2=0
t4-t3=7
t5-t4=2
t6-t5=50
t7-t6=49

t2-t1=650
t3-t2=0
t4-t3=7
t5-t4=2
t6-t5=50
t7-t6=50

Actually, b.pdf differs from a.pdf only at the end: it basically replaces %%EOF with more content. Is there any way to make this faster?

nodehack commented 5 years ago

I'm seeing the same issue. I have a pdf I'm modifying in an endpoint and it takes ~5 seconds to createWriterToModify on my server for a 1.9MB file. Locally on my machine it takes about 600ms like you're seeing. Is this process multithreaded? Can it be written with a callback instead of blocking?