Closed markb-trustifi closed 7 years ago
What's this inputStream object? You need to pass either a path or an object of this type: https://github.com/galkahana/HummusJS/wiki/Custom-streams#reading-streams
It's PDFRStreamForBuffer from issue #121.
I don't know. Could you share the script and the buffer?
This code works:
// Wrap the PDF on disk in hummus' file-backed read stream.
const filestream = new hummus.PDFRStreamForFile('somefile.pdf');
// Build a parser on top of that stream and ask whether the document is encrypted.
const pdfReader = hummus.createReader(filestream);
const isEncrypted = pdfReader.isEncrypted();
This code doesn't work:
// Load the whole PDF into memory.
const buff = fs.readFileSync('somefile.pdf');
// Hand the bytes to hummus through the custom buffer-backed read stream.
const bufferStream = new inputStream(buff);
const pdfReader = hummus.createReader(bufferStream);
const isEncrypted = pdfReader.isEncrypted();
Where inputStream is:
/**
 * Read stream over an in-memory buffer, exposing the reading-stream methods
 * (read, notEnded, setPosition, setPositionFromEnd, skip, getCurrentPosition)
 * so it can be passed to `hummus.createReader()` in place of a file path.
 */
class inputStream {
  /**
   * @param {Buffer|Uint8Array} buffer - Bytes of the document to read from.
   */
  constructor(buffer) {
    this.innerBuffer = buffer;
    this.position = 0;
    this.fileSize = buffer.byteLength;
  }

  /**
   * Reads bytes starting at the current position and advances the position
   * by `inAmount`.
   *
   * @param {number} inAmount - Number of bytes requested.
   * @returns {number[]} Plain array of byte values; shorter than `inAmount`
   *   when fewer bytes remain in the buffer.
   */
  read(inAmount) {
    const previousPosition = this.position;
    this.position += inAmount;
    // Copy the requested range into a plain array of numbers.
    // `[].concat(buffer.slice(...))` does NOT do that: concat treats the
    // Buffer as a single element, so the caller received `[<Buffer ...>]`
    // instead of the bytes themselves.
    return Array.prototype.slice.call(
      this.innerBuffer,
      previousPosition,
      this.position
    );
  }

  /**
   * @returns {boolean} True while the position is before the end of the buffer.
   */
  notEnded() {
    return this.position < this.fileSize;
  }

  /**
   * Moves the read position to an absolute offset from the start.
   * @param {number} inPosition - Offset from the beginning of the buffer.
   */
  setPosition(inPosition) {
    this.position = inPosition;
  }

  /**
   * Moves the read position to an offset measured back from the end.
   * @param {number} inPosition - Distance from the end of the buffer.
   */
  setPositionFromEnd(inPosition) {
    this.position = this.fileSize - inPosition;
  }

  /**
   * Advances the read position without returning any data.
   * @param {number} inAmount - Number of bytes to skip.
   */
  skip(inAmount) {
    this.position += inAmount;
  }

  /**
   * @returns {number} Current offset from the start of the buffer.
   */
  getCurrentPosition() {
    return this.position;
  }
}
I'd say there's a bug in inputStream. Want to give this a try?
/**
 * Read stream over an in-memory buffer, exposing the reading-stream methods
 * (read, notEnded, setPosition, setPositionFromEnd, skip, getCurrentPosition)
 * so it can be passed to `hummus.createReader()` in place of a file path.
 *
 * @param {Buffer|Uint8Array} buffer - Bytes of the document to read from.
 */
function PDFRStreamForBuffer(buffer) {
  // Keep a reference to the buffer instead of expanding the whole thing into
  // a JS array up front: that copy costs time and memory proportional to the
  // document size, while each read only needs a small range.
  this.innerBuffer = buffer;
  this.rposition = 0;
  this.fileSize = buffer.byteLength;
}

/**
 * Reads bytes starting at the current position and advances the position by
 * `inAmount`.
 *
 * @param {number} inAmount - Number of bytes requested.
 * @returns {number[]} Plain array of byte values; shorter than `inAmount`
 *   when fewer bytes remain in the buffer.
 */
PDFRStreamForBuffer.prototype.read = function (inAmount) {
  const start = this.rposition;
  this.rposition += inAmount;
  // Array.prototype.slice is generic over array-likes, so this copies just
  // the requested range of bytes into a plain array of numbers.
  return Array.prototype.slice.call(this.innerBuffer, start, this.rposition);
};

/**
 * @returns {boolean} True while the position is before the end of the buffer.
 */
PDFRStreamForBuffer.prototype.notEnded = function () {
  return this.rposition < this.fileSize;
};

/**
 * Moves the read position to an absolute offset from the start.
 * @param {number} inPosition - Offset from the beginning of the buffer.
 */
PDFRStreamForBuffer.prototype.setPosition = function (inPosition) {
  this.rposition = inPosition;
};

/**
 * Moves the read position to an offset measured back from the end.
 * @param {number} inPosition - Distance from the end of the buffer.
 */
PDFRStreamForBuffer.prototype.setPositionFromEnd = function (inPosition) {
  this.rposition = this.fileSize - inPosition;
};

/**
 * Advances the read position without returning any data.
 * @param {number} inAmount - Number of bytes to skip.
 */
PDFRStreamForBuffer.prototype.skip = function (inAmount) {
  this.rposition += inAmount;
};

/**
 * @returns {number} Current offset from the start of the buffer.
 */
PDFRStreamForBuffer.prototype.getCurrentPosition = function () {
  return this.rposition;
};
module.exports = PDFRStreamForBuffer;
Fixed, thanks
This worked for me. I was using another version of this that I found elsewhere, written in class format, and that one didn't work correctly.
This is a working solution, but unfortunately using PDFRStreamForBuffer instead of passing a local file path to hummus.createReader() takes approx. 10 seconds vs. 0.5 seconds when reading and parsing a 16 MB PDF file with hundreds of form fields.
// fileContent is a buffer with 16 MB PDF data in it.
// Wrapping the buffer in the read stream: takes 1 second.
const rstream = new hummus.PDFRStreamForBuffer(fileContent);
const pdfParser = hummus.createReader(rstream);
// Building the form model from the parser: takes 9 seconds.
const digitalForm = new PDFDigitalForm(pdfParser);
Since hummus is only a wrapper around the native C++ library, I can't debug into the details, but as far as I can see the function `PDFRStreamForBuffer.prototype.read` is called very often with `inAmount = 1`. So it seems that slicing lots of small pieces out of a huge array takes quite some time.
As a workaround I just dump the buffer content into a temporary file and pass that to hummus. Using this approach it takes only a fraction of a second to parse the 16 MB pdf file.
This is just additional information for anyone who came here through Google, like I did, to find out how to pass a buffer of PDF data to hummus.
I'm using the solution from issue #121:
But I receive an error: "Unable to start parsing PDF file". The PDF buffer is correct, and I can save it to disk as a PDF file.