nisaacson / pdf-extract

Node PDF Extract
MIT License
384 stars 76 forks source link

Undefined vars in readme #20

Closed dsernst closed 5 years ago

dsernst commented 7 years ago

Here's an example you give:

var inspect = require('eyes').inspector({maxLength:20000});
var pdf_extract = require('pdf-extract');
var absolute_path_to_pdf = '~/Downloads/electronic.pdf'
var options = {
  type: 'text'  // extract the actual text in the pdf file
}
var processor = pdf_extract(absolute_path_to_pdf, options, function(err) {
  if (err) {
    return callback(err);
  }
});
processor.on('complete', function(data) {
  inspect(data.text_pages, 'extracted text pages');
  callback(null, data.text_pages);
});
processor.on('error', function(err) {
  inspect(err, 'error while extracting pages');
  return callback(err);
});

But the npm package eyes was never installed, and callback (called 3 times) is undefined.

mLuby commented 5 years ago

@nisaacson would you welcome a PR to fix this? I have a working version:

const path = require("path")
const pdf_extract = require('pdf-extract')

console.log("Usage: node thisfile.js the/path/tothe.pdf")
const absolute_path_to_pdf = path.resolve(process.argv[2])
if (absolute_path_to_pdf.includes(" ")) throw new Error("will fail for paths w spaces like "+absolute_path_to_pdf)

const options = {
  type: 'ocr', // perform ocr to get the text within the scanned image
  ocr_flags: ['--psm 1'] // automatically detect page orientation
}
const processor = pdf_extract(absolute_path_to_pdf, options, ()=>console.log("Starting…"))
processor.on('complete', data => callback(null, data))
processor.on('error', callback)
function callback (error, data) { error ? console.error(error) : console.log(data.text_pages[0]) }
nisaacson commented 5 years ago

sure