nisaacson / pdf-extract

Node PDF Extract
MIT License
383 stars 76 forks source link

One of the examples in Readme.md is wrong #13

Closed Loksly closed 8 years ago

Loksly commented 8 years ago

Text extract from searchable pdf

Extract from a pdf file which contains actual searchable text

// Example: extract the embedded text from a searchable PDF.
// `callback` is not defined in this snippet; the enclosing code must supply it.
var inspect = require('eyes').inspector({ maxLength: 20000 });
var pdf_extract = require('pdf-extract');

// NOTE(review): Node itself does not expand '~' — confirm pdf-extract accepts
// this path, or substitute a real absolute path.
var absolute_path_to_pdf = '~/Downloads/electronic.pdf';

// type 'text': extract the actual text in the pdf file
var options = { type: 'text' };

// Callback handed to pdf_extract; forwards any error it receives.
function onExtractCallback(err) {
  if (err) {
    return callback(err);
  }
}

// 'complete' handler: log the pages, then hand them to the caller.
function onComplete(data) {
  inspect(data.text_pages, 'extracted text pages');
  callback(null, data.text_pages); // data.text_pages, not just text_pages
}

// 'error' handler: log the failure, then report it to the caller.
function onError(err) {
  inspect(err, 'error while extracting pages');
  return callback(err);
}

var processor = pdf_extract(absolute_path_to_pdf, options, onExtractCallback);
processor.on('complete', onComplete);
processor.on('error', onError);

I guess this little typo doesn't deserve a fork and a pull request. Thank you very much for your software.

nisaacson commented 8 years ago

Pushed change and published as 1.0.9. Thanks for the correction!

Loksly commented 8 years ago

OCR Extract from scanned image

Extract from a pdf file which contains a scanned image and no searchable text

// Example: OCR a scanned PDF that contains no searchable text.
// `callback` is not defined in this snippet; the enclosing code must supply it.
var inspect = require('eyes').inspector({maxLength:20000});
var pdf_extract = require('pdf-extract');
// NOTE(review): Node itself does not expand '~' — confirm pdf-extract accepts
// this path, or substitute a real absolute path.
var absolute_path_to_pdf = '~/Downloads/sample.pdf';
var options = {
  type: 'ocr' // perform ocr to get the text within the scanned image
};

var processor = pdf_extract(absolute_path_to_pdf, options, function(err) {
  // Forward an error delivered through the function-style callback.
  if (err) {
    return callback(err);
  }
});
processor.on('complete', function(data) {
  inspect(data.text_pages, 'extracted text pages');
  // Fixed: the pages live on `data`; a bare `text_pages` is undeclared here
  // and would throw a ReferenceError.
  callback(null, data.text_pages);
});
processor.on('error', function(err) {
  inspect(err, 'error while extracting pages');
  return callback(err);
});

Thanks again. Have a nice day.