Extract ALL text content from the PDF.

I am going to answer my own question. I could not figure out how to do this using the library itself, so I implemented the following function using pdfjsLib:

/**
 * Collects the text of every page of a PDF into a single string.
 *
 * The document is opened with `pdfjsLib.getDocument`, every page from 1 to
 * `numPages` is requested, and the `str` field of each text item is
 * concatenated. No separator is inserted between items or between pages.
 *
 * @param {*} pdfUrl - Document source, handed unchanged to `pdfjsLib.getDocument`.
 * @returns {Promise<string>} Resolves with the concatenated text of all pages,
 *   in page order; rejects if loading the document or any page fails.
 */
function extractText(pdfUrl) {
  const loadingTask = pdfjsLib.getDocument(pdfUrl);

  // Resolves with the concatenated text items of a single page.
  // getPage is invoked before the first await, so page requests are still
  // issued synchronously and in ascending page order.
  const readPageText = async (doc, pageNumber) => {
    const pageProxy = await doc.getPage(pageNumber);
    const { items } = await pageProxy.getTextContent();
    return items.map((item) => item.str).join("");
  };

  return loadingTask.promise.then((doc) => {
    // Page numbers are 1-based.
    const pageNumbers = Array.from(
      { length: doc.numPages },
      (_, index) => index + 1
    );
    const pendingTexts = pageNumbers.map((pageNumber) =>
      readPageText(doc, pageNumber)
    );

    // Promise.all keeps results in page order regardless of resolution order.
    return Promise.all(pendingTexts).then((pageTexts) => pageTexts.join(""));
  });
}

I then called it in the onDocumentLoad handler. This worked, and I am now getting all the text content from the PDF.

react-pdf-viewer / examples

Extract ALL text content from the PDF. #98