johntitus / node-horseman

Run PhantomJS from Node
MIT License
1.45k stars 124 forks source link

Unhandled Rejection Error - Can't seem to catch this #326

Open NoelDavies opened 6 years ago

NoelDavies commented 6 years ago

I'm crawling a group of X pages on a site my employer has built, each page is a /show/{x} essentially. On some pages I get the below error (or a similar error), and some I don't.

Unhandled rejection Error at ClientRequest.<anonymous> (/path/to/global/node_modules_folder/node_modules/node-phantom-simple/node-phantom-simple.js:659:12) at emitOne (events.js:115:13) at ClientRequest.emit (events.js:210:7) at Socket.socketOnEnd (_http_client.js:435:9) at emitNone (events.js:110:20) at Socket.emit (events.js:207:7) at endReadableNT (_stream_readable.js:1045:12) at _combinedTickCallback (internal/process/next_tick.js:102:11) at process._tickCallback (internal/process/next_tick.js:161:9)

By using the following Code: Note: You can see I've tried a number of thing to catch the errors or prevent them from occuring but I've had no luck, my guess is that it's running out of memory of something?

var Horseman = require('node-horseman');
const readline = require('readline');
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout
});

let issuesThatCauseACrash = [15739, 15724];

var baseUrl = 'https://test.x.domain.com';

rl.question('What base URL would you like use for this X page visitor? (' + baseUrl + ')', (answer) => {
  if (answer.length !== 0) {
    baseUrl = answer;
  }

 try{
    rl.close();
    hg.setup();
    hg.login();
  } catch (err) {
    console.log('Error');
    hg.horseman.close();
    hg.visitCase(caseId);
  }
});

var hg = {
    horseman: {},

    setup: function () {
        hg.horseman = new Horseman({
            timeout: 500000,
            cookiesFile: './cookies.txt',
            loadImages: false,
            diskCache: false,
            diskCachePath: './cache/'
        })
        ;
    },

    login: function () {
        hg.horseman
          .log('Opening login page')
          .open(baseUrl + '/login')
          .type('input#username', 'usernameHere')
          .type('input#password', 'passwordHere')
          .click('#_submit')
          .log('Logging in...')
          .waitForNextPage()
          .open(baseUrl + '/')
          .waitForSelector('#table-of-new-items')
          .log('opened')
          .html('#table-new-cases tbody tr td:first-child() a')
          .then((attribute) => {
            this.visitCase(attribute);
          })
        ;
    },

    visitCase: function(caseId) {
        caseId = '' + caseId + '';
        console.log(caseId);
        var rawCaseId = '' + caseId.replace(/^BS/, '').replace(/^0+/, '');

        hg.horseman
            .log('Opening case ' + rawCaseId)
            .open(baseUrl + '/cases/' + rawCaseId)

                // .on('error', function (msg, trace) {
                //     console.log('error: ', msg, trace);
                // })
                        // .catch(function(e) {
                        //     console.log("horseman error: "+e);
                        // })
            // .log('Page opened, waiting for selector')
            .waitForSelector('#horse-accordion')
            // .log('found accordian')
            // .on('resourceReceived', (response) => {
            //  if (new RegExp(/newFrontEndPremium/).test(response.url)) {
            //      nextCaseId = rawCaseId - 1;

            //      if (nextCaseId > 0) {
            //          hg.visitCase(nextCaseId);
            //      }
            //  }
            // })
            .wait(2000)
            .close()
            .then(() => {
                nextCaseId = rawCaseId - 1;

                if (nextCaseId > 0) {
                    hg.visitCase(nextCaseId);
                }
            })
            // .status()
            // .then((statusCode) => {
            //  nextCaseId = rawCaseId - 1;

            //  if (nextCaseId > 0) {
            //      hg.visitCase(nextCaseId);
            //  }
            // })
        ;

    }
};

it always dies on the .open().