matthewmueller / x-ray

The next web scraper. See through the <html> noise.
MIT License
5.87k stars 350 forks source link

For Loop Only Crawls First Link #360

Open arlingtonkirk opened 4 years ago

arlingtonkirk commented 4 years ago

The site I'm trying to crawl is rendered by JS and the links use onClick with # as the value for href. I've successfully constructed the links via a filter, but for some reason can't crawl them in a for loop. See code below.

Your environment

Expected behaviour

Console should spit out 10 elements.

Actual behaviour

Console only outputs first item.

var phantom = require('x-ray-phantom');
var Xray = require('..');

// x-ray instance used by the snippet below: it registers the `makeLink`
// filter and uses the x-ray-phantom driver created with `webSecurity: false`.
var x = Xray({
  filters: {
    /**
     * Appends the scraped value to the job-detail URL prefix (`...&job=`).
     *
     * @param {*} value - Value produced by the selector the filter is piped from.
     * @returns {?string} The prefixed URL when `value` is truthy; an empty
     *   string is returned unchanged; any other falsy value yields `null`.
     */
    makeLink: function(value) {
      if (value) {
        // Return the concatenation directly instead of re-declaring the
        // `value` parameter with `var`.
        var jobDetailBase = 'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job=';
        return jobDetailBase.concat(value);
      }
      return typeof value === 'string' ? value : null;
    }
  }
}).driver(phantom({webSecurity: false}));

// Reproduction from the report: scrape the listing page, then request every
// row's link through the same `x` instance.
// The `[ { link: ... } ]` selector is scoped to `.ftlrow`, and each `link`
// value is piped through the `makeLink` filter named in the selector string.
x('https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10', '.ftlrow', [ { link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink', } ]).then(function(myObj) {

// NOTE: `myRecord` is declared but never assigned or read in this snippet.
let myRecord;   

// Each iteration starts a new request on the shared `x` instance; nothing in
// the loop waits for the previous request to finish before starting the next.
for (var key in myObj) {
    var myLink = myObj[key]['link'];
    // The callback parameter below is also named `myObj`, so inside the
    // callback it shadows the outer list of rows.
    x(myLink, {
      title: '.titlepage',
      description: '.text'
    }).then(function(myObj) {
        console.log(myObj);
        //why is output only one item and not all?
        //console.log(key);
    })
}

//console.log(myRecord);
//it's only showing one time because x is overwritten? 
// NOTE(review): `x` is never reassigned in this snippet. The follow-up comment
// in this thread reports that using a separate x-ray instance per link made
// all items appear, which points at the shared instance/driver rather than an
// overwritten variable -- not confirmed here.

})

arlingtonkirk commented 4 years ago

It looks like creating a new x-ray instance inside the promise callback (one per link) did the trick. Please close the issue.

var phantom = require('x-ray-phantom');
var Xray = require('..');

// x-ray instance used for the listing page: it registers the `makeLink`
// filter and uses the x-ray-phantom driver created with `webSecurity: false`.
var x = Xray({
  filters: {
    /**
     * Appends the scraped value to the job-detail URL prefix (`...&job=`).
     *
     * @param {*} value - Value produced by the selector the filter is piped from.
     * @returns {?string} The prefixed URL when `value` is truthy; an empty
     *   string is returned unchanged; any other falsy value yields `null`.
     */
    makeLink: function(value) {
      if (value) {
        // Return the concatenation directly instead of re-declaring the
        // `value` parameter with `var`.
        var jobDetailBase = 'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job=';
        return jobDetailBase.concat(value);
      }
      return typeof value === 'string' ? value : null;
    }
  }
}).driver(phantom({webSecurity: false}));

// Scrape the listing page, then fetch every linked detail page and log all of
// the scraped records together once every request has finished.
//
// Step 1: the `[ { link: ... } ]` selector is scoped to `.ftlrow`; each `link`
// value is piped through the `makeLink` filter registered on `x`.
x('https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10', '.ftlrow', [
  {
    link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink',
  }
]).then(function(myObj) {
  // Step 2: start one detail request per row. Rows without a usable link are
  // skipped, because `makeLink` can yield null or an empty string.
  const detailRequests = myObj
    .filter(function(row) {
      return Boolean(row && row.link);
    })
    .map(function(row) {
      // A dedicated x-ray instance (with its own phantom driver) per link is
      // the workaround described in this thread for the "only the first link
      // is crawled" symptom seen when every request shares one instance.
      const y = Xray().driver(phantom({webSecurity: false}));

      // `.then` is called explicitly so that a promise for the scraped
      // record is what gets collected.
      return y(row.link, {
        title: '.titlepage',
        description: '.text'
      }).then(function(detail) {
        return detail;
      });
    });

  // Promise.all settles only after every detail request has resolved and
  // keeps the results in the same order as the rows.
  return Promise.all(detailRequests);
}).then(function(myNewObj) {
  // Logging happens here, after the requests have completed, rather than
  // synchronously after the loop, where nothing has been scraped yet.
  console.log(myNewObj);
}).catch(function(err) {
  // Surface scrape failures instead of leaving an unhandled rejection.
  console.error(err);
});