Open arlingtonkirk opened 4 years ago
It looks like creating a new x-ray instance inside the promise callback did the trick. Please close this issue.
const phantom = require('x-ray-phantom');
const Xray = require('..');

// Prefix that turns a scraped job id into an absolute job-detail URL.
const JOB_DETAIL_URL =
  'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job=';
// Search-results page listing the jobs to crawl.
const SEARCH_URL =
  'https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10';

/**
 * x-ray filter: builds the job-detail URL from a scraped job id.
 *
 * @param {*} value - Scraped value (expected to be the job id).
 * @returns {string|null} The detail URL for a truthy value; otherwise the
 *   value itself when it is a string (i.e. ''), else null.
 */
function makeLink(value) {
  if (value) {
    return JOB_DETAIL_URL.concat(value);
  }
  return typeof value === 'string' ? value : null;
}

/**
 * Scrapes the title and description of a single job-detail page.
 *
 * A fresh x-ray instance is created per request; per the issue's resolution,
 * reusing one instance across the loop only yielded the first item.
 *
 * @param {string} link - Absolute URL of the job-detail page.
 * @returns {Promise<Object>} Resolves with `{ title, description }`.
 */
function fetchJobDetails(link) {
  const scrape = Xray().driver(phantom({ webSecurity: false }));
  // The scrape call returns a thenable; wrap it so callers get a real Promise.
  return Promise.resolve(
    scrape(link, {
      title: '.titlepage',
      description: '.text',
    })
  );
}

const x = Xray({ filters: { makeLink } }).driver(phantom({ webSecurity: false }));

x(SEARCH_URL, '.ftlrow', [
  {
    link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink',
  },
])
  .then((rows) => {
    // Skip rows where the filter produced no usable link.
    const links = rows.map((row) => row.link).filter(Boolean);
    // Wait for every detail page before reporting, instead of logging
    // synchronously while the requests are still in flight.
    return Promise.all(links.map((link) => fetchJobDetails(link)));
  })
  .then((jobs) => {
    console.log(jobs);
  })
  .catch((err) => {
    // Surface scrape failures rather than leaving the rejection unhandled.
    console.error(err);
  });
The site I'm trying to crawl is rendered by JS and the links use onClick with # as the value for href. I've successfully constructed the links via a filter, but for some reason can't crawl them in a for loop. See code below.
Your environment
Expected behaviour
Console should spit out 10 elements.
Actual behaviour
Console only outputs the first item.
const phantom = require('x-ray-phantom');
const Xray = require('..');

// Filters made available to x-ray selectors through the `| name` syntax.
const filters = {
  // Prepends the job-detail URL to a truthy scraped value; anything that is
  // not a string afterwards becomes null.
  makeLink(value) {
    const prefix =
      'https://hsbc.taleo.net/careersection/external/jobdetail.ftl?lang=en_GB&job=';
    const link = value ? prefix.concat(value) : value;
    if (typeof link === 'string') {
      return link;
    }
    return null;
  },
};

const x = Xray({ filters }).driver(phantom({ webSecurity: false }));

const searchUrl =
  'https://hsbc.taleo.net/careersection/external/moresearch.ftl?lang=en_GB&dropListSize=10';
const rowSchema = [
  {
    link: '.editablesection div:nth-child(3) span:nth-child(5) | makeLink',
  },
];

// Scrape one `link` per `.ftlrow` element; the result callback is empty here.
x(searchUrl, '.ftlrow', rowSchema).then((myObj) => {});