Closed Raidus closed 5 years ago
I couldn't figure out how the abort function works, but in the meantime I came up with my own solution. It's not really straightforward, but at least it works :-)
const rp = require('request-promise');
const Xray = require('x-ray');
// x-ray instance extended with a filter that turns site-relative links into
// absolute amazon.de URLs.
const x = Xray({
  filters: {
    /**
     * Prefixes a string value with the amazon.de origin.
     * Any non-string value is handed back unchanged.
     */
    correctURI(value) {
      if (typeof value !== 'string') {
        return value;
      }
      return `https://www.amazon.de${value}`;
    }
  }
});

// Number of result pages the paging loop visits at most.
const MAX_PAGES = 3;
/**
 * Downloads one page and extracts its result items and next-page link.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<Object>} Resolves with the object x-ray passes to its
 *   callback for the selector below (keys `pagnResult` and `pagnNextLink`);
 *   rejects with the download error or the error x-ray reports.
 */
const getPage = async url => {
  const html = await rp({ url });

  const extract = x(html, {
    pagnResult: x(html, '.s-result-item.celwidget', [
      {
        asin: '@data-asin'
      }
    ]),
    pagnNextLink: '#pagnNextLink@href | correctURI'
  });

  // x-ray returns a function that takes a node-style callback; adapt that
  // single callback to a promise.
  return new Promise((resolve, reject) => {
    extract((err, result) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(result);
    });
  });
};
/**
 * Placeholder stop condition for the paging loop.
 *
 * @returns {boolean} `true` to stop paging; this stub always answers `false`.
 */
function abort() {
  // some useful condition
  const shouldStop = false;
  return shouldStop;
}
/**
 * Collects the result items of up to MAX_PAGES consecutive search pages.
 *
 * Starts at the keyword search for `book` and follows each page's
 * `pagnNextLink`. Paging stops early when abort() returns true, when a page
 * has no next link, or when a page fails to load (the error is logged).
 *
 * @param {string} book - Search keywords; URL-encoded before use.
 * @returns {Promise<Array>} The `pagnResult` values of every page that
 *   loaded, flattened by one level.
 */
async function scrapeNpages(book) {
  const results = [];
  let nextUrl = `https://www.amazon.de/s?&field-keywords=${encodeURIComponent(book)}`;

  for (let page = 0; page < MAX_PAGES; ++page) {
    try {
      const result = await getPage(nextUrl);
      results.push(result.pagnResult);
      nextUrl = result.pagnNextLink;
      if (abort()) break;
    } catch (err) {
      console.log(err);
      // The next link is only known from a page that loaded, so after a
      // failure there is nothing left to follow.
      break;
    }

    // Last page reached: without a next link a further request would only
    // hit an undefined URL.
    if (!nextUrl) break;
  }

  return [].concat(...results);
}
// Demo run: print the items collected for a sample search. The rejection
// handler keeps an unexpected failure from surfacing as an unhandled
// promise rejection.
scrapeNpages('harry potter')
  .then(res => console.log(res))
  .catch(err => console.error(err));
In case anyone else comes up against this problem (I just came back to this repo after many months out of the scraping world and the "abort" method was my work), here's a working sample from my implementation of abort:
var xray = require('x-ray'),
x = xray(),
moment = require('moment');
/**
 * Scrapes the reviews listed at `data.url`, following the pagination link
 * until a page contains a review dated before today.
 *
 * @param {{url: string}} data - Holder of the first review page's URL.
 * @returns {Promise<Array<Object>>} Resolves with the reviews x-ray
 *   collected; rejects with the error x-ray reports.
 */
function scrape(data) {
  const crawl = x(data.url, '.review', [{
    title: '.review-title',
    content: '.review-text',
    id: '@id',
    rating: '.review-rating',
    date: '.review-date',
    reviewer: {
      name: '.author',
      id: '.author@href'
    }
  }])
    .paginate('.a-pagination .a-last a@href')
    .abort(hasReviewBeforeToday);

  // Building the chain only describes the crawl; x-ray requests nothing
  // until the returned function is invoked with a callback.
  return new Promise((resolve, reject) => {
    crawl((err, reviews) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(reviews);
    });
  });
}

/**
 * x-ray abort validator: tells x-ray to stop paginating once the current
 * page holds a review dated before the start of today.
 *
 * @param {Array<Object>} reviews - Reviews scraped from the current page.
 * @returns {boolean} `true` to stop paginating, `false` to continue.
 */
function hasReviewBeforeToday(reviews) {
  if (!Array.isArray(reviews)) {
    return false;
  }
  const today = moment().startOf('day');
  return reviews.some(review => {
    // A review without a date text cannot be compared; skip it rather
    // than throw.
    if (!review || typeof review.date !== 'string') {
      return false;
    }
    const date = moment(review.date.replace('on ', ''), 'MMMM D, YYYY');
    return date.isBefore(today);
  });
}
If this doesn't work, make sure you have the correct version of x-ray installed. :)
Subject of the issue
The documentation says that the abort method accepts a callback function with two arguments. I've tried a minimal example but it didn't work. I'm not sure how to use the "abort" function.
Could someone provide a minimal example of how to use this function?
I've tried the following code to understand the abort method, but I'm getting an error.
Error:
My environment