I've got a custom scraper that pulls more Amazon data, inside an async.eachLimit loop.
asynclib.eachLimit(asins, 1, function (asin, callback) {
    // do stuff here
    startScrape(asin, function () {
        // startScrape calls this once the scrape has finished (or failed)
        callback();
    });
}, function (e) {
    console.log('All Done');
});
async function startScrape(asin, callback) {
    // Site definition consumed by my custom Scraper module;
    // getText returns the text content of a selector on the page.
    const site = {
        name: "amazon",
        hosts: ["www.amazon.com", "smile.amazon.com"],
        scrape: async page => {
            const title = await getText("#productTitle", page);
            const brand = await getText("#bylineInfo_feature_div", page);
            const bullets = await getText("#feature-bullets ul", page);
            const price = await getText("#priceblock_ourprice", page);
            const description = await getText("#productDescription", page);
            const type = await getText("body", page);
            return {
                title,
                brand,
                bullets,
                price,
                description,
                type
            };
        }
    };

    try {
        console.log('Fetching ' + asin);
        const data = await Scraper.scrape(`http://www.amazon.com/gp/product/${asin}/`, site);
        // do stuff with data
        callback();
    } catch (e) {
        // ignore the error and move on to the next ASIN
        callback();
    }
}
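For what it's worth, with a concurrency limit of 1 I'd expect only one scrape (and therefore only one Chromium instance) to be alive at a time. The loop above is roughly equivalent to the following sketch, assuming async v3, which accepts an async iteratee and returns a promise when no final callback is given (startScrape would have to return a promise instead of taking a callback for this form):

// Rough equivalent of the loop above, assuming async v3's promise support.
// startScrape is assumed here to return a promise rather than take a callback.
const asynclib = require('async');

async function run(asins) {
    await asynclib.eachLimit(asins, 1, async (asin) => {
        await startScrape(asin); // only one scrape in flight at any time
    });
    console.log('All Done');
}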
What I've found, though, is that after going through a list of thousands of ASINs, it eventually brings my PC to a halt. Looking in Task Manager, it seems a new instance of Chromium is created for every scrape, but they never get closed, so they eat up RAM.
Apologies for the poor image, PC locked up!
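I can't see anything in the loop itself that would keep Chromium alive, so my guess is the leak is inside Scraper.scrape (not shown here). Assuming it launches Puppeteer once per call, which is an assumption on my part, this is a minimal sketch of the pattern I'd expect to guarantee the browser gets closed:

// Hypothetical sketch of what Scraper.scrape might look like, assuming it
// launches Puppeteer per call. The Puppeteer usage is an assumption about
// what the module does; only the site.scrape(page) hook comes from my code.
const puppeteer = require('puppeteer');

async function scrape(url, site) {
    const browser = await puppeteer.launch({ headless: true });
    try {
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: 'domcontentloaded' });
        return await site.scrape(page);
    } finally {
        // Without this finally, a throw inside goto or site.scrape would leave
        // the Chromium process running, which would explain the build-up.
        await browser.close();
    }
}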