ruipgil / scraperjs

A complete and versatile web scraper.
MIT License
3.7k stars 188 forks source link

Unable to perform inside loop? #75

Closed avrebarra closed 6 years ago

avrebarra commented 6 years ago

I'm sorry if I implemented this incorrectly (correct me if I'm wrong). I tried to run the scraper inside a loop, but for some reason it never runs to completion.

So, I have this code:

let scraperjs = require('scraperjs');

/**
 * Scraper function: loads a page with the static scraper, gathers the text
 * of its "div" matches, and forwards the gathered value to a callback.
 *
 * @param {string} pageURI - Passed to `scraperjs.StaticScraper.create`.
 * @param {Function} callback - Invoked with the value returned by the
 *     extraction step (the result of `.get()` on the mapped selection).
 */
function scrape(pageURI, callback) {
    console.log('checkpoint 2');

    // Extraction step handed to the scraper; `$` is the selector function it supplies.
    const collectDivTexts = ($) => {
        console.log('checkpoint 3');
        const divs = $("div");
        // Must stay a regular function (not an arrow): the body reads `this`.
        const texts = divs.map(function() {
            return $(this).text();
        });
        return texts.get();
    };

    const page = scraperjs.StaticScraper.create(pageURI);
    // Forward only the first argument so the callback sees exactly one value.
    page.scrape(collectDivTexts).then((sometext) => {
        callback(sometext);
    });
}

// loop function
// Starts one scrape and then spins in a synchronous `while` loop until the
// scrape callback sets `finished`.
// NOTE(review): this is the reproduction of the reported hang. The loop can
// only exit once the callback has run, but the callback is presumably
// delivered asynchronously (after the page has been fetched), and JavaScript
// cannot run it while this loop is still occupying the thread. That would
// explain why the output stops after 'checkpoint 2' — confirm against the
// event-loop documentation.
function testloop() {
    // Set to true only from inside the scrape callback below.
    let finished = false;
    // Guards the scrape call so it is issued on the first iteration only.
    let processed = false;

    // Busy-wait: nothing inside the loop yields control, so `finished` can
    // never change while it is spinning.
    while (!finished) {
        if (!processed) {
            console.log('checkpoint 1');
            scrape('https://www.google.com/', (sometext) => {
                console.log('checkpoint 4', sometext);
                finished = true;
            });
            processed = true;
        }
    }
}

/**
 * Non-loop variant: starts a single scrape and returns immediately,
 * leaving the callback to report the result whenever it is delivered.
 */
function testnonloop() {
    const targetURI = 'https://www.google.com/';

    // Logs the final checkpoint together with the scraped value.
    const reportResult = (sometext) => {
        console.log('checkpoint 4', sometext);
    };

    console.log('checkpoint 1');
    scrape(targetURI, reportResult);
}

I tried running both functions, expecting all four checkpoints in the output. Strangely, the version with the loop gets stuck at checkpoint 2 and never progresses. [I have also tried using a promise instead of the callback style, but the output is still the same.]

Where did I go wrong?

avrebarra commented 6 years ago

Sorry, this was actually my mistake, haha. The fault lies in my own logic, not in scraperjs.

I got the answer from stackoverflow here

Issue closed!