ruipgil / scraperjs

A complete and versatile web scraper.
MIT License
3.71k stars 188 forks source link

DynamicScraper can not run #73

Open willin opened 7 years ago

willin commented 7 years ago
yarn global add phantomjs-prebuilt
// ./doc/examples/HackerNews.js
var sjs = require('../../src/Scraper');
/*
 Scrape the news in Hacker News.

 Loads the page with the DynamicScraper, collects the text of every
 `.title a` element, drops entries whose text is exactly 'More', and
 prints each remaining headline. Errors from the chain go to stderr.
 */
sjs.DynamicScraper
    .create('https://news.ycombinator.com')
    .scrape(function ($) {
        // NOTE(review): with DynamicScraper this callback is presumably
        // evaluated inside the PhantomJS page rather than in Node, so it is
        // kept ES5-only and the console.log below may never reach the Node
        // console - confirm against the scraperjs documentation.
        return $('.title a').map(function () {
            // Read the link text once and reuse it for logging and the result.
            var text = $(this).text();
            console.log(text);
            return text;
        }).get().filter(function (elm) {
            // Strict comparison: no type coercion when matching the literal.
            return elm !== 'More';
        });
    })
    .then(function (news) {
        news.forEach(function (elm) {
            console.log(elm);
        });
    }).catch(function (err) {
        console.error(err);
    });

I ran it and did not get any result...

ezuid commented 7 years ago

+1

bryanrasmussen commented 7 years ago

same here, my code

var scraperjs = require('scraperjs');

/*
 Collect the href of every <a> element on http://www.sundhed.dk using the
 DynamicScraper and print the resulting list.
 */
scraperjs.DynamicScraper.create('http://www.sundhed.dk')
    .scrape(function($) {
        // NOTE(review): this callback is presumably evaluated inside the
        // PhantomJS page, so these console.log calls may not show up in the
        // Node console - confirm against the scraperjs documentation.
        console.log('scrape worked');
        return $("a").map(function() {
            console.log('map');
            return $(this).attr('href');
        }).get();
    })
    .then(function(links) {
        // The original line ended with a stray '.', which is a syntax error.
        console.log(links);
    })
    .catch(function(err) {
        // Report failures instead of letting the chain end silently.
        console.error(err);
    });

the page is running angular so it needs to be interpreted to extract the urls in this case. I was thinking that maybe the dynamic scraper was running too soon and needed to wait to make sure it had the urls to get, but I guess the hacker news example doesn't actually need a dynamic scraper and all the urls should be available on load?

The HTML that loads before the script evaluates has a few URLs inside a conditional comment; when I use the static scraper on the site, I get those URLs out.

willin commented 7 years ago

phantom@0.8.4: v1 is no longer maintained, please upgrade to v2.0+ as soon possible.

hotnAny commented 7 years ago

ran the sample code and didn't get any result ...

var scraperjs = require('scraperjs');

// Dynamic scraper pointed at the Hacker News front page.
var scraper = scraperjs.DynamicScraper.create('https://news.ycombinator.com/');

scraper
    .scrape(function($) {
        // Gather the text of each `.title a` element into a plain array.
        var links = $(".title a");
        var titles = links.map(function() {
            return $(this).text();
        });
        return titles.get();
    })
    .then(function(news) {
        // Print the whole array of headlines at once.
        console.log(news);
    });

MatthewKosloski commented 6 years ago

+1