bda-research / node-webcrawler

Crawler is a web spider written in Node.js. It gives you the full power of jQuery on the server to parse a large number of pages as they are downloaded, asynchronously.
MIT License

still have memory leaks? #10

Open HarryF514 opened 7 years ago

HarryF514 commented 7 years ago

With this code:

var Crawler = require("node-webcrawler");
var url = require('url');
var jsdom = require('jsdom');
var c = new Crawler({
    maxConnections : 10,
    jQuery: jsdom,
    // This will be called for each crawled page
    // This will be called for each crawled page
    callback : function (error, result, $) {
        // With the jQuery option set to jsdom, $ is jsdom-backed jQuery here;
        // Cheerio (a lean server-side implementation of core jQuery)
        // is the default when no jQuery option is given
        if(error){
            console.log(error);
        }else{

            try {
                console.log($("title").text());
                $('a').each(function (index, a) {
                    var toQueueUrl = $(a).prop('href');
                    //console.log(toQueueUrl);
                    c.queue(toQueueUrl);
                });

            } catch (e) {
                console.log(e);
            }
        }
    }
});

c.queue('http://www.wandoujia.com/');
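
One thing worth flagging in the snippet above, independent of any leak inside the module: the callback queues every href it finds, including duplicates and relative paths, so the internal queue can grow without bound on a site that links back to itself. Below is a minimal sketch of resolving and deduplicating links before queueing; the queueOnce helper and the seen table are illustrative additions (not part of the module's API), and pageUrl stands for the URL of the page being parsed, however your version of the module exposes it:

var url = require('url');   // already required above

var seen = {};   // illustrative: remembers URLs that have already been queued

function queueOnce(pageUrl, href) {
    if (!href) return;
    // Resolve relative hrefs against the page they came from
    var absolute = url.resolve(pageUrl, href);
    // Skip mailto:, javascript:, etc., and anything already queued
    if (absolute.indexOf('http') !== 0 || seen[absolute]) return;
    seen[absolute] = true;
    c.queue(absolute);
}

Inside the callback, c.queue(toQueueUrl) would then become queueOnce(pageUrl, toQueueUrl). The seen table itself grows with the crawl, but one short string per unique URL is far cheaper than re-downloading and re-parsing duplicate pages.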

The memory usage climbs to around 800 MB after about 10 minutes.

Am I doing something wrong, or is this a problem with the module itself?

Thanks.
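
For anyone trying to reproduce the numbers above, heap growth over time can be logged with Node's built-in process.memoryUsage(); a minimal sketch to run alongside the crawl:

// Log heap and resident-set size every 30 seconds
setInterval(function () {
    var mem = process.memoryUsage();
    console.log('heapUsed: ' + Math.round(mem.heapUsed / 1048576) + ' MB, ' +
                'rss: ' + Math.round(mem.rss / 1048576) + ' MB');
}, 30 * 1000);

Steady growth in heapUsed long after maxConnections is saturated suggests retained JavaScript objects; jsdom windows that are never closed are a commonly reported culprit.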

mike442144 commented 7 years ago

We suggest you use the default module, Cheerio.
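
For reference, a minimal sketch of the same crawl using the default Cheerio parser, assuming the node-webcrawler API shown in the report; note that Cheerio reads attributes with .attr('href') rather than .prop('href'):

var Crawler = require("node-webcrawler");

var c = new Crawler({
    maxConnections : 10,
    // No jQuery option: the module falls back to Cheerio, a lean
    // server-side implementation of core jQuery
    callback : function (error, result, $) {
        if (error) {
            console.log(error);
            return;
        }
        console.log($("title").text());
        $('a').each(function (index, a) {
            c.queue($(a).attr('href'));
        });
    }
});

c.queue('http://www.wandoujia.com/');

Cheerio parses each page into a lightweight DOM without executing scripts or building full window objects, so its per-page memory overhead is much lower than jsdom's.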