alsotang / node-lessons

:closed_book:《Node.js 包教不包会》 by alsotang
16.54k stars 4.7k forks source link

lessons 5 并发控制中遇到了问题,即如果需要两层async.mapLimit,限制并发数就不起作用了 #129

Open yhw60 opened 7 years ago

yhw60 commented 7 years ago

var util = require('./util/util'); var async = require('async'); var superagent = require('superagent'); var concurrencyCount = 0;

var cnodeUrl = 'https://cnodejs.org/'; var cheerio = require('cheerio');

var stime = new Date();

var fetchUrl = function(url, currentPage) { setTimeout(function () { superagent.get(url).end(function(err, res) { if (err) { // fetchUrl(url,currentPage); return err; }

    var $ = cheerio.load(res.text);

    var title = $('.topic_full_title').text().trim();
    // console.log(title);
    var author = $('div.changes span').find("span:nth-child(2) a").text().trim();
    var comment = $('.reply_content ').eq(0).text().trim() || "评论不存在";
    var obj = {
        title,
        author,
        comment,
        href: url,
        currentPage
    }
    console.log(title);

});

},200)

}

var allHref = []; var count = 0;

for (var i = 1; i <= 511; i++) {
    PagesUrl.push(cnodeUrl + "?tab=all&page=" + i);
}
async.mapLimit(PagesUrl, 3, function(url, callback) {
    count++;
    superagent.get(url).end(function(err, res) {
        if (err) {
            return err;
        }
        var $ = cheerio.load(res.text);
        var topicUrls = [];
        var currentPage = url.substring(url.lastIndexOf("=") + 1);
        $('#topic_list .topic_title').each(function(index, item) {
            var $element = $(item);
            // console.log(cnodeUrl);
            var url = require('url');
            var href = url.resolve(cnodeUrl, $element.attr("href"));
            var title = $element.text().trim();
            // console.log(href);
            // console.log(title);
            topicUrls.push(href);
            allHref.push(href);
            // var obj = {
            //   href,
            //   title,
            //   currentPage
            // }
            // saveDataTomysql(obj);
        });
        // callback(null, topicUrls);

        async.mapLimit(topicUrls, 3, function(url, cb) {
          count++;
            // console.log(url);
            console.log(count);
            fetchUrl(url, currentPage);

            cb(null, "fetch:" + url);
        }, function(err, result) {
            // console.log(result);
            callback(null, topicUrls);
        });
    });

}, function(err, result) {
    console.log('全部爬取完成,共耗时:' + (new Date() - stime) + 'ms');

})
yhw60 commented 7 years ago

我需要在外层获取每一页列表的url即topicUrls,然后在获取 详情页中的作者和评论,这种情况下async.mapLimit 限制并发数量不生效了, 貌似导致发起请求过多而阻塞了

alsotang commented 7 years ago

试试 async.queue,搞两层queue

2017-03-19 12:00 GMT+08:00 yanhongwei notifications@github.com:

我需要在外层获取每一页列表的url即topicUrls,然后在获取 详情页中的作者和评论,这种情况下async.mapLimit 限制并发数量不生效了, 貌似导致发起请求过多而阻塞了

— You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub https://github.com/alsotang/node-lessons/issues/129#issuecomment-287592701, or mute the thread https://github.com/notifications/unsubscribe-auth/ABGB72_ycTOKJrv5yIk-UbYgYEFVJVjGks5rnKhkgaJpZM4Mhn7M .

-- GitHub: https://github.com/alsotang