alsotang / node-lessons

:closed_book:《Node.js 包教不包会》 by alsotang
16.54k stars 4.7k forks source link

关于lesson3抓取首页,链接和作者的问题 #165

Open freedomisCode opened 5 years ago

freedomisCode commented 5 years ago

// Dependencies.
const express = require('express');
const superagent = require('superagent');
const cheerio = require('cheerio');

// Create the express application instance.
const app = express();

/**
 * GET / — fetches the https://cnodejs.org/ front page and responds with one
 * entry per `.cell` element: its title, link and author.
 */
app.get('/', function (req, res, next) {
  // Fetch the HTML of https://cnodejs.org/ with superagent.
  superagent.get('https://cnodejs.org/').end(function (err, sres) {
    // Standard error handling: pass the failure on via next().
    if (err) {
      return next(err);
    }
    // sres.text holds the page's HTML. Passing it to cheerio.load yields a
    // jQuery-style query function, conventionally named `$`.
    const $ = cheerio.load(sres.text);
    const items = [];
    $('.cell').each(function (idx, element) {
      const $element = $(element);
      // Look the title link up once; both title and href are read from it.
      const $title = $element.find('.topic_title');
      items.push({
        title: $title.attr('title'),
        href: $title.attr('href'),
        // The author is read from the avatar image's title attribute.
        author: $element.find('.user_avatar img').attr('title'),
      });
    });
    res.send(items);
  });
});

app.listen(3000, function () {
  console.log('app is running at port 3000');
});

freedomisCode commented 5 years ago

按照一行的思路抓取的,参照了aimer1124的方法。不知道为啥按照 $('#topic_list .topic_title')抓取不到 @alsotang

Leonardo-zyh commented 5 years ago

// It's a problem in your own code; try this snippet instead.
// Assumes `$` (the cheerio query function) and the `items` array are defined
// by the surrounding handler.
$('#topic_list .topic_title ').each(function (idx, element) {
  const $element = $(element);
  items.push({
    title: $element.attr('title'),
    href: $element.attr('href'),
    // Walk up to the enclosing .cell row to reach the avatar image.
    author: $element.parents('.cell').find('img').attr('title')
  });
});

scottMan1001 commented 5 years ago

// Collect the author names from the avatar images in a separate pass.
// Assumes `$`, `items` and `items2` (an array) are defined by the surrounding
// code.
$('#topic_list .user_avatar>img').each(function (idx, element) {
  var $element = $(element);
  items2.push({
    author: $element.attr('title')
  });
});
// Copy each author onto the item at the same index. forEach is used because
// this is done only for its side effect; no new array is needed.
// NOTE(review): assumes items2 has an entry for every index of items — confirm.
items.forEach((item, index) => {
  item.author = items2[index].author;
});
// Admittedly a roundabout way of doing it: I looked up a separate selector
// for the author and added the code above on top of the tutorial author's
// base code.

zhaoqi1992 commented 4 years ago

“作者信息可以从 user_avatar 的 href 中获取”

let $ = cheerio.load(sres.text);
    let topics = $("#topic_list  .cell");
    let topicList = [];
    topics.each((idx, element) => {
      let $element = $(element);
      let $topicTitle = $element.find(".topic_title");
      let $userAvatar = $element.find(".user_avatar");
      topicList.push({
        title: $topicTitle.attr("title"),
        href: $topicTitle.attr("href"),
        author: $userAvatar
          .attr("href")
          .split("/")
          .pop()
      });