gaowei1012 / blog

this is blog
2 stars 0 forks source link

Python 爬虫小程序 #58

Open gaowei1012 opened 3 years ago

gaowei1012 commented 3 years ago
#coding:utf-8
#爬取小猪租房房屋信息

import requests
from lxml import etree
import time

def _first(node, path, default=''):
    """Return the first result of evaluating XPath *path* on *node*.

    Falls back to *default* when the expression matches nothing, so a single
    listing that lacks a field does not abort the crawl with an IndexError.
    """
    matches = node.xpath(path)
    return matches[0] if matches else default


# Crawl search-result pages 1 through 5 and print title, price, description
# and photo URL for every listing found on each page.
for i in range(1, 6):

    # URL of search-result page number i.
    url = 'http://sz.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)

    # Download the page body as text. The timeout keeps a stalled connection
    # from hanging the script forever.
    data3 = requests.get(url, timeout=10).text

    # Parse the HTML; each <li> under #page_list is treated as one listing.
    h = etree.HTML(data3)
    home = h.xpath('//*[@id="page_list"]/ul/li')

    # Pause after each page request to reduce the risk of the IP being banned.
    time.sleep(2)

    # Extract and print the fields of every listing on this page.
    for div in home:

        title = _first(div, './div[2]/div/a/span/text()')

        price = _first(div, './div[2]/span[1]/i/text()')

        describle = _first(div, './div[2]/div/em/text()').strip()

        # No trailing '/' after the attribute step: './a/img/@lazy_src/' is
        # not a valid XPath expression and makes lxml raise XPathEvalError.
        photo = _first(div, './a/img/@lazy_src')

        # Extra per-listing delay. No request is made here, but it stretches
        # the interval between consecutive page fetches.
        time.sleep(2)

        # Output one listing.
        print('{}-->{}-->{}\n{}'.format(title, price, describle, photo))