Open gaowei1012 opened 3 years ago
# coding: utf-8
# Scrape short-term rental listings (title, price, description, photo URL)
# from the xiaozhu.com Shenzhen search-result pages and print them.
import time

import requests
from lxml import etree


def _first(node, path, default=''):
    """Return the first result of XPath *path* evaluated on *node*.

    Falls back to *default* when the expression matches nothing, so a
    listing that lacks one field does not abort the whole run with an
    IndexError.
    """
    matches = node.xpath(path)
    return matches[0] if matches else default


# Fetch search-result pages 1 through 5.
for i in range(1, 6):
    # URL of the search-result page to scrape.
    url = 'http://sz.xiaozhu.com/search-duanzufang-p{}-0/'.format(i)
    # Download the page; the timeout stops a stalled connection from
    # blocking the script indefinitely.
    html_text = requests.get(url, timeout=10).text
    # Parse the HTML text into an element tree.
    h = etree.HTML(html_text)
    # One <li> element per listing on the page.
    home = h.xpath('//*[@id="page_list"]/ul/li')
    # Pause between page requests to reduce the chance of the IP being banned.
    time.sleep(2)
    # Extract and print the fields of every listing on this page. No request
    # is made inside this loop, so no additional delay is needed here.
    for div in home:
        title = _first(div, './div[2]/div/a/span/text()')
        price = _first(div, './div[2]/span[1]/i/text()')
        description = _first(div, './div[2]/div/em/text()').strip()
        # The attribute step must not end with '/': a trailing slash makes
        # the XPath expression invalid and lxml refuses to evaluate it.
        photo = _first(div, './a/img/@lazy_src')
        # Output one record per listing.
        print('{}-->{}-->{}\n{}'.format(title, price, description, photo))