Closed zhushen12580 closed 9 months ago
def ids(cookie, key):
    """Search XHS (Xiaohongshu) notes by keyword and export the results to Excel.

    Pages through up to 19 result pages (20 notes per page), collecting note id,
    author id/nickname/avatar, cover image, title, and like count, then writes a
    DataFrame deduplicated by ``user_id`` to ``./关键词检索数据/<key>.xlsx``.

    Parameters
    ----------
    cookie : str
        XHS session cookie used to authenticate the client.
    key : str
        Search keyword; also used as the output file name.

    Returns
    -------
    pandas.DataFrame
        The deduplicated result table (first note kept per author).
    """
    proxy = get_proxy()  # NOTE(review): fetched but never passed to XhsClient — confirm intent
    id_list = []
    user_id_list = []
    nick_name_list = []
    avatar_list = []
    cover_list = []  # was initialized twice in the original; once is enough
    liked_count_list = []
    display_title_list = []
    for page in range(1, 20):
        ua = UserAgent()
        try:
            xhs_client = XhsClient(cookie, sign=sign, timeout=5000, user_agent=ua.random)
            data = xhs_client.get_note_by_keyword(key, page, 20)
        except Exception:
            # Retry once with a fresh client; a bare except previously hid all errors.
            print("请求失败,重试一下")
            # proxy = get_proxy()  # 更新ip (rotate IP — kept disabled as in the original)
            try:
                xhs_client = XhsClient(cookie, sign=sign, timeout=5000, user_agent=ua.random)
                data = xhs_client.get_note_by_keyword(key, page, 20)
            except Exception:
                # Retry failed too: stop paging and export whatever was collected,
                # instead of crashing or referencing an unbound `data`.
                break
        print(data)
        # Process this page's items BEFORE deciding whether to stop, so the
        # final page (and a successfully retried page) is no longer discarded.
        for item in data['items']:
            if 'note_card' not in item:
                continue
            note_card = item['note_card']
            user = note_card['user']
            id_list.append(item.get('id', None))  # 帖子id (note id)
            user_id_list.append(user.get('user_id', None))  # 用户id
            nick_name_list.append(user.get('nick_name', None))  # 用户昵称 (nickname)
            avatar_list.append(user.get('avatar', None))  # 用户头像 (avatar URL)
            trace_id = note_card['cover'].get('trace_id', None)
            if trace_id is None:
                cover = note_card['cover'].get('url', None)  # 封面 (cover URL)
            else:
                cover = "https://sns-img-qc.xhscdn.com/" + trace_id  # 封面 (cover from CDN)
            # Wrap in an <img> tag so spreadsheet viewers that render HTML show a thumbnail.
            cover = '<table><img src="' + str(cover) + '"width=50 height=70></img></table>'
            cover_list.append(cover)
            display_title_list.append(note_card.get('display_title', None))  # 标题 (title)
            liked_count_list.append(note_card['interact_info'].get('liked_count', None))  # 点赞数 (likes)
        if not data.get('has_more'):
            break
    # 将列表导出到excel (export the collected columns to Excel)
    print(len(id_list), len(user_id_list), len(nick_name_list), len(avatar_list),
          len(cover_list), len(display_title_list), len(liked_count_list))
    df = pd.DataFrame({
        'id': id_list,
        'user_id': user_id_list,
        'nick_name': nick_name_list,
        'avatar': avatar_list,
        'cover': cover_list,
        'display_title': display_title_list,
        'liked_count': liked_count_list,
    })
    # 根据user_id去重 — keep only the first note seen per author.
    df = df.drop_duplicates(subset=['user_id'], keep='first')
    df.to_excel('./关键词检索数据/' + key + '.xlsx', index=False)
    return df
报错:
频繁的获取数据很容易被封 IP,用高质量一点的代理池即可解决
佬 求高质量的ip代理推荐
这是请求代码部分: def ids(cookie,key): proxy = get_proxy()
设置空dataframe
报错: