Closed nethunter787 closed 1 year ago
我在这个接口遇到了返回值的量子叠加态
我在这个接口遇到了返回值的量子叠加态
成功了,但是没有完全成功
``` { "code": -509, "message": "请求过于频繁,请稍后再试", "ttl": 1 }{ "code": 0, "message": "0", "ttl": 1, "data": { "mid": ... ```
同,请求被拦截了,但没有完全被拦截
大概是从一周多前开始的,概率性的触发
同,请求被拦截了,但没有完全被拦截
大概是从一周多前开始的,概率性的触发
看上去就像是拦截之后忘了break或者return...
我用 https://api.bilibili.com/x/space/arc/search?mid=12345678 接口也是,B站弄出这么个bug,他们测试竟然这么久都没发现,我也是醉了。。。。
感觉和我做的另一个项目(关于学习通的)遇到的一些接口返回问题很像,同样是两个 JSON 被错误地拼接在一起
我用 https://api.bilibili.com/x/space/arc/search?mid=12345678 接口也是,B站弄出这么个bug,他们测试竟然这么久都没发现,我也是醉了。。。。
成功了,但是没有完全成功
你可以试试这个api:https://api.bilibili.com/x/space/wbi/arc/search?mid=
我用 https://api.bilibili.com/x/space/arc/search?mid=12345678 接口也是,B站弄出这么个bug,他们测试竟然这么久都没发现,我也是醉了。。。。 成功了,但是没有完全成功
你可以试试这个api:https://api.bilibili.com/x/space/wbi/arc/search?mid=
这个也不太稳定,请求20个返回9-17个左右
我用 https://api.bilibili.com/x/space/arc/search?mid=12345678 接口也是,B站弄出这么个bug,他们测试竟然这么久都没发现,我也是醉了。。。。
成功了,但是没有完全成功
这个接口已经挂掉了,现在稳定返回
{"code":-799,"message":"请求过于频繁,请稍后再试","ttl":1}
最新试验结果
和我之前预料的一样,`w_rid` 这个参数已经开始生效,服务端现在必须校验此参数。
https://api.bilibili.com/x/space/wbi/acc/info?mid=34249407&token=&platform=web&web_location=1550101&w_rid=e0f3392c6e1ffef532d9ead0e78183e3&wts=1684730763
目前必须提交正确的 `w_rid` 才能获取到用户信息,所以爬虫必须自行生成该参数,或者从某个接口获取该参数
w_rid该怎么获取..
w_rid该怎么获取..
详见 #631
以下源码是根据 #631 的结论修改而来的
from functools import reduce
import hashlib
import time
def getMixinKey(ae):
    """Derive the 32-character mixin key used to sign WBI requests.

    Args:
        ae: The string to shuffle. ``encWbi`` passes the file stem of
            ``img_url`` followed by the file stem of ``sub_url``. It must be
            at least 64 characters long because the table below indexes
            positions 0 through 63.

    Returns:
        The first 32 characters of ``ae`` after its characters have been
        picked in the order given by the index table.

    Raises:
        IndexError: If ``ae`` is shorter than 64 characters.
    """
    # Fixed 64-entry index table that defines the character order.
    oe = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41,
          13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 20, 34, 44, 52]
    # Build the string with a single join instead of growing it through
    # reduce(). The whole table is still walked so that a too-short input
    # keeps failing with IndexError rather than yielding a bogus key.
    return "".join([ae[i] for i in oe])[:32]
def encWbi(params_in: dict):
    """Compute the WBI signature (``w_rid``) and timestamp (``wts``) for a query.

    The current ``img_url`` / ``sub_url`` pair is fetched from the ``nav``
    endpoint on every call, reduced to a mixin key with ``getMixinKey`` and
    appended to the sorted, URL-encoded query string before hashing with MD5.

    Args:
        params_in: Query parameters of the request to sign. The dict is not
            modified.

    Returns:
        A ``(w_rid, wts)`` tuple: the hex MD5 signature and the integer Unix
        timestamp that was included in the signed query. Both must be sent
        along with the original parameters.
    """
    # Local import so this snippet works even where the module-level imports
    # do not include urllib.parse.
    import urllib.parse

    resp = getjson("https://api.bilibili.com/x/web-interface/nav")
    wbi_img: dict = resp["data"]["wbi_img"]
    # The keys are the file names of the two URLs without their extension.
    img_key = wbi_img['img_url'].split("/")[-1].split(".")[0]
    sub_key = wbi_img['sub_url'].split("/")[-1].split(".")[0]
    me = getMixinKey(img_key + sub_key)

    wts = int(time.time())
    # Build a fresh dict so the caller's parameters are left untouched.
    params = {**params_in, "wts": wts}
    # Sign the URL-encoded form: callers send the query through
    # urllib.parse.urlencode, so signing the raw values would produce a
    # mismatching w_rid as soon as a value needs percent-encoding (for
    # example a non-ASCII keyword). For values that need no escaping the
    # result is identical to plain "key=value" joining.
    # NOTE(review): whether the server additionally strips characters such
    # as "!'()*" from values before verifying is not visible here - confirm
    # against the upstream signing reference.
    Ae = urllib.parse.urlencode(sorted(params.items()))
    w_rid = hashlib.md5((Ae + me).encode(encoding='utf-8')).hexdigest()
    return w_rid, wts
应用过程
def getUpVideos(up_uid,startpage=1,endpage=10,tid=0,keyword=''):
    """Return the videos uploaded by a user as a list of dicts.

    Pages ``startpage`` through ``endpage`` (inclusive) of the space video
    search endpoint are requested one by one, three seconds apart. Paging
    stops early at the first page whose ``vlist`` is empty.

    Args:
        up_uid: UID of the uploader.
        startpage: First page to request.
        endpage: Last page to request. With the defaults at most 10 pages of
            30 videos (300 videos) are fetched.
        tid: Category filter; 0 means no filtering.
        keyword: Search keyword.

    Returns:
        A list of dicts with the keys ``title``, ``bvid``, ``author``,
        ``mid`` and ``created``, in the order the API returned them. The
        list is empty when ``startpage`` is greater than ``endpage``.

    Raises:
        RuntimeError: If a page response carries no ``data`` payload (for
            example a rate-limit error object).
    """
    up_videos = []
    # Total reported by the API on the first page; stays 0 if no page is read.
    space_video_num = 0
    for space_video_page in range(startpage, endpage + 1):
        time.sleep(3)  # keep the request rate low
        space_video_search_params_dict = {
            'mid': up_uid,              # uploader UID
            'ps': 30,                   # videos per page
            'tid': tid,                 # category filter, 0 = no filter
            'special_type': '',
            'pn': space_video_page,     # page number
            'keyword': keyword,         # search keyword
            'order': 'pubdate',         # sort key; alternatives: click (plays) / stow (favorites)
            'platform': 'web',
            'web_location': 1550101,
            'order_avoided': 'true',
        }
        w_rid, wts = encWbi(space_video_search_params_dict)
        space_video_search_params_urlcoded = urllib.parse.urlencode(space_video_search_params_dict)
        up_videos_api = 'https://api.bilibili.com/x/space/wbi/arc/search?%s&w_rid=%s&wts=%s'%(space_video_search_params_urlcoded,w_rid,wts)
        space_video_search_json = getjson(up_videos_api,headers=[("credentials","include")])

        # Fail with the server's answer instead of an opaque TypeError or
        # KeyError when the response has no payload.
        data = space_video_search_json.get('data') if space_video_search_json else None
        if not data:
            raise RuntimeError('space video search failed on page %d: %r' % (space_video_page, space_video_search_json))

        if space_video_page == startpage:
            # Total number of videos; 0 when the page has none.
            # (data['list']['tlist'] holds the per-category tid/name/count
            # table, or None when the page has no videos.)
            space_video_num = data['page']['count']

        thisPageVideos = data['list']['vlist']
        if not thisPageVideos:  # an empty list means there are no more videos
            break
        up_videos.extend(
            {
                'title': each_video_info['title'],
                'bvid': each_video_info['bvid'],
                'author': each_video_info['author'],
                'mid': each_video_info['mid'],
                'created': each_video_info['created'],
            }
            for each_video_info in thisPageVideos
        )

    print('[√] 已获取 [%d/%d] 个视频'%(len(up_videos),space_video_num))
    return up_videos
感觉B站已经对我的IP动手了,不换代理,好多资源第一次都下载失败
导入SESSDATA就能直接用了,好耶!感恩
from functools import reduce
import hashlib
import time
import urllib.parse
import requests
# Value of the SESSDATA login cookie; 'xxx' is a placeholder.
SESSDATA = 'xxx'
# Headers sent with the space video search request: a browser User-Agent
# plus the login cookie built from SESSDATA.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:32.0) '
        'Gecko/20100101 Firefox/32.0'
    ),
    'Cookie': f'SESSDATA={SESSDATA}',
}
def getjson(url, headers=None, timeout=10):
    """Fetch ``url`` with a GET request and return the decoded JSON body.

    Args:
        url: Address to request.
        headers: Optional headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block forever.

    Returns:
        The parsed JSON document when the status code is 200, otherwise
        ``None``.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.json()
def getMixinKey(ae):
    """Derive the 32-character mixin key used to sign WBI requests.

    Args:
        ae: The string to shuffle. ``encWbi`` passes the file stem of
            ``img_url`` followed by the file stem of ``sub_url``. It must be
            at least 64 characters long because the table below indexes
            positions 0 through 63.

    Returns:
        The first 32 characters of ``ae`` after its characters have been
        picked in the order given by the index table.

    Raises:
        IndexError: If ``ae`` is shorter than 64 characters.
    """
    # Fixed 64-entry index table that defines the character order.
    oe = [46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41,
          13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 20, 34, 44, 52]
    # Build the string with a single join instead of growing it through
    # reduce(). The whole table is still walked so that a too-short input
    # keeps failing with IndexError rather than yielding a bogus key.
    return "".join([ae[i] for i in oe])[:32]
def encWbi(params_in: dict):
    """Compute the WBI signature (``w_rid``) and timestamp (``wts``) for a query.

    The current ``img_url`` / ``sub_url`` pair is fetched from the ``nav``
    endpoint on every call, reduced to a mixin key with ``getMixinKey`` and
    appended to the sorted, URL-encoded query string before hashing with MD5.

    Args:
        params_in: Query parameters of the request to sign. The dict is not
            modified.

    Returns:
        A ``(w_rid, wts)`` tuple: the hex MD5 signature and the integer Unix
        timestamp that was included in the signed query. Both must be sent
        along with the original parameters.
    """
    resp = getjson("https://api.bilibili.com/x/web-interface/nav")
    wbi_img: dict = resp["data"]["wbi_img"]
    # The keys are the file names of the two URLs without their extension.
    img_key = wbi_img['img_url'].split("/")[-1].split(".")[0]
    sub_key = wbi_img['sub_url'].split("/")[-1].split(".")[0]
    me = getMixinKey(img_key + sub_key)

    wts = int(time.time())
    # Build a fresh dict so the caller's parameters are left untouched.
    params = {**params_in, "wts": wts}
    # Sign the URL-encoded form: callers send the query through
    # urllib.parse.urlencode, so signing the raw values would produce a
    # mismatching w_rid as soon as a value needs percent-encoding (for
    # example a non-ASCII keyword). For values that need no escaping the
    # result is identical to plain "key=value" joining.
    # NOTE(review): whether the server additionally strips characters such
    # as "!'()*" from values before verifying is not visible here - confirm
    # against the upstream signing reference.
    Ae = urllib.parse.urlencode(sorted(params.items()))
    w_rid = hashlib.md5((Ae + me).encode(encoding='utf-8')).hexdigest()
    return w_rid, wts
def getUpVideos(up_uid,startpage=1,endpage=10,tid=0,keyword=''):
    """Return the videos uploaded by a user as a list of dicts.

    Pages ``startpage`` through ``endpage`` (inclusive) of the space video
    search endpoint are requested one by one, three seconds apart. Paging
    stops early at the first page whose ``vlist`` is empty.

    Args:
        up_uid: UID of the uploader.
        startpage: First page to request.
        endpage: Last page to request. With the defaults at most 10 pages of
            30 videos (300 videos) are fetched.
        tid: Category filter; 0 means no filtering.
        keyword: Search keyword.

    Returns:
        A list of dicts with the keys ``title``, ``bvid``, ``author``,
        ``mid`` and ``created``, in the order the API returned them. The
        list is empty when ``startpage`` is greater than ``endpage``.

    Raises:
        RuntimeError: If a page response is missing or carries no ``data``
            payload (for example a non-200 status or a rate-limit error
            object).
    """
    up_videos = []
    # Total reported by the API on the first page; stays 0 if no page is read.
    space_video_num = 0
    for space_video_page in range(startpage, endpage + 1):
        time.sleep(3)  # keep the request rate low
        space_video_search_params_dict = {
            'mid': up_uid,              # uploader UID
            'ps': 30,                   # videos per page
            'tid': tid,                 # category filter, 0 = no filter
            'special_type': '',
            'pn': space_video_page,     # page number
            'keyword': keyword,         # search keyword
            'order': 'pubdate',         # sort key; alternatives: click (plays) / stow (favorites)
            'platform': 'web',
            'web_location': 1550101,
            'order_avoided': 'true',
        }
        w_rid, wts = encWbi(space_video_search_params_dict)
        space_video_search_params_urlcoded = urllib.parse.urlencode(space_video_search_params_dict)
        up_videos_api = 'https://api.bilibili.com/x/space/wbi/arc/search?%s&w_rid=%s&wts=%s'%(space_video_search_params_urlcoded,w_rid,wts)
        space_video_search_json = getjson(up_videos_api, headers=headers)

        # getjson returns None on a non-200 status and error objects have no
        # payload; fail with the server's answer instead of an opaque
        # TypeError or KeyError.
        data = space_video_search_json.get('data') if space_video_search_json else None
        if not data:
            raise RuntimeError('space video search failed on page %d: %r' % (space_video_page, space_video_search_json))

        if space_video_page == startpage:
            # Total number of videos; 0 when the page has none.
            # (data['list']['tlist'] holds the per-category tid/name/count
            # table, or None when the page has no videos.)
            space_video_num = data['page']['count']

        thisPageVideos = data['list']['vlist']
        if not thisPageVideos:  # an empty list means there are no more videos
            break
        up_videos.extend(
            {
                'title': each_video_info['title'],
                'bvid': each_video_info['bvid'],
                'author': each_video_info['author'],
                'mid': each_video_info['mid'],
                'created': each_video_info['created'],
            }
            for each_video_info in thisPageVideos
        )

    print('[√] 已获取 [%d/%d] 个视频'%(len(up_videos),space_video_num))
    return up_videos
def dic2bvid(data):
    """Collect the ``'bvid'`` value of every entry in ``data``, in order."""
    bvid_list = []
    for entry in data:
        bvid_list.append(entry['bvid'])
    return bvid_list
def getBvidList(uid):
    """Return the bvid of every video that ``getUpVideos`` yields for ``uid``."""
    return dic2bvid(getUpVideos(uid))
# Demo call: fetch and print the bvid list of one sample uploader.
bvid_list = getBvidList(235555226)
print(bvid_list)
https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/docs/user/info.md
用户 -> 信息查询
原接口不稳定,容易返回
{"code":-509,"message":"请求过于频繁,请稍后再试","ttl":1}
异常改进建议
接口建议由
'https://api.bilibili.com/x/space/acc/info?mid=%s'%mid
替换为:
'https://api.bilibili.com/x/space/wbi/acc/info?mid=%s'%mid
后者较为稳定