1837669410 / bilibili_comment_crawl

爬取bilibili视频下的评论,最新出品!!!⚠本代码只适用于学习,做其他事情概不负责!!!
44 stars 0 forks source link

遇到IndexError: list index out of range问题 #5

Open irenetian22 opened 10 hours ago

irenetian22 commented 10 hours ago

您好~使用您的代码时,遇到 IndexError: list index out of range 问题,想请教一下可能的原因。以下是报错信息: Error occurred during loading data. Trying to use cache server https://fake-useragent.herokuapp.com/browsers/0.1.11 Traceback (most recent call last): File "E:\anadonda\lib\site-packages\fake_useragent\utils.py", line 154, in load for item in get_browsers(verify_ssl=verify_ssl): File "E:\anadonda\lib\site-packages\fake_useragent\utils.py", line 99, in get_browsers html = html.split('<table class="w3-table-all notranslate">')[1] IndexError: list index out of range

IndexError Traceback (most recent call last) File E:\anadonda\lib\site-packages\fake_useragent\utils.py:154, in load(use_cache_server, verify_ssl) 153 try: --> 154 for item in get_browsers(verify_ssl=verify_ssl): 155 browser, percent = item

File E:\anadonda\lib\site-packages\fake_useragent\utils.py:99, in get_browsers(verify_ssl) 98 html = html.decode('utf-8') ---> 99 html = html.split('<table class="w3-table-all notranslate">')[1] 100 html = html.split('</table>')[0]

IndexError: list index out of range

During handling of the above exception, another exception occurred:

ConnectionResetError Traceback (most recent call last) File E:\anadonda\lib\urllib\request.py:1346, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args) 1345 try: -> 1346 h.request(req.get_method(), req.selector, req.data, headers, 1347 encode_chunked=req.has_header('Transfer-encoding')) 1348 except OSError as err: # timeout error

File E:\anadonda\lib\http\client.py:1285, in HTTPConnection.request(self, method, url, body, headers, encode_chunked) 1284 """Send a complete request to the server.""" -> 1285 self._send_request(method, url, body, headers, encode_chunked)

File E:\anadonda\lib\http\client.py:1331, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked) 1330 body = _encode(body, 'body') -> 1331 self.endheaders(body, encode_chunked=encode_chunked)

File E:\anadonda\lib\http\client.py:1280, in HTTPConnection.endheaders(self, message_body, encode_chunked) 1279 raise CannotSendHeader() -> 1280 self._send_output(message_body, encode_chunked=encode_chunked)

File E:\anadonda\lib\http\client.py:1040, in HTTPConnection._send_output(self, message_body, encode_chunked) 1039 del self._buffer[:] -> 1040 self.send(msg) 1042 if message_body is not None: 1043 1044 # create a consistent interface to message_body

File E:\anadonda\lib\http\client.py:980, in HTTPConnection.send(self, data) 979 if self.auto_open: --> 980 self.connect() 981 else:

File E:\anadonda\lib\http\client.py:1454, in HTTPSConnection.connect(self) 1452 server_hostname = self.host -> 1454 self.sock = self._context.wrap_socket(self.sock, 1455 server_hostname=server_hostname)

File E:\anadonda\lib\ssl.py:501, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session) 495 def wrap_socket(self, sock, server_side=False, 496 do_handshake_on_connect=True, 497 suppress_ragged_eofs=True, 498 server_hostname=None, session=None): 499 # SSLSocket class handles server_hostname encoding before it calls 500 # ctx._wrap_socket() --> 501 return self.sslsocket_class._create( 502 sock=sock, 503 server_side=server_side, 504 do_handshake_on_connect=do_handshake_on_connect, 505 suppress_ragged_eofs=suppress_ragged_eofs, 506 server_hostname=server_hostname, 507 context=self, 508 session=session 509 )

File E:\anadonda\lib\ssl.py:1041, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session) 1040 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets") -> 1041 self.do_handshake() 1042 except (OSError, ValueError):

File E:\anadonda\lib\ssl.py:1310, in SSLSocket.do_handshake(self, block) 1309 self.settimeout(None) -> 1310 self._sslobj.do_handshake() 1311 finally:

ConnectionResetError: [WinError 10054] 远程主机强迫关闭了一个现有的连接。

During handling of the above exception, another exception occurred:

URLError Traceback (most recent call last) File E:\anadonda\lib\site-packages\fake_useragent\utils.py:64, in get(url, verify_ssl) 62 context = None ---> 64 with contextlib.closing(urlopen( 65 request, 66 timeout=settings.HTTP_TIMEOUT, 67 context=context, 68 )) as response: 69 return response.read()

File E:\anadonda\lib\urllib\request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context) 213 opener = _opener --> 214 return opener.open(url, data, timeout)

File E:\anadonda\lib\urllib\request.py:517, in OpenerDirector.open(self, fullurl, data, timeout) 516 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) --> 517 response = self._open(req, data) 519 # post-process response

File E:\anadonda\lib\urllib\request.py:534, in OpenerDirector._open(self, req, data) 533 protocol = req.type --> 534 result = self._call_chain(self.handle_open, protocol, protocol + 535 '_open', req) 536 if result:

File E:\anadonda\lib\urllib\request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args) 493 func = getattr(handler, meth_name) --> 494 result = func(*args) 495 if result is not None:

File E:\anadonda\lib\urllib\request.py:1389, in HTTPSHandler.https_open(self, req) 1388 def https_open(self, req): -> 1389 return self.do_open(http.client.HTTPSConnection, req, 1390 context=self._context, check_hostname=self._check_hostname)

File E:\anadonda\lib\urllib\request.py:1349, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args) 1348 except OSError as err: # timeout error -> 1349 raise URLError(err) 1350 r = h.getresponse()

URLError: <urlopen error [WinError 10054] 远程主机强迫关闭了一个现有的连接。>

During handling of the above exception, another exception occurred:

FakeUserAgentError Traceback (most recent call last) Cell In[7], line 8 4 # 范例url:https://api.bilibili.com/x/v2/reply/main?csrf=40a227fcf12c380d7d3c81af2cd8c5e8&mode=3&next=3&oid=861032963&plat=1&type=1 5 # 如果有不懂的参照这个url对比下就知道了 6 url = "https://api.bilibili.com/x/v2/reply/main?csrf=40a227fcf12c380d7d3c81af2cd8c5e8&mode=3&next=3&oid=861032963&plat&plat=1&type=1" 7 header = { ----> 8 "user-agent": UserAgent().random, 9 "cookie": "替换成自己的cookie即可" 10 } 11 comment = [] 12 pre_comment_length = 0

File E:\anadonda\lib\site-packages\fake_useragent\fake.py:69, in FakeUserAgent.__init__(self, cache, use_cache_server, path, fallback, verify_ssl, safe_attrs) 66 self.data_randomize = [] 67 self.data_browsers = {} ---> 69 self.load()

File E:\anadonda\lib\site-packages\fake_useragent\fake.py:75, in FakeUserAgent.load(self) 73 with self.load.lock: 74 if self.cache: ---> 75 self.data = load_cached( 76 self.path, 77 use_cache_server=self.use_cache_server, 78 verify_ssl=self.verify_ssl, 79 ) 80 else: 81 self.data = load( 82 use_cache_server=self.use_cache_server, 83 verify_ssl=self.verify_ssl, 84 )

File E:\anadonda\lib\site-packages\fake_useragent\utils.py:250, in load_cached(path, use_cache_server, verify_ssl) 248 def load_cached(path, use_cache_server=True, verify_ssl=True): 249 if not exist(path): --> 250 update(path, use_cache_server=use_cache_server, verify_ssl=verify_ssl) 252 return read(path)

File E:\anadonda\lib\site-packages\fake_useragent\utils.py:245, in update(path, use_cache_server, verify_ssl) 242 def update(path, use_cache_server=True, verify_ssl=True): 243 rm(path) --> 245 write(path, load(use_cache_server=use_cache_server, verify_ssl=verify_ssl))

File E:\anadonda\lib\site-packages\fake_useragent\utils.py:187, in load(use_cache_server, verify_ssl) 180 logger.warning( 181 'Error occurred during loading data. ' 182 'Trying to use cache server %s', 183 settings.CACHE_SERVER, 184 exc_info=exc, 185 ) 186 try: --> 187 ret = json.loads(get( 188 settings.CACHE_SERVER, 189 verify_ssl=verify_ssl, 190 ).decode('utf-8')) 191 except (TypeError, ValueError): 192 raise FakeUserAgentError('Can not load data from cache server')

File E:\anadonda\lib\site-packages\fake_useragent\utils.py:84, in get(url, verify_ssl) 77 logger.debug( 78 'Error occurred during fetching %s', 79 url, 80 exc_info=exc, 81 ) 83 if attempt == settings.HTTP_RETRIES: ---> 84 raise FakeUserAgentError('Maximum amount of retries reached') 85 else: 86 logger.debug( 87 'Sleeping for %s seconds', 88 settings.HTTP_DELAY, 89 )

FakeUserAgentError: Maximum amount of retries reached

1837669410 commented 7 hours ago

抱歉这个看不懂,应该不是我的代码

irenetian22 commented 7 hours ago

[截图:11111] 这是我借用您的代码运行时的报错截图,能麻烦您看一下是否是我理解有误,导致代码一直跑不通吗?

1837669410 commented 7 hours ago

image 好像这儿写错了,其他的好像没问题

irenetian22 commented 7 hours ago

1111 可是即使我是直接复制使用您的示例B站链接,都也会报错,可以帮忙看看写错具体是应该怎样改吗?

1837669410 commented 7 hours ago

你对照一下我给的示例代码就行,然后把next=0改成next={}就能正常使用了,我测试了是没问题的