Closed rogerfederal closed 2 years ago
图片爬虫线程太多的话,会被 ban 掉 IP 地址。等我有时间优化一下,设置一个随机请求头。
Exception in thread Thread-6:
Traceback (most recent call last):
  File "C:\PY\lib\threading.py", line 808, in __bootstrap_inner
    self.run()
  File "test4.py", line 21, in run
    self.spider(url=page_url)
  File "test4.py", line 31, in spider
    fileName = 'img/'+name91[0].decode('utf-8').encode('gbk', 'replace')
IndexError: list index out of range
//这是第一种索引值报错,一般情况下就会暂停或者中断了
Exception in thread Thread-2:
Traceback (most recent call last):
  File "C:\PY\lib\threading.py", line 808, in __bootstrap_inner
    self.run()
  File "test4.py", line 21, in run
    self.spider(url=page_url)
  File "test4.py", line 45, in spider
    response = requests.get(img_url,headers=headers)
  File "C:\PY\lib\requests\api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "C:\PY\lib\requests\api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\PY\lib\requests\sessions.py", line 512, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\PY\lib\requests\sessions.py", line 644, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "C:\PY\lib\requests\sessions.py", line 222, in resolve_redirects
    **adapter_kwargs
  File "C:\PY\lib\requests\sessions.py", line 622, in send
    r = adapter.send(request, **kwargs)
  File "C:\PY\lib\requests\adapters.py", line 495, in send
    raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', error(10060, ''))
//这种就是服务器关闭了
好的,其实索引这个问题我遇见了,不过一直没影响效果,我就没改……等我有时间看一下,感谢。
Do you want to use proxy?[y/n]Y
Traceback (most recent call last):
  File "test.py", line 107, in <module>
D:\91porn-spider-master>python test.py 10
Do you want to use proxy?[y/n]n
Exception in thread Thread-8:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=7 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BF13C8>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-6:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=5 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BF10F0>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-5:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=4 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BD8DD8>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-10:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=9 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BF1630>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-9:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=8 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BF1518>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-2:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=1 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BD8BE0>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-7:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=6 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BF1400>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-4:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=3 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BD8D68>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-3:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=2 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BD8C50>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-1:
Traceback (most recent call last):
  File "D:\Python27\lib\threading.py", line 801, in __bootstrap_inner
    self.run()
  File "D:\Python27\lib\threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "test.py", line 71, in spider
    get_page=requests.get(url=page_url)
  File "D:\Python27\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "D:\Python27\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "D:\Python27\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "D:\Python27\lib\site-packages\requests\adapters.py", line 516, in send
    raise ConnectionError(e, request=request)
ConnectionError: HTTPConnectionPool(host='91.91p27.space', port=80): Max retries exceeded with url: /v.php?category=top&viewtype=basic&page=0 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000000002BD8668>: Failed to establish a new connection: [Errno 10060] ',))
Exception in thread Thread-8:
Traceback (most recent call last):
  File "C:\PY\lib\threading.py", line 808, in __bootstrap_inner
    self.run()
  File "test4.py", line 20, in run
    self.spider(url=page_url)
  File "test4.py", line 25, in spider
    r = requests.get(url=url, headers=headers)
  File "C:\PY\lib\requests\api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "C:\PY\lib\requests\api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\PY\lib\requests\sessions.py", line 512, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\PY\lib\requests\sessions.py", line 644, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "C:\PY\lib\requests\sessions.py", line 222, in resolve_redirects
    **adapter_kwargs
  File "C:\PY\lib\requests\sessions.py", line 622, in send
    r = adapter.send(request, **kwargs)
  File "C:\PY\lib\requests\adapters.py", line 495, in send
    raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', error(10060, ''))