Hi,谢谢您的开源!
但是我在使用过程中遇到了一些问题,比如 ACL 会议 2014-2018 年关于 summarization 的 paper 无法爬取,bug 记录如下:
parse <bound method parse of <config.DefaultConfig object at 0x10bcdb4f0>>
debug
2015:
fail
fail
^CTraceback (most recent call last):
File "spider.py", line 15, in <module>
fire.Fire()
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "spider.py", line 10, in spider
spiderTool.main()
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/BasicSpider.py", line 88, in main
page = self.get_page(url)
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/BasicSpider.py", line 23, in get_page
response = requests.get(url, headers=headers)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/adapters.py", line 439, in send
resp = conn.urlopen(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py", line 667, in urlopen
self._prepare_proxy(conn)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py", line 932, in _prepare_proxy
conn.connect()
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 362, in connect
self.sock = ssl_wrap_socket(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/util/ssl.py", line 386, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 1040, in _create
self.do_handshake()
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
KeyboardInterrupt
Traceback (most recent call last):
File "spider.py", line 15, in <module>
fire.Fire()
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "spider.py", line 10, in spider
spiderTool.main()
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/BasicSpider.py", line 102, in main
self.get_content(url1, year)
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/ACLSeries.py", line 22, in get_content
tag = soup.select('#title a')[0]
IndexError: list index out of range
hi 谢谢您的开源 但是我在使用过程中出现了一些问题 比如acl会议2014-2018年关于summarization的paper无法使用,bug记录如下: parse <bound method parse of <config.DefaultConfig object at 0x10bcdb4f0>> debug 2015: fail fail ^CTraceback (most recent call last): File "spider.py", line 15, in <module>
fire.Fire()
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 141, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 466, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/fire/core.py", line 681, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "spider.py", line 10, in spider
spiderTool.main()
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/BasicSpider.py", line 88, in main
page = self.get_page(url)
File "/Users/zili/Downloads/NLPPapersSpider-master/spidersMoudle/BasicSpider.py", line 23, in get_page
response = requests.get(url, headers=headers)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/requests/adapters.py", line 439, in send
resp = conn.urlopen(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py", line 667, in urlopen
self._prepare_proxy(conn)
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py", line 932, in _prepare_proxy
conn.connect()
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/connection.py", line 362, in connect
self.sock = ssl_wrap_socket(
File "/Users/zili/anaconda3/lib/python3.8/site-packages/urllib3/util/ssl.py", line 386, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 1040, in _create
self.do_handshake()
File "/Users/zili/anaconda3/lib/python3.8/ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
KeyboardInterrupt
(base) ➜ NLPPapersSpider-master python spider.py spider user config: spiderTool ACLSeries Keywords ['summarization'] Years [2015] Field summarization Meeting EMNLP path /Users/zili/Desktop/test/ parse <bound method parse of <config.DefaultConfig object at 0x10bb0a280>> debug 2015: urls: ['https://doi.org/10.18653/v1/d15-1220', 'https://doi.org/10.18653/v1/d15-1045', 'https://doi.org/10.18653/v1/d15-1013', 'https://doi.org/10.18653/v1/d15-1011', 'https://doi.org/10.18653/v1/d15-1229', 'https://doi.org/10.18653/v1/d15-1232', 'https://doi.org/10.18653/v1/d15-1219', 'https://doi.org/10.18653/v1/d15-1223', 'https://doi.org/10.18653/v1/d15-1222', 'https://doi.org/10.18653/v1/d15-1226', 'https://doi.org/10.18653/v1/d15-1044', 'https://doi.org/10.18653/v1/d15-1014', 'https://doi.org/10.18653/v1/d15-1012', 'https://doi.org/10.18653/v1/d15-1228'] 所有论文页面url提取成功!共14篇。 论文页面:https://doi.org/10.18653/v1/d15-1220 url1: https://doi.org/10.18653/v1/d15-1220 page:
Traceback (most recent call last): File "spider.py", line 15, in