Closed: scumola closed this issue 5 years ago
Thanks for the report. I managed to reproduce it with grab-site --1 http://www.realmediajapan.net/
and I'll try to get this fixed soon.
Here is another crash, this time in the sitemap scraper:
https://www.youtube.com/watch?v=wZ_xBBf2fI0&feature=youtu.be ...
http://xmeatx.com/ ...
http://www.akipiro5.com/robots.txt ...
http://www.akipiro5.com/sitemap.xml ...
ERROR Fatal exception.
Traceback (most recent call last):
File "/root/gs-venv/lib/python3.7/site-packages/wpull/application/app.py", line 157, in run
yield from pipeline.process()
File "/root/gs-venv/lib/python3.7/site-packages/wpull/pipeline/pipeline.py", line 194, in process
yield from self._process_one_worker()
File "/root/gs-venv/lib/python3.7/site-packages/wpull/pipeline/pipeline.py", line 215, in _process_one_worker
task.result()
File "/root/gs-venv/lib/python3.7/site-packages/wpull/pipeline/pipeline.py", line 119, in process
item = yield from self.process_one(_worker_id=worker_id)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/pipeline/pipeline.py", line 103, in process_one
yield from task.process(item)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/application/tasks/download.py", line 421, in process
yield from session.app_session.factory['Processor'].process(session)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/delegate.py", line 29, in process
return (yield from processor.process(item_session))
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/web.py", line 91, in process
return (yield from session.process())
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/web.py", line 185, in process
yield from self._process_loop()
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/web.py", line 244, in _process_loop
exit_early, wait_time = yield from self._fetch_one(cast(Request, self._item_session.request))
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/web.py", line 308, in _fetch_one
action = self._handle_response(request, response)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/web.py", line 423, in _handle_response
self._processing_rule.scrape_document(self._item_session)
File "/root/gs-venv/lib/python3.7/site-packages/libgrabsite/wpull_tweaks.py", line 55, in scrape_document
super().scrape_document(item_session)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/processor/rule.py", line 527, in scrape_document
item_session.url_record.link_type
File "/root/gs-venv/lib/python3.7/site-packages/wpull/scraper/base.py", line 186, in scrape_info
scrape_result = scraper.scrape(request, response, link_type)
File "/root/gs-venv/lib/python3.7/site-packages/wpull/scraper/sitemap.py", line 37, in scrape
for link in link_iter:
File "/root/gs-venv/lib/python3.7/site-packages/wpull/scraper/base.py", line 150, in iter_processed_links
for link in self.iter_links(file, encoding):
File "/root/gs-venv/lib/python3.7/site-packages/wpull/document/sitemap.py", line 71, in iter_links
and html_obj.tag.endswith('loc'):
AttributeError: 'cython_function_or_method' object has no attribute 'endswith'
CRITICAL Sorry, Wpull unexpectedly crashed.
https://www.riviera.co.jp/marina/ ...
Disconnected from ws:// server: RuntimeError('Event loop is closed')
Exception ignored in: <coroutine object sender at 0x7f7bc10a41c8>
Traceback (most recent call last):
File "/root/gs-venv/lib/python3.7/site-packages/libgrabsite/dashboard_client.py", line 54, in sender
await asyncio.sleep(delay)
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/tasks.py", line 562, in sleep
future, result)
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/base_events.py", line 641, in call_later
context=context)
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/base_events.py", line 651, in call_at
self._check_closed()
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/base_events.py", line 461, in _check_closed
raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed
Exception ignored in: <generator object WebSocketCommonProtocol.close_connection at 0x7f7bc0423138>
Traceback (most recent call last):
File "/root/gs-venv/lib/python3.7/site-packages/websockets/protocol.py", line 853, in close_connection
self.writer.close()
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/streams.py", line 317, in close
return self._transport.close()
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/selector_events.py", line 661, in close
self._loop.call_soon(self._call_connection_lost, None)
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/base_events.py", line 672, in call_soon
self._check_closed()
File "/root/.pyenv/versions/3.7.0/lib/python3.7/asyncio/base_events.py", line 461, in _check_closed
raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed
Just wrap that scraping code in a try/except! It's not important to catch every last link; what's important is that the crawl doesn't die. LOL :)