An SEO Python scraper that extracts data from major search engine result pages. It extracts data such as the URL, title, snippet, rich snippet, and result type from search results for given keywords. It can also detect ads or take automated screenshots. You can additionally fetch the text content of URLs found in the search results or supplied on your own. It's useful for SEO and business-related research tasks.
Using 0.9.1 with lots of proxies.
The log is full of unhandled exceptions; two examples follow:
1)
Exception in thread [google]SelScrape:
Traceback (most recent call last):
File "/home/test/anaconda2/envs/python3/lib/python3.4/threading.py", line 911, in _bootstrap_inner
self.run()
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 710, in run
if not self._get_webdriver():
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 230, in _get_webdriver
return self._get_PhantomJS()
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 344, in _get_PhantomJS
desired_capabilities=dcap
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/phantomjs/webdriver.py", line 58, in __init__
desired_capabilities=desired_capabilities)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 140, in __init__
self.start_session(desired_capabilities, browser_profile)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 229, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 295, in execute
response = self.command_executor.execute(driver_command, params)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/remote_connection.py", line 464, in execute
return self._request(command_info[0], url, body=data)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/remote_connection.py", line 526, in _request
resp = opener.open(request, timeout=self._timeout)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 464, in open
response = self._open(req, data)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 482, in _open
'_open', req)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 442, in _call_chain
result = func(*args)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 1211, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 1186, in do_open
r = h.getresponse()
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 1227, in getresponse
response.begin()
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 386, in begin
version, status, reason = self._read_status()
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 348, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/home/test/anaconda2/envs/python3/lib/python3.4/socket.py", line 378, in readinto
return self._sock.recv_into(b)
**ConnectionResetError: [Errno 104] Connection reset by peer**
2)
Exception in thread [google]SelScrape:
Traceback (most recent call last):
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 1183, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 1137, in request
self._send_request(method, url, body, headers)
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 1182, in _send_request
self.endheaders(body)
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 1133, in endheaders
self._send_output(message_body)
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 963, in _send_output
self.send(msg)
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 898, in send
self.connect()
File "/home/test/anaconda2/envs/python3/lib/python3.4/http/client.py", line 871, in connect
self.timeout, self.source_address)
File "/home/test/anaconda2/envs/python3/lib/python3.4/socket.py", line 516, in create_connection
raise err
File "/home/test/anaconda2/envs/python3/lib/python3.4/socket.py", line 507, in create_connection
sock.connect(sa)
**ConnectionRefusedError: [Errno 111] Connection refused**
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/test/anaconda2/envs/python3/lib/python3.4/threading.py", line 911, in _bootstrap_inner
self.run()
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 710, in run
if not self._get_webdriver():
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 230, in _get_webdriver
return self._get_PhantomJS()
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/scrapcore/scraper/selenium.py", line 344, in _get_PhantomJS
desired_capabilities=dcap
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/phantomjs/webdriver.py", line 58, in __init__
desired_capabilities=desired_capabilities)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 140, in __init__
self.start_session(desired_capabilities, browser_profile)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 229, in start_session
response = self.execute(Command.NEW_SESSION, parameters)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/webdriver.py", line 295, in execute
response = self.command_executor.execute(driver_command, params)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/remote_connection.py", line 464, in execute
return self._request(command_info[0], url, body=data)
File "/home/test/anaconda2/envs/python3/lib/python3.4/site-packages/selenium/webdriver/remote/remote_connection.py", line 526, in _request
resp = opener.open(request, timeout=self._timeout)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 464, in open
response = self._open(req, data)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 482, in _open
'_open', req)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 442, in _call_chain
result = func(*args)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 1211, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/test/anaconda2/envs/python3/lib/python3.4/urllib/request.py", line 1185, in do_open
raise URLError(err)
**urllib.error.URLError: <urlopen error [Errno 111] Connection refused>**
Using 0.9.1 with lots of proxies.
The log is full of unhandled Exceptions.
1)
2)