Hi,
Below is the stack trace:
root@blr-1st-1-dhcp622:~/Dump/htcap# python htcap.py crawl https://i-cant-tell-you-this.com test.db
Initializing . . Traceback (most recent call last):
File "htcap.py", line 49, in <module>
Crawler(sys.argv[2:])
File "/root/Dump/htcap/core/crawl/crawler.py", line 82, in __init__
self.main(argv)
File "/root/Dump/htcap/core/crawl/crawler.py", line 557, in main
start_requests = self.init_crawl(start_req, initial_checks, get_robots_txt)
File "/root/Dump/htcap/core/crawl/crawler.py", line 359, in init_crawl
rrequests = self.get_requests_from_robots(start_req)
File "/root/Dump/htcap/core/crawl/crawler.py", line 203, in get_requests_from_robots
lines = httpget.get_file().split("\n")
File "/root/Dump/htcap/core/lib/http_get.py", line 209, in get_file
res = opener.open(req, None, self.timeout)
File "/usr/lib/python2.7/urllib2.py", line 435, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 467, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 654, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1228, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1201, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1121, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 438, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 394, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/usr/lib/python2.7/socket.py", line 480, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 104] Connection reset by peer
Both curl and Python's requests library are able to GET the page, though.
Hi,
Below is the stack trace:
root@blr-1st-1-dhcp622:~/Dump/htcap# python htcap.py crawl https://i-cant-tell-you-this.com test.db
Initializing . . Traceback (most recent call last):
File "htcap.py", line 49, in <module>
Crawler(sys.argv[2:])
File "/root/Dump/htcap/core/crawl/crawler.py", line 82, in __init__
self.main(argv)
File "/root/Dump/htcap/core/crawl/crawler.py", line 557, in main
start_requests = self.init_crawl(start_req, initial_checks, get_robots_txt)
File "/root/Dump/htcap/core/crawl/crawler.py", line 359, in init_crawl
rrequests = self.get_requests_from_robots(start_req)
File "/root/Dump/htcap/core/crawl/crawler.py", line 203, in get_requests_from_robots
lines = httpget.get_file().split("\n")
File "/root/Dump/htcap/core/lib/http_get.py", line 209, in get_file
res = opener.open(req, None, self.timeout)
File "/usr/lib/python2.7/urllib2.py", line 435, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 467, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 654, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1228, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1201, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1121, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 438, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 394, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/usr/lib/python2.7/socket.py", line 480, in readline
data = self._sock.recv(self._rbufsize)
socket.error: [Errno 104] Connection reset by peer
Both curl and Python's requests library are able to GET the page, though.