Closed lovit closed 4 years ago
인증서 오류
(korpora) ➜ Korpora git:(CI#16) ✗ python3 -m pytest --cov-report=xml --cov=./ tests/*
========================= test session starts =========================
platform darwin -- Python 3.7.3, pytest-6.0.1, py-1.9.0, pluggy-0.13.1
rootdir: /Users/david/works/Korpora
plugins: cov-2.10.1
collected 1 item
tests/test_nsmc.py F [100%]
============================== FAILURES ===============================
_____________________________ test_usage ______________________________
self = <urllib.request.HTTPSHandler object at 0x10cb9cc50>
http_class = <class 'http.client.HTTPSConnection'>
req = <urllib.request.Request object at 0x10cb6eac8>
http_conn_args = {'check_hostname': None, 'context': None}
host = 'raw.githubusercontent.com'
h = <http.client.HTTPSConnection object at 0x10cb9ccc0>
def do_open(self, http_class, req, **http_conn_args):
"""Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
"""
host = req.host
if not host:
raise URLError('no host given')
# will parse host:port
h = http_class(host, timeout=req.timeout, **http_conn_args)
h.set_debuglevel(self._debuglevel)
headers = dict(req.unredirected_hdrs)
headers.update({k: v for k, v in req.headers.items()
if k not in headers})
# TODO(jhylton): Should this be redesigned to handle
# persistent connections?
# We want to make an HTTP/1.1 request, but the addinfourl
# class isn't prepared to deal with a persistent connection.
# It will try to read all remaining data from the socket,
# which will block while the server waits for the next request.
# So make sure the connection gets closed after the (only)
# request.
headers["Connection"] = "close"
headers = {name.title(): val for name, val in headers.items()}
if req._tunnel_host:
tunnel_headers = {}
proxy_auth_hdr = "Proxy-Authorization"
if proxy_auth_hdr in headers:
tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
# Proxy-Authorization should not be sent to origin
# server.
del headers[proxy_auth_hdr]
h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
try:
try:
h.request(req.get_method(), req.selector, req.data, headers,
> encode_chunked=req.has_header('Transfer-encoding'))
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:1317:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
method = 'GET', url = '/e9t/nsmc/master/ratings_train.txt', body = None
headers = {'Connection': 'close', 'Host': 'raw.githubusercontent.com', 'User-Agent': 'Python-urllib/3.7'}
def request(self, method, url, body=None, headers={}, *,
encode_chunked=False):
"""Send a complete request to the server."""
> self._send_request(method, url, body, headers, encode_chunked)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:1229:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
method = 'GET', url = '/e9t/nsmc/master/ratings_train.txt', body = None
headers = {'Connection': 'close', 'Host': 'raw.githubusercontent.com', 'User-Agent': 'Python-urllib/3.7'}
encode_chunked = False
def _send_request(self, method, url, body, headers, encode_chunked):
# Honor explicitly requested Host: and Accept-Encoding: headers.
header_names = frozenset(k.lower() for k in headers)
skips = {}
if 'host' in header_names:
skips['skip_host'] = 1
if 'accept-encoding' in header_names:
skips['skip_accept_encoding'] = 1
self.putrequest(method, url, **skips)
# chunked encoding will happen if HTTP/1.1 is used and either
# the caller passes encode_chunked=True or the following
# conditions hold:
# 1. content-length has not been explicitly set
# 2. the body is a file or iterable, but not a str or bytes-like
# 3. Transfer-Encoding has NOT been explicitly set by the caller
if 'content-length' not in header_names:
# only chunk body if not explicitly set for backwards
# compatibility, assuming the client code is already handling the
# chunking
if 'transfer-encoding' not in header_names:
# if content-length cannot be automatically determined, fall
# back to chunked encoding
encode_chunked = False
content_length = self._get_content_length(body, method)
if content_length is None:
if body is not None:
if self.debuglevel > 0:
print('Unable to determine size of %r' % body)
encode_chunked = True
self.putheader('Transfer-Encoding', 'chunked')
else:
self.putheader('Content-Length', str(content_length))
else:
encode_chunked = False
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
# RFC 2616 Section 3.7.1 says that text default has a
# default charset of iso-8859-1.
body = _encode(body, 'body')
> self.endheaders(body, encode_chunked=encode_chunked)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:1275:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
message_body = None
def endheaders(self, message_body=None, *, encode_chunked=False):
"""Indicate that the last header line has been sent to the server.
This method sends the request to the server. The optional message_body
argument can be used to pass a message body associated with the
request.
"""
if self.__state == _CS_REQ_STARTED:
self.__state = _CS_REQ_SENT
else:
raise CannotSendHeader()
> self._send_output(message_body, encode_chunked=encode_chunked)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:1224:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
message_body = None, encode_chunked = False
def _send_output(self, message_body=None, encode_chunked=False):
"""Send the currently buffered request and clear the buffer.
Appends an extra \\r\\n to the buffer.
A message_body may be specified, to be appended to the request.
"""
self._buffer.extend((b"", b""))
msg = b"\r\n".join(self._buffer)
del self._buffer[:]
> self.send(msg)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:1016:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
data = b'GET /e9t/nsmc/master/ratings_train.txt HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: raw.githubusercontent.com\r\nUser-Agent: Python-urllib/3.7\r\nConnection: close\r\n\r\n'
def send(self, data):
"""Send `data' to the server.
``data`` can be a string object, a bytes object, an array object, a
file-like object that supports a .read() method, or an iterable object.
"""
if self.sock is None:
if self.auto_open:
> self.connect()
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:956:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <http.client.HTTPSConnection object at 0x10cb9ccc0>
def connect(self):
"Connect to a host on a given (SSL) port."
super().connect()
if self._tunnel_host:
server_hostname = self._tunnel_host
else:
server_hostname = self.host
self.sock = self._context.wrap_socket(self.sock,
> server_hostname=server_hostname)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/http/client.py:1392:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ssl.SSLContext object at 0x10d017388>
sock = <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6>
server_side = False, do_handshake_on_connect = True
suppress_ragged_eofs = True
server_hostname = 'raw.githubusercontent.com', session = None
def wrap_socket(self, sock, server_side=False,
do_handshake_on_connect=True,
suppress_ragged_eofs=True,
server_hostname=None, session=None):
# SSLSocket class handles server_hostname encoding before it calls
# ctx._wrap_socket()
return self.sslsocket_class._create(
sock=sock,
server_side=server_side,
do_handshake_on_connect=do_handshake_on_connect,
suppress_ragged_eofs=suppress_ragged_eofs,
server_hostname=server_hostname,
context=self,
> session=session
)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/ssl.py:412:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'ssl.SSLSocket'>
sock = <socket.socket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6>
server_side = False, do_handshake_on_connect = True
suppress_ragged_eofs = True
server_hostname = 'raw.githubusercontent.com'
context = <ssl.SSLContext object at 0x10d017388>, session = None
@classmethod
def _create(cls, sock, server_side=False, do_handshake_on_connect=True,
suppress_ragged_eofs=True, server_hostname=None,
context=None, session=None):
if sock.getsockopt(SOL_SOCKET, SO_TYPE) != SOCK_STREAM:
raise NotImplementedError("only stream sockets are supported")
if server_side:
if server_hostname:
raise ValueError("server_hostname can only be specified "
"in client mode")
if session is not None:
raise ValueError("session can only be specified in "
"client mode")
if context.check_hostname and not server_hostname:
raise ValueError("check_hostname requires server_hostname")
kwargs = dict(
family=sock.family, type=sock.type, proto=sock.proto,
fileno=sock.fileno()
)
self = cls.__new__(cls, **kwargs)
super(SSLSocket, self).__init__(**kwargs)
self.settimeout(sock.gettimeout())
sock.detach()
self._context = context
self._session = session
self._closed = False
self._sslobj = None
self.server_side = server_side
self.server_hostname = context._encode_hostname(server_hostname)
self.do_handshake_on_connect = do_handshake_on_connect
self.suppress_ragged_eofs = suppress_ragged_eofs
# See if we are connected
try:
self.getpeername()
except OSError as e:
if e.errno != errno.ENOTCONN:
raise
connected = False
else:
connected = True
self._connected = connected
if connected:
# create the SSL object
try:
self._sslobj = self._context._wrap_socket(
self, server_side, self.server_hostname,
owner=self, session=self._session,
)
if do_handshake_on_connect:
timeout = self.gettimeout()
if timeout == 0.0:
# non-blocking
raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
> self.do_handshake()
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/ssl.py:853:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ssl.SSLSocket [closed] fd=-1, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=0>
block = False
def do_handshake(self, block=False):
"""Perform a TLS/SSL handshake."""
self._check_connected()
timeout = self.gettimeout()
try:
if timeout == 0.0 and block:
self.settimeout(None)
> self._sslobj.do_handshake()
E ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/ssl.py:1117: SSLCertVerificationError
During handling of the above exception, another exception occurred:
def test_usage():
root_dir = './data/'
> nsmc = Korpora.load('nsmc', root_dir)
tests/test_nsmc.py:6:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
Korpora/loader.py:22: in load
corpora = [KORPORA[corpus_name](root_dir, force_download) for corpus_name in corpus_names]
Korpora/loader.py:22: in <listcomp>
corpora = [KORPORA[corpus_name](root_dir, force_download) for corpus_name in corpus_names]
Korpora/korpora_nsmc.py:32: in __init__
fetch('nsmc', root_dir)
Korpora/fetch.py:17: in fetch
do_download(information, root_dir)
Korpora/fetch.py:25: in do_download
download(url, destination, corpus_name)
Korpora/utils.py:62: in download
request.urlretrieve(url, filename=local_path, reporthook=_reporthook(t))
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:247: in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:222: in urlopen
return opener.open(url, data, timeout)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:525: in open
response = self._open(req, data)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:543: in _open
'_open', req)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:503: in _call_chain
result = func(*args)
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:1360: in https_open
context=self._context, check_hostname=self._check_hostname)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <urllib.request.HTTPSHandler object at 0x10cb9cc50>
http_class = <class 'http.client.HTTPSConnection'>
req = <urllib.request.Request object at 0x10cb6eac8>
http_conn_args = {'check_hostname': None, 'context': None}
host = 'raw.githubusercontent.com'
h = <http.client.HTTPSConnection object at 0x10cb9ccc0>
def do_open(self, http_class, req, **http_conn_args):
"""Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
"""
host = req.host
if not host:
raise URLError('no host given')
# will parse host:port
h = http_class(host, timeout=req.timeout, **http_conn_args)
h.set_debuglevel(self._debuglevel)
headers = dict(req.unredirected_hdrs)
headers.update({k: v for k, v in req.headers.items()
if k not in headers})
# TODO(jhylton): Should this be redesigned to handle
# persistent connections?
# We want to make an HTTP/1.1 request, but the addinfourl
# class isn't prepared to deal with a persistent connection.
# It will try to read all remaining data from the socket,
# which will block while the server waits for the next request.
# So make sure the connection gets closed after the (only)
# request.
headers["Connection"] = "close"
headers = {name.title(): val for name, val in headers.items()}
if req._tunnel_host:
tunnel_headers = {}
proxy_auth_hdr = "Proxy-Authorization"
if proxy_auth_hdr in headers:
tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
# Proxy-Authorization should not be sent to origin
# server.
del headers[proxy_auth_hdr]
h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
try:
try:
h.request(req.get_method(), req.selector, req.data, headers,
encode_chunked=req.has_header('Transfer-encoding'))
except OSError as err: # timeout error
> raise URLError(err)
E urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1056)>
/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.7/lib/python3.7/urllib/request.py:1319: URLError
------------------------ Captured stdout call -------------------------
Fetch nsmc to /Users/david/works/Korpora/data
------------------------ Captured stderr call -------------------------
[nsmc] download ratings_train.txt: 0.00B [00:00, ?B/s]
---------- coverage: platform darwin, python 3.7.3-final-0 -----------
Coverage XML written to file coverage.xml
======================= short test summary info =======================
FAILED tests/test_nsmc.py::test_usage - urllib.error.URLError: <urlo...
========================== 1 failed in 1.36s ==========================
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
docker image
FROM ubuntu:latest
MAINTAINER ratsgo "ratsgo@naver.com"
RUN apt-get update \ && apt-get install -y python3-pip python3-dev \ && cd /usr/local/bin \ && ln -s /usr/bin/python3 python
RUN apt-get install -y git-core
- commands
```bash
docker build -t ratsgo/korpora:0.1 -f dockerfile .
docker login
docker push ratsgo/korpora:0.1
docker tag ratsgo/korpora:0.1 ratsgo/korpora:latest
docker push ratsgo/korpora:latest
dev gitflow 관련
- 이에 fetch.py에 다음 코드를 삽입하여 이슈 해결
- reference : https://stackoverflow.com/questions/50236117/scraping-ssl-certificate-verify-failed-error-for-http-en-wikipedia-org
import ssl ssl._create_default_https_context = ssl._create_unverified_context
이 인증서 오류라는게 어떤 문제인가요? 왜 문제가 발생하는거고 어떤 방향에서 해결하면 되는 것인지도 공부 차 궁금해서 질문드려요~