ssardina / tapp-fixture

Produce TeamApp Schedule Fixtures
Apache License 2.0
2 stars 0 forks source link

Crashes accessing tinyurl repetitively #5

Closed ssardina closed 1 year ago

ssardina commented 1 year ago

When building the dataframe for TeamApp, function phq_club.to_teamsapp_schedule crashes after repeatedly calling the TinyURL URL-shortening service:

023-10-18 11:34:07 WARNING Certificate did not match expected hostname: da.gd. Certificate: {'subject': ((('countryName', 'AU'),), (('stateOrProvinceName', 'Victoria'),), (('localityName', 'Melbourne'),), (('organizationName', 'Telstra Limited'),), (('commonName', 'landing.telstra.com'),)), 'issuer': ((('countryName', 'US'),), (('organizationName', 'DigiCert Inc'),), (('commonName', 'DigiCert Global G2 TLS RSA SHA256 2020 CA1'),)), 'version': 3, 'serialNumber': '080D2501E6CE01CE9850A4888D70F1D2', 'notBefore': 'Mar 24 00:00:00 2023 GMT', 'notAfter': 'Mar 24 23:59:59 2024 GMT', 'subjectAltName': (('DNS', 'landing.telstra.com'), ('DNS', 'warning-security.landing.telstra.com'), ('DNS', 'warning-copyright.landing.telstra.com'), ('DNS', 'warning-noclass.landing.telstra.com'), ('DNS', 'warning-phishing.landing.telstra.com'), ('DNS', 'warning-botnet.landing.telstra.com')), 'OCSP': ('http://ocsp.digicert.com',), 'caIssuers': ('http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt',), 'crlDistributionPoints': ('http://crl3.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crl', 'http://crl4.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crl')}
​
---------------------------------------------------------------------------
timeout                                   Traceback (most recent call last)
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:426, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    422         except BaseException as e:
    423             # Remove the TypeError from the exception chain in
    424             # Python 3 (including for exceptions like SystemExit).
    425             # Otherwise it looks like a bug in the code.
--> 426             six.raise_from(e, None)
    427 except (SocketTimeout, BaseSSLError, SocketError) as e:
​
File <string>:3, in raise_from(value, from_value)
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:421, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    420 try:
--> 421     httplib_response = conn.getresponse()
    422 except BaseException as e:
    423     # Remove the TypeError from the exception chain in
    424     # Python 3 (including for exceptions like SystemExit).
    425     # Otherwise it looks like a bug in the code.
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/http/client.py:1347, in HTTPConnection.getresponse(self)
   1346 try:
-> 1347     response.begin()
   1348 except ConnectionError:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/http/client.py:307, in HTTPResponse.begin(self)
    306 while True:
--> 307     version, status, reason = self._read_status()
    308     if status != CONTINUE:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/http/client.py:268, in HTTPResponse._read_status(self)
    267 def _read_status(self):
--> 268     line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    269     if len(line) > _MAXLINE:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/socket.py:704, in SocketIO.readinto(self, b)
    703 try:
--> 704     return self._sock.recv_into(b)
    705 except timeout:
​
timeout: timed out
​
During handling of the above exception, another exception occurred:
​
ReadTimeoutError                          Traceback (most recent call last)
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:726, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    724     e = ProtocolError("Connection aborted.", e)
--> 726 retries = retries.increment(
    727     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    728 )
    729 retries.sleep()
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/util/retry.py:410, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    409 if read is False or not self._is_method_retryable(method):
--> 410     raise six.reraise(type(error), error, _stacktrace)
    411 elif read is not None:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/packages/six.py:735, in reraise(tp, value, tb)
    734         raise value.with_traceback(tb)
--> 735     raise value
    736 finally:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:670, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
    671     conn,
    672     method,
    673     url,
    674     timeout=timeout_obj,
    675     body=body,
    676     headers=headers,
    677     chunked=chunked,
    678 )
    680 # If we're going to release the connection in ``finally:``, then
    681 # the response doesn't need to know about the connection. Otherwise
    682 # it will also try to release it and we'll have a double-release
    683 # mess.
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:428, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    427 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 428     self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
    429     raise
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:335, in HTTPConnectionPool._raise_timeout(self, err, url, timeout_value)
    334 if isinstance(err, SocketTimeout):
--> 335     raise ReadTimeoutError(
    336         self, url, "Read timed out. (read timeout=%s)" % timeout_value
    337     )
    339 # See the above comment about EAGAIN in Python 3. In Python 2 we have
    340 # to specifically catch it and throw the timeout error
​
ReadTimeoutError: HTTPConnectionPool(host='tinyurl.com', port=80): Read timed out. (read timeout=2)
​
During handling of the above exception, another exception occurred:
​
ReadTimeout                               Traceback (most recent call last)
File ~/Dev/Perso/tapp-fixture/utils.py:40, in shorten_url(url)
     39 try:
---> 40     return s.tinyurl.short(url)
     41 except:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/pyshorteners/shorteners/tinyurl.py:34, in Shortener.short(self, url)
     33 url = self.clean_url(url)
---> 34 response = self._get(self.api_url, params=dict(url=url))
     35 if response.ok:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/pyshorteners/base.py:63, in BaseShortener._get(self, url, params, headers)
     62 url = self.clean_url(url)
---> 63 response = requests.get(
     64     url,
     65     params=params,
     66     verify=self.verify,
     67     timeout=self.timeout,
     68     headers=headers,
     69     proxies=self.proxies,
     70 )
     71 return response
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/api.py:73, in get(url, params, **kwargs)
     63 r"""Sends a GET request.
     64 
     65 :param url: URL for the new :class:`Request` object.
   (...)
     70 :rtype: requests.Response
     71 """
---> 73 return request("get", url, params=params, **kwargs)
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/api.py:59, in request(method, url, **kwargs)
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/requests/adapters.py:532, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    531 elif isinstance(e, ReadTimeoutError):
--> 532     raise ReadTimeout(e, request=request)
    533 elif isinstance(e, _InvalidHeader):
​
ReadTimeout: HTTPConnectionPool(host='tinyurl.com', port=80): Read timed out. (read timeout=2)
​
During handling of the above exception, another exception occurred:
​
SSLCertVerificationError                  Traceback (most recent call last)
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:670, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
    671     conn,
    672     method,
    673     url,
    674     timeout=timeout_obj,
    675     body=body,
    676     headers=headers,
    677     chunked=chunked,
    678 )
    680 # If we're going to release the connection in ``finally:``, then
    681 # the response doesn't need to know about the connection. Otherwise
    682 # it will also try to release it and we'll have a double-release
    683 # mess.
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:381, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    380 try:
--> 381     self._validate_conn(conn)
    382 except (SocketTimeout, BaseSSLError) as e:
    383     # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connectionpool.py:978, in HTTPSConnectionPool._validate_conn(self, conn)
    977 if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
--> 978     conn.connect()
    980 if not conn.is_verified:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connection.py:397, in HTTPSConnection.connect(self)
    388         warnings.warn(
    389             (
    390                 "Certificate for {0} has no `subjectAltName`, falling back to check for a "
   (...)
    395             SubjectAltNameWarning,
    396         )
--> 397     _match_hostname(cert, self.assert_hostname or server_hostname)
    399 self.is_verified = (
    400     context.verify_mode == ssl.CERT_REQUIRED
    401     or self.assert_fingerprint is not None
    402 )
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/site-packages/urllib3/connection.py:407, in _match_hostname(cert, asserted_hostname)
    406 try:
--> 407     match_hostname(cert, asserted_hostname)
    408 except CertificateError as e:
​
File ~/.pyenv/versions/3.9.2/lib/python3.9/ssl.py:416, in match_hostname(cert, hostname)
    415 if len(dnsnames) > 1:
--> 416     raise CertificateError("hostname %r "
    417         "doesn't match either of %s"
    418         % (hostname, ', '.join(map(repr, dnsnames))))
    419 elif len(dnsnames) == 1:
​
SSLCertVerificationError: ("hostname 'da.gd' doesn't match either of 'landing.telstra.com', 'warning-security.landing.telstra.com', 'warning-copyright.landing.telstra.com', 'warning-noclass.landing.telstra.com', 'warning-phishing.landing.telstra.com', 'warning-botnet.landing.telstra.com'",)
​
During handling of the above exception, another exception occurred:
​
MaxRetryError                             Traceback (most recent call last)
ssardina commented 1 year ago

I think TinyURL resets the connection after realizing that there were many consecutive requests.

We can fix this by catching the exception, waiting 1 second, and then re-trying:

# TinyURL shortener service
def shorten_url(url, max_attempts=30, delay=1):
    """Return a shortened version of ``url`` using the TinyURL service.

    The service may fail (e.g. time out) when called many times in a row,
    so a failed request is retried after a short pause instead of crashing
    the caller on the first error.

    Args:
        url: The URL to shorten.
        max_attempts: Maximum number of requests to make before giving up.
            Values below 1 are treated as 1.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        Whatever ``pyshorteners``' ``tinyurl.short`` returns for ``url``.

    Raises:
        Exception: The error raised by the last attempt, if every attempt
            failed.
    """
    s = pyshorteners.Shortener()
    attempts = max(1, max_attempts)

    # NOTE: a fallback to the da.gd shortener (s.dagd.short) used to live
    # here but was disabled; retrying TinyURL is used instead.
    for attempt in range(1, attempts + 1):
        try:
            return s.tinyurl.short(url)
        except Exception:
            # `Exception` (not a bare `except`) so that KeyboardInterrupt and
            # SystemExit still stop the program while we are retrying.
            if attempt == attempts:
                # Out of attempts: surface the real error instead of
                # retrying forever.
                raise
            time.sleep(delay)

This is a bit of a hack, but it is OK in this situation.