recommenders-team / recommenders

Best Practices on Recommendation Systems
https://recommenders-team.github.io/recommenders/intro.html
MIT License
19.28k stars 3.11k forks source link

[BUG] Test breaking due to error with Movielens host #2121

Closed miguelgfierro closed 4 months ago

miguelgfierro commented 4 months ago

Description

2024-06-26T06:13:26.3278111Z tests/functional/examples/test_notebooks_pyspark.py:21: 
2024-06-26T06:13:26.3278366Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
2024-06-26T06:13:26.3278767Z recommenders/utils/notebook_utils.py:102: in execute_notebook
2024-06-26T06:13:26.3279014Z     executed_notebook, _ = execute_preprocessor.preprocess(
2024-06-26T06:13:26.3279742Z /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/nbconvert/preprocessors/execute.py:103: in preprocess
2024-06-26T06:13:26.3279938Z     self.preprocess_cell(cell, resources, index)
2024-06-26T06:13:26.3280660Z /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/nbconvert/preprocessors/execute.py:124: in preprocess_cell
2024-06-26T06:13:26.3280904Z     cell = self.execute_cell(cell, index, store_history=True)
2024-06-26T06:13:26.3281547Z /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/jupyter_core/utils/__init__.py:165: in wrapped
2024-06-26T06:13:26.3281713Z     return loop.run_until_complete(inner)
2024-06-26T06:13:26.3282309Z /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/asyncio/base_events.py:649: in run_until_complete
2024-06-26T06:13:26.3282457Z     return future.result()
2024-06-26T06:13:26.3283089Z /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/nbclient/client.py:1062: in async_execute_cell
2024-06-26T06:13:26.3283356Z     await self._check_raise_for_error(cell, cell_index, exec_reply)
2024-06-26T06:13:26.3283613Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
2024-06-26T06:13:26.3283619Z 
2024-06-26T06:13:26.3284019Z self = <nbconvert.preprocessors.execute.ExecutePreprocessor object at 0x147ea0ed4df0>
2024-06-26T06:13:26.3285150Z cell = {'cell_type': 'code', 'execution_count': 4, 'metadata': {'execution': {'iopub.status.busy': '2024-06-26T06:12:17.71881...LongType()),\n    )\n)\n\ndata = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)\ndata.show()'}
2024-06-26T06:13:26.3285282Z cell_index = 9
2024-06-26T06:13:26.3286454Z exec_reply = {'buffers': [], 'content': {'ename': 'SSLError', 'engine_info': {'engine_id': -1, 'engine_uuid': '05c826bd-c5db-4d83-a...e, 'engine': '05c826bd-c5db-4d83-ad08-261ef5c7b3ce', 'started': '2024-06-26T06:12:17.719165Z', 'status': 'error'}, ...}
2024-06-26T06:13:26.3286461Z 
2024-06-26T06:13:26.3286620Z     async def _check_raise_for_error(
2024-06-26T06:13:26.3286960Z         self, cell: NotebookNode, cell_index: int, exec_reply: dict[str, t.Any] | None
2024-06-26T06:13:26.3287108Z     ) -> None:
2024-06-26T06:13:26.3287265Z         if exec_reply is None:
2024-06-26T06:13:26.3287399Z             return None
2024-06-26T06:13:26.3287517Z     
2024-06-26T06:13:26.3287703Z         exec_reply_content = exec_reply["content"]
2024-06-26T06:13:26.3287886Z         if exec_reply_content["status"] != "error":
2024-06-26T06:13:26.3288018Z             return None
2024-06-26T06:13:26.3288134Z     
2024-06-26T06:13:26.3288376Z         cell_allows_errors = (not self.force_raise_errors) and (
2024-06-26T06:13:26.3288522Z             self.allow_errors
2024-06-26T06:13:26.3288794Z             or exec_reply_content.get("ename") in self.allow_error_names
2024-06-26T06:13:26.3289211Z             or "raises-exception" in cell.metadata.get("tags", [])
2024-06-26T06:13:26.3289339Z         )
2024-06-26T06:13:26.3289473Z         await run_hook(
2024-06-26T06:13:26.3289810Z             self.on_cell_error, cell=cell, cell_index=cell_index, execute_reply=exec_reply
2024-06-26T06:13:26.3289930Z         )
2024-06-26T06:13:26.3290079Z         if not cell_allows_errors:
2024-06-26T06:13:26.3290392Z >           raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
2024-06-26T06:13:26.3290844Z E           nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
2024-06-26T06:13:26.3291010Z E           ------------------
2024-06-26T06:13:26.3291507Z E           # Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.
2024-06-26T06:13:26.3291780Z E           schema = StructType(
2024-06-26T06:13:26.3291908Z E               (
2024-06-26T06:13:26.3292116Z E                   StructField(COL_USER, IntegerType()),
2024-06-26T06:13:26.3292311Z E                   StructField(COL_ITEM, IntegerType()),
2024-06-26T06:13:26.3292507Z E                   StructField(COL_RATING, FloatType()),
2024-06-26T06:13:26.3292719Z E                   StructField(COL_TIMESTAMP, LongType()),
2024-06-26T06:13:26.3292842Z E               )
2024-06-26T06:13:26.3292967Z E           )
2024-06-26T06:13:26.3293089Z E           
2024-06-26T06:13:26.3293436Z E           data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)
2024-06-26T06:13:26.3293572Z E           data.show()
2024-06-26T06:13:26.3293740Z E           ------------------
2024-06-26T06:13:26.3293862Z E           
2024-06-26T06:13:26.3293983Z E           
2024-06-26T06:13:26.3294340Z E           ---------------------------------------------------------------------------
2024-06-26T06:13:26.3294757Z E           SSLCertVerificationError                  Traceback (most recent call last)
2024-06-26T06:13:26.3296310Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connectionpool.py:466, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
2024-06-26T06:13:26.3296546Z E               465 try:
2024-06-26T06:13:26.3297114Z E           --> 466     self._validate_conn(conn)
2024-06-26T06:13:26.3297587Z E               467 except (SocketTimeout, BaseSSLError) as e:
2024-06-26T06:13:26.3297709Z E           
2024-06-26T06:13:26.3298726Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connectionpool.py:1095, in HTTPSConnectionPool._validate_conn(self, conn)
2024-06-26T06:13:26.3299078Z E              1094 if conn.is_closed:
2024-06-26T06:13:26.3299513Z E           -> 1095     conn.connect()
2024-06-26T06:13:26.3300036Z E              1097 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
2024-06-26T06:13:26.3300158Z E           
2024-06-26T06:13:26.3301059Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connection.py:652, in HTTPSConnection.connect(self)
2024-06-26T06:13:26.3301732Z E               650 server_hostname_rm_dot = server_hostname.rstrip(".")
2024-06-26T06:13:26.3302239Z E           --> 652 sock_and_verified = _ssl_wrap_socket_and_match_hostname(
2024-06-26T06:13:26.3302773Z E               653     sock=sock,
2024-06-26T06:13:26.3303407Z E               654     cert_reqs=self.cert_reqs,
2024-06-26T06:13:26.3304043Z E               655     ssl_version=self.ssl_version,
2024-06-26T06:13:26.3304745Z E               656     ssl_minimum_version=self.ssl_minimum_version,
2024-06-26T06:13:26.3305457Z E               657     ssl_maximum_version=self.ssl_maximum_version,
2024-06-26T06:13:26.3306196Z E               658     ca_certs=self.ca_certs,
2024-06-26T06:13:26.3306833Z E               659     ca_cert_dir=self.ca_cert_dir,
2024-06-26T06:13:26.3307488Z E               660     ca_cert_data=self.ca_cert_data,
2024-06-26T06:13:26.3308113Z E               661     cert_file=self.cert_file,
2024-06-26T06:13:26.3308727Z E               662     key_file=self.key_file,
2024-06-26T06:13:26.3309409Z E               663     key_password=self.key_password,
2024-06-26T06:13:26.3309945Z E               664     server_hostname=server_hostname_rm_dot,
2024-06-26T06:13:26.3310583Z E               665     ssl_context=self.ssl_context,
2024-06-26T06:13:26.3311032Z E               666     tls_in_tls=tls_in_tls,
2024-06-26T06:13:26.3311708Z E               667     assert_hostname=self.assert_hostname,
2024-06-26T06:13:26.3312420Z E               668     assert_fingerprint=self.assert_fingerprint,
2024-06-26T06:13:26.3312647Z E               669 )
2024-06-26T06:13:26.3313156Z E               670 self.sock = sock_and_verified.socket
2024-06-26T06:13:26.3313281Z E           
2024-06-26T06:13:26.3315152Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connection.py:805, in _ssl_wrap_socket_and_match_hostname(sock, cert_reqs, ssl_version, ssl_minimum_version, ssl_maximum_version, cert_file, key_file, key_password, ca_certs, ca_cert_dir, ca_cert_data, assert_hostname, assert_fingerprint, server_hostname, ssl_context, tls_in_tls)
2024-06-26T06:13:26.3315481Z E               803         server_hostname = normalized
2024-06-26T06:13:26.3316057Z E           --> 805 ssl_sock = ssl_wrap_socket(
2024-06-26T06:13:26.3316631Z E               806     sock=sock,
2024-06-26T06:13:26.3317089Z E               807     keyfile=key_file,
2024-06-26T06:13:26.3317539Z E               808     certfile=cert_file,
2024-06-26T06:13:26.3318013Z E               809     key_password=key_password,
2024-06-26T06:13:26.3318453Z E               810     ca_certs=ca_certs,
2024-06-26T06:13:26.3318917Z E               811     ca_cert_dir=ca_cert_dir,
2024-06-26T06:13:26.3319398Z E               812     ca_cert_data=ca_cert_data,
2024-06-26T06:13:26.3320054Z E               813     server_hostname=server_hostname,
2024-06-26T06:13:26.3320505Z E               814     ssl_context=context,
2024-06-26T06:13:26.3320952Z E               815     tls_in_tls=tls_in_tls,
2024-06-26T06:13:26.3321202Z E               816 )
2024-06-26T06:13:26.3321430Z E               818 try:
2024-06-26T06:13:26.3321554Z E           
2024-06-26T06:13:26.3323042Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/util/ssl_.py:465, in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
2024-06-26T06:13:26.3323294Z E               463     pass
2024-06-26T06:13:26.3324264Z E           --> 465 ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
2024-06-26T06:13:26.3324532Z E               466 return ssl_sock
2024-06-26T06:13:26.3324657Z E           
2024-06-26T06:13:26.3325711Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/util/ssl_.py:509, in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
2024-06-26T06:13:26.3326167Z E               507     return SSLTransport(sock, ssl_context, server_hostname)
2024-06-26T06:13:26.3327174Z E           --> 509 return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
2024-06-26T06:13:26.3327301Z E           
2024-06-26T06:13:26.3328443Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/ssl.py:513, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
2024-06-26T06:13:26.3329116Z E               507 def wrap_socket(self, sock, server_side=False,
2024-06-26T06:13:26.3329536Z E               508                 do_handshake_on_connect=True,
2024-06-26T06:13:26.3329955Z E               509                 suppress_ragged_eofs=True,
2024-06-26T06:13:26.3330666Z E               510                 server_hostname=None, session=None):
2024-06-26T06:13:26.3331217Z E               511     # SSLSocket class handles server_hostname encoding before it calls
2024-06-26T06:13:26.3331524Z E               512     # ctx._wrap_socket()
2024-06-26T06:13:26.3332259Z E           --> 513     return self.sslsocket_class._create(
2024-06-26T06:13:26.3332675Z E               514         sock=sock,
2024-06-26T06:13:26.3333156Z E               515         server_side=server_side,
2024-06-26T06:13:26.3333847Z E               516         do_handshake_on_connect=do_handshake_on_connect,
2024-06-26T06:13:26.3334403Z E               517         suppress_ragged_eofs=suppress_ragged_eofs,
2024-06-26T06:13:26.3334908Z E               518         server_hostname=server_hostname,
2024-06-26T06:13:26.3335374Z E               519         context=self,
2024-06-26T06:13:26.3335773Z E               520         session=session
2024-06-26T06:13:26.3336002Z E               521     )
2024-06-26T06:13:26.3336133Z E           
2024-06-26T06:13:26.3337837Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/ssl.py:1104, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
2024-06-26T06:13:26.3339350Z E              1103             raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
2024-06-26T06:13:26.3340202Z E           -> 1104         self.do_handshake()
2024-06-26T06:13:26.3341035Z E              1105 except (OSError, ValueError):
2024-06-26T06:13:26.3341235Z E           
2024-06-26T06:13:26.3342487Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/ssl.py:1375, in SSLSocket.do_handshake(self, block)
2024-06-26T06:13:26.3343307Z E              1374         self.settimeout(None)
2024-06-26T06:13:26.3344411Z E           -> 1375     self._sslobj.do_handshake()
2024-06-26T06:13:26.3344822Z E              1376 finally:
2024-06-26T06:13:26.3345015Z E           
2024-06-26T06:13:26.3346138Z E           SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)
2024-06-26T06:13:26.3346350Z E           
2024-06-26T06:13:26.3346831Z E           During handling of the above exception, another exception occurred:
2024-06-26T06:13:26.3347018Z E           
2024-06-26T06:13:26.3347629Z E           SSLError                                  Traceback (most recent call last)
2024-06-26T06:13:26.3350369Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connectionpool.py:789, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
2024-06-26T06:13:26.3350872Z E               788 # Make the request on the HTTPConnection object
2024-06-26T06:13:26.3351449Z E           --> 789 response = self._make_request(
2024-06-26T06:13:26.3351742Z E               790     conn,
2024-06-26T06:13:26.3352025Z E               791     method,
2024-06-26T06:13:26.3352294Z E               792     url,
2024-06-26T06:13:26.3352744Z E               793     timeout=timeout_obj,
2024-06-26T06:13:26.3353285Z E               794     body=body,
2024-06-26T06:13:26.3353731Z E               795     headers=headers,
2024-06-26T06:13:26.3354168Z E               796     chunked=chunked,
2024-06-26T06:13:26.3354608Z E               797     retries=retries,
2024-06-26T06:13:26.3355091Z E               798     response_conn=response_conn,
2024-06-26T06:13:26.3355589Z E               799     preload_content=preload_content,
2024-06-26T06:13:26.3356289Z E               800     decode_content=decode_content,
2024-06-26T06:13:26.3356765Z E               801     **response_kw,
2024-06-26T06:13:26.3356983Z E               802 )
2024-06-26T06:13:26.3357295Z E               804 # Everything went great!
2024-06-26T06:13:26.3357427Z E           
2024-06-26T06:13:26.3360028Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connectionpool.py:490, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
2024-06-26T06:13:26.3360954Z E               489         new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
2024-06-26T06:13:26.3361415Z E           --> 490     raise new_e
2024-06-26T06:13:26.3362260Z E               492 # conn.request() calls http.client.*.request, not the method in
2024-06-26T06:13:26.3363098Z E               493 # urllib3.request. It also calls makefile (recv) on the socket.
2024-06-26T06:13:26.3363314Z E           
2024-06-26T06:13:26.3364321Z E           SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)
2024-06-26T06:13:26.3364522Z E           
2024-06-26T06:13:26.3365025Z E           The above exception was the direct cause of the following exception:
2024-06-26T06:13:26.3365240Z E           
2024-06-26T06:13:26.3365932Z E           MaxRetryError                             Traceback (most recent call last)
2024-06-26T06:13:26.3367816Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
2024-06-26T06:13:26.3368527Z E               666 try:
2024-06-26T06:13:26.3369455Z E           --> 667     resp = conn.urlopen(
2024-06-26T06:13:26.3370488Z E               668         method=request.method,
2024-06-26T06:13:26.3371216Z E               669         url=url,
2024-06-26T06:13:26.3372245Z E               670         body=request.body,
2024-06-26T06:13:26.3373329Z E               671         headers=request.headers,
2024-06-26T06:13:26.3374527Z E               672         redirect=False,
2024-06-26T06:13:26.3375503Z E               673         assert_same_host=False,
2024-06-26T06:13:26.3376487Z E               674         preload_content=False,
2024-06-26T06:13:26.3377449Z E               675         decode_content=False,
2024-06-26T06:13:26.3378610Z E               676         retries=self.max_retries,
2024-06-26T06:13:26.3379410Z E               677         timeout=timeout,
2024-06-26T06:13:26.3380241Z E               678         chunked=chunked,
2024-06-26T06:13:26.3380663Z E               679     )
2024-06-26T06:13:26.3381676Z E               681 except (ProtocolError, OSError) as err:
2024-06-26T06:13:26.3381886Z E           
2024-06-26T06:13:26.3385014Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/connectionpool.py:843, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
2024-06-26T06:13:26.3386117Z E               841     new_e = ProtocolError("Connection aborted.", new_e)
2024-06-26T06:13:26.3387071Z E           --> 843 retries = retries.increment(
2024-06-26T06:13:26.3389994Z E               844     method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
2024-06-26T06:13:26.3390398Z E               845 )
2024-06-26T06:13:26.3390835Z E               846 retries.sleep()
2024-06-26T06:13:26.3391044Z E           
2024-06-26T06:13:26.3392958Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/urllib3/util/retry.py:519, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
2024-06-26T06:13:26.3393975Z E               518     reason = error or ResponseError(cause)
2024-06-26T06:13:26.3395359Z E           --> 519     raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
2024-06-26T06:13:26.3397317Z E               521 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
2024-06-26T06:13:26.3397549Z E           
2024-06-26T06:13:26.3400046Z E           MaxRetryError: HTTPSConnectionPool(host='files.grouplens.org', port=443): Max retries exceeded with url: /datasets/movielens/ml-1m.zip (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)')))
2024-06-26T06:13:26.3400595Z E           
2024-06-26T06:13:26.3401131Z E           During handling of the above exception, another exception occurred:
2024-06-26T06:13:26.3401342Z E           
2024-06-26T06:13:26.3402024Z E           SSLError                                  Traceback (most recent call last)
2024-06-26T06:13:26.3402356Z E           Cell In[4], line 11
2024-06-26T06:13:26.3403489Z E                 1 # Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.
2024-06-26T06:13:26.3403958Z E                 2 schema = StructType(
2024-06-26T06:13:26.3404253Z E                 3     (
2024-06-26T06:13:26.3404785Z E                 4         StructField(COL_USER, IntegerType()),
2024-06-26T06:13:26.3405083Z E              (...)
2024-06-26T06:13:26.3405398Z E                 8     )
2024-06-26T06:13:26.3405701Z E                 9 )
2024-06-26T06:13:26.3408031Z E           ---> 11 data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)
2024-06-26T06:13:26.3408457Z E                12 data.show()
2024-06-26T06:13:26.3408660Z E           
2024-06-26T06:13:26.3411040Z E           File /mnt/azureml/cr/j/eead6898f1904961a42d902e2e4e28f5/exe/wd/recommenders/datasets/movielens.py:445, in load_spark_df(spark, size, header, schema, local_cache_path, dbutils, title_col, genres_col, year_col)
2024-06-26T06:13:26.3411968Z E               443 with download_path(local_cache_path) as path:
2024-06-26T06:13:26.3413766Z E               444     filepath = os.path.join(path, "ml-{}.zip".format(size))
2024-06-26T06:13:26.3415183Z E           --> 445     datapath, item_datapath = _maybe_download_and_extract(size, filepath)
2024-06-26T06:13:26.3416683Z E               446     spark_datapath = "file:///" + datapath  # shorten form of file://localhost/
2024-06-26T06:13:26.3417552Z E               448     # Load movie features such as title, genres, and release year.
2024-06-26T06:13:26.3418623Z E               449     # Since the file size is small, we directly load as pd.DataFrame from the driver node
2024-06-26T06:13:26.3419386Z E               450     # and then convert into pyspark.sql.DataFrame
2024-06-26T06:13:26.3419591Z E           
2024-06-26T06:13:26.3421718Z E           File /mnt/azureml/cr/j/eead6898f1904961a42d902e2e4e28f5/exe/wd/recommenders/datasets/movielens.py:539, in _maybe_download_and_extract(size, dest_path)
2024-06-26T06:13:26.3422606Z E               536 item_path = os.path.join(dirs, item_filename)
2024-06-26T06:13:26.3424263Z E               538 if not os.path.exists(rating_path) or not os.path.exists(item_path):
2024-06-26T06:13:26.3424941Z E           --> 539     download_movielens(size, dest_path)
2024-06-26T06:13:26.3425338Z E               540     extract_movielens(size, rating_path, item_path, dest_path)
2024-06-26T06:13:26.3425890Z E               542 return rating_path, item_path
2024-06-26T06:13:26.3426019Z E           
2024-06-26T06:13:26.3427256Z E           File /mnt/azureml/cr/j/eead6898f1904961a42d902e2e4e28f5/exe/wd/recommenders/datasets/movielens.py:557, in download_movielens(size, dest_path)
2024-06-26T06:13:26.3428289Z E               555 url = "https://files.grouplens.org/datasets/movielens/ml-" + size + ".zip"
2024-06-26T06:13:26.3428759Z E               556 dirs, file = os.path.split(dest_path)
2024-06-26T06:13:26.3429564Z E           --> 557 maybe_download(url, file, work_directory=dirs)
2024-06-26T06:13:26.3429696Z E           
2024-06-26T06:13:26.3430640Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/retrying.py:56, in retry.<locals>.wrap.<locals>.wrapped_f(*args, **kw)
2024-06-26T06:13:26.3430940Z E                54 @six.wraps(f)
2024-06-26T06:13:26.3431541Z E                55 def wrapped_f(*args, **kw):
2024-06-26T06:13:26.3433107Z E           ---> 56     return Retrying(*dargs, **dkw).call(f, *args, **kw)
2024-06-26T06:13:26.3433235Z E           
2024-06-26T06:13:26.3434121Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/retrying.py:266, in Retrying.call(self, fn, *args, **kwargs)
2024-06-26T06:13:26.3434698Z E               263 if self.stop(attempt_number, delay_since_first_attempt_ms):
2024-06-26T06:13:26.3435473Z E               264     if not self._wrap_exception and attempt.has_exception:
2024-06-26T06:13:26.3436390Z E               265         # get() on an attempt with an exception should cause it to be raised, but raise just in case
2024-06-26T06:13:26.3436947Z E           --> 266         raise attempt.get()
2024-06-26T06:13:26.3437199Z E               267     else:
2024-06-26T06:13:26.3437699Z E               268         raise RetryError(attempt)
2024-06-26T06:13:26.3437831Z E           
2024-06-26T06:13:26.3438691Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/retrying.py:301, in Attempt.get(self, wrap_exception)
2024-06-26T06:13:26.3439063Z E               299         raise RetryError(self)
2024-06-26T06:13:26.3439306Z E               300     else:
2024-06-26T06:13:26.3441059Z E           --> 301         six.reraise(self.value[0], self.value[1], self.value[2])
2024-06-26T06:13:26.3441433Z E               302 else:
2024-06-26T06:13:26.3441837Z E               303     return self.value
2024-06-26T06:13:26.3441961Z E           
2024-06-26T06:13:26.3442738Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/six.py:719, in reraise(tp, value, tb)
2024-06-26T06:13:26.3443329Z E               717     if value.__traceback__ is not tb:
2024-06-26T06:13:26.3443741Z E               718         raise value.with_traceback(tb)
2024-06-26T06:13:26.3444013Z E           --> 719     raise value
2024-06-26T06:13:26.3444255Z E               720 finally:
2024-06-26T06:13:26.3444589Z E               721     value = None
2024-06-26T06:13:26.3444711Z E           
2024-06-26T06:13:26.3445602Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/retrying.py:251, in Retrying.call(self, fn, *args, **kwargs)
2024-06-26T06:13:26.3446016Z E               248     self._before_attempts(attempt_number)
2024-06-26T06:13:26.3446245Z E               250 try:
2024-06-26T06:13:26.3447259Z E           --> 251     attempt = Attempt(fn(*args, **kwargs), attempt_number, False)
2024-06-26T06:13:26.3447499Z E               252 except:
2024-06-26T06:13:26.3447840Z E               253     tb = sys.exc_info()
2024-06-26T06:13:26.3447964Z E           
2024-06-26T06:13:26.3449079Z E           File /mnt/azureml/cr/j/eead6898f1904961a42d902e2e4e28f5/exe/wd/recommenders/datasets/download_utils.py:36, in maybe_download(url, filename, work_directory, expected_bytes)
2024-06-26T06:13:26.3449598Z E                34 filepath = os.path.join(work_directory, filename)
2024-06-26T06:13:26.3450141Z E                35 if not os.path.exists(filepath):
2024-06-26T06:13:26.3451026Z E           ---> 36     r = requests.get(url, stream=True)
2024-06-26T06:13:26.3451641Z E                37     if r.status_code == 200:
2024-06-26T06:13:26.3452421Z E                38         log.info(f"Downloading {url}")
2024-06-26T06:13:26.3452549Z E           
2024-06-26T06:13:26.3453393Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/api.py:73, in get(url, params, **kwargs)
2024-06-26T06:13:26.3454063Z E                62 def get(url, params=None, **kwargs):
2024-06-26T06:13:26.3454493Z E                63     r"""Sends a GET request.
2024-06-26T06:13:26.3454786Z E                64 
2024-06-26T06:13:26.3455226Z E                65     :param url: URL for the new :class:`Request` object.
2024-06-26T06:13:26.3455399Z E              (...)
2024-06-26T06:13:26.3455731Z E                70     :rtype: requests.Response
2024-06-26T06:13:26.3455968Z E                71     """
2024-06-26T06:13:26.3457329Z E           ---> 73     return request("get", url, params=params, **kwargs)
2024-06-26T06:13:26.3457454Z E           
2024-06-26T06:13:26.3458306Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/api.py:59, in request(method, url, **kwargs)
2024-06-26T06:13:26.3458848Z E                55 # By using the 'with' statement we are sure the session is closed, thus we
2024-06-26T06:13:26.3459367Z E                56 # avoid leaving sockets open which can trigger a ResourceWarning in some
2024-06-26T06:13:26.3459768Z E                57 # cases, and look like a memory leak in others.
2024-06-26T06:13:26.3460255Z E                58 with sessions.Session() as session:
2024-06-26T06:13:26.3461557Z E           ---> 59     return session.request(method=method, url=url, **kwargs)
2024-06-26T06:13:26.3461685Z E           
2024-06-26T06:13:26.3463118Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
2024-06-26T06:13:26.3463392Z E               584 send_kwargs = {
2024-06-26T06:13:26.3463782Z E               585     "timeout": timeout,
2024-06-26T06:13:26.3464238Z E               586     "allow_redirects": allow_redirects,
2024-06-26T06:13:26.3464418Z E               587 }
2024-06-26T06:13:26.3464740Z E               588 send_kwargs.update(settings)
2024-06-26T06:13:26.3465814Z E           --> 589 resp = self.send(prep, **send_kwargs)
2024-06-26T06:13:26.3466079Z E               591 return resp
2024-06-26T06:13:26.3466204Z E           
2024-06-26T06:13:26.3467150Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
2024-06-26T06:13:26.3467448Z E               700 start = preferred_clock()
2024-06-26T06:13:26.3467735Z E               702 # Send the request
2024-06-26T06:13:26.3468606Z E           --> 703 r = adapter.send(request, **kwargs)
2024-06-26T06:13:26.3469163Z E               705 # Total elapsed time of the request (approximately)
2024-06-26T06:13:26.3469562Z E               706 elapsed = preferred_clock() - start
2024-06-26T06:13:26.3469686Z E           
2024-06-26T06:13:26.3470770Z E           File /azureml-envs/azureml_66eae34152a1571d4aee896f90647183/lib/python3.10/site-packages/requests/adapters.py:698, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
2024-06-26T06:13:26.3471210Z E               694         raise ProxyError(e, request=request)
2024-06-26T06:13:26.3471692Z E               696     if isinstance(e.reason, _SSLError):
2024-06-26T06:13:26.3472103Z E               697         # This branch is for urllib3 v1.22 and later.
2024-06-26T06:13:26.3472539Z E           --> 698         raise SSLError(e, request=request)
2024-06-26T06:13:26.3473077Z E               700     raise ConnectionError(e, request=request)
2024-06-26T06:13:26.3473489Z E               702 except ClosedPoolError as e:
2024-06-26T06:13:26.3473613Z E           
2024-06-26T06:13:26.3475038Z E           SSLError: HTTPSConnectionPool(host='files.grouplens.org', port=443): Max retries exceeded with url: /datasets/movielens/ml-1m.zip (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)')))

In which platform does it happen?

All

How do we replicate the issue?

More info: https://github.com/recommenders-team/recommenders/actions/runs/9671261357

Expected behavior (i.e. solution)

Other Comments

daviddavo commented 4 months ago

They have just recently updated the certificate:

Issued On   Wednesday 26 June 2024 at 19:07:24
Expires On  Tuesday 24 September 2024 at 19:07:23

miguelgfierro commented 4 months ago

Trying again: https://github.com/recommenders-team/recommenders/actions/runs/9726027809

miguelgfierro commented 4 months ago

It's working now.