jupyterlite / pyodide-kernel

Python kernel for JupyterLite powered by Pyodide
https://jupyterlite-pyodide-kernel.readthedocs.io/en/latest/_static/
BSD 3-Clause "New" or "Revised" License
46 stars 25 forks source link

read_csv error #109

Closed 911432 closed 5 months ago

911432 commented 5 months ago

Description

  1. Open JupyterLite.
  2. Select the Python (Pyodide) kernel.
  3. Input python code and run it.
    import pandas as pd
    df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')
  4. Then the following error occurs:
    
    ---------------------------------------------------------------------------
    OSError                                   Traceback (most recent call last)
    File /lib/python311.zip/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
    1347 try:
    -> 1348     h.request(req.get_method(), req.selector, req.data, headers,
    1349               encode_chunked=req.has_header('Transfer-encoding'))
    1350 except OSError as err: # timeout error

File /lib/python311.zip/http/client.py:1283, in HTTPConnection.request(self, method, url, body, headers, encode_chunked) 1282 """Send a complete request to the server.""" -> 1283 self._send_request(method, url, body, headers, encode_chunked)

File /lib/python311.zip/http/client.py:1329, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked) 1328 body = _encode(body, 'body') -> 1329 self.endheaders(body, encode_chunked=encode_chunked)

File /lib/python311.zip/http/client.py:1278, in HTTPConnection.endheaders(self, message_body, encode_chunked) 1277 raise CannotSendHeader() -> 1278 self._send_output(message_body, encode_chunked=encode_chunked)

File /lib/python311.zip/http/client.py:1038, in HTTPConnection._send_output(self, message_body, encode_chunked) 1037 del self._buffer[:] -> 1038 self.send(msg) 1040 if message_body is not None: 1041 1042 # create a consistent interface to message_body

File /lib/python311.zip/http/client.py:976, in HTTPConnection.send(self, data) 975 if self.auto_open: --> 976 self.connect() 977 else:

File /lib/python311.zip/http/client.py:1448, in HTTPSConnection.connect(self) 1446 "Connect to a host on a given (SSL) port." -> 1448 super().connect() 1450 if self._tunnel_host:

File /lib/python311.zip/http/client.py:942, in HTTPConnection.connect(self) 941 sys.audit("http.client.connect", self, self.host, self.port) --> 942 self.sock = self._create_connection( 943 (self.host,self.port), self.timeout, self.source_address) 944 # Might fail in OSs that don't implement TCP_NODELAY

File /lib/python311.zip/socket.py:851, in create_connection(address, timeout, source_address, all_errors) 850 if not all_errors: --> 851 raise exceptions[0] 852 raise ExceptionGroup("create_connection failed", exceptions)

File /lib/python311.zip/socket.py:836, in create_connection(address, timeout, source_address, all_errors) 835 sock.bind(source_address) --> 836 sock.connect(sa) 837 # Break explicitly a reference cycle

OSError: [Errno 23] Host is unreachable

During handling of the above exception, another exception occurred:

URLError Traceback (most recent call last) Cell In[8], line 2 1 import pandas as pd ----> 2 df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')

File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.._deprecate_kwarg..wrapper(*args, *kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(args, **kwargs)

File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments..decorate..wrapper(*args, *kwargs) 325 if len(args) > num_allow_args: 326 warnings.warn( 327 msg.format(arguments=_format_argument_list(allow_args)), 328 FutureWarning, 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(args, **kwargs)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options) 935 kwds_defaults = _refine_defaults_read( 936 dialect, 937 delimiter, (...) 946 defaults={"delimiter": ","}, 947 ) 948 kwds.update(kwds_defaults) --> 950 return _read(filepath_or_buffer, kwds)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds) 602 _validate_names(kwds.get("names", None)) 604 # Create the parser. --> 605 parser = TextFileReader(filepath_or_buffer, **kwds) 607 if chunksize or iterator: 608 return parser

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.init(self, f, engine, **kwds) 1439 self.options["has_index_names"] = kwds["has_index_names"] 1441 self.handles: IOHandles | None = None -> 1442 self._engine = self._make_engine(f, self.engine)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine) 1733 if "b" not in mode: 1734 mode += "b" -> 1735 self.handles = get_handle( 1736 f, 1737 mode, 1738 encoding=self.options.get("encoding", None), 1739 compression=self.options.get("compression", None), 1740 memory_map=self.options.get("memory_map", False), 1741 is_text=is_text, 1742 errors=self.options.get("encoding_errors", "strict"), 1743 storage_options=self.options.get("storage_options", None), 1744 ) 1745 assert self.handles is not None 1746 f = self.handles.handle

File /lib/python3.11/site-packages/pandas/io/common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 710 codecs.lookup_error(errors) 712 # open URLs --> 713 ioargs = _get_filepath_or_buffer( 714 path_or_buf, 715 encoding=encoding, 716 compression=compression, 717 mode=mode, 718 storage_options=storage_options, 719 ) 721 handle = ioargs.filepath_or_buffer 722 handles: list[BaseBuffer]

File /lib/python3.11/site-packages/pandas/io/common.py:363, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options) 361 # assuming storage_options is to be interpreted as headers 362 req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) --> 363 with urlopen(req_info) as req: 364 content_encoding = req.headers.get("Content-Encoding", None) 365 if content_encoding == "gzip": 366 # Override compression based on Content-Encoding header

File /lib/python3.11/site-packages/pandas/io/common.py:265, in urlopen(*args, *kwargs) 259 """ 260 Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of 261 the stdlib. 262 """ 263 import urllib.request --> 265 return urllib.request.urlopen(args, **kwargs)

File /lib/python311.zip/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context) 214 else: 215 opener = _opener --> 216 return opener.open(url, data, timeout)

File /lib/python311.zip/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout) 516 req = meth(req) 518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) --> 519 response = self._open(req, data) 521 # post-process response 522 meth_name = protocol+"_response"

File /lib/python311.zip/urllib/request.py:536, in OpenerDirector._open(self, req, data) 533 return result 535 protocol = req.type --> 536 result = self._call_chain(self.handle_open, protocol, protocol + 537 '_open', req) 538 if result: 539 return result

File /lib/python311.zip/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, args) 494 for handler in handlers: 495 func = getattr(handler, meth_name) --> 496 result = func(args) 497 if result is not None: 498 return result

File /lib/python311.zip/urllib/request.py:1391, in HTTPSHandler.https_open(self, req) 1390 def https_open(self, req): -> 1391 return self.do_open(http.client.HTTPSConnection, req, 1392 context=self._context, check_hostname=self._check_hostname)

File /lib/python311.zip/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args) 1348 h.request(req.get_method(), req.selector, req.data, headers, 1349 encode_chunked=req.has_header('Transfer-encoding')) 1350 except OSError as err: # timeout error -> 1351 raise URLError(err) 1352 r = h.getresponse() 1353 except:

URLError: <urlopen error [Errno 23] Host is unreachable>


5. Also, when I open the Chrome console, the following error appears.
```console
pyodide.asm.js:9 Mixed Content: The page at 'https://jupyterlite.github.io/demo/extensions/@jupyterlite/pyodide-kernel-extension/static/576.c0192b77701147fba206.js?v=c0192b77701147fba206' was loaded over HTTPS, but attempted to connect to the insecure WebSocket endpoint 'ws://raw.githubusercontent.com:443/'. This request has been blocked; this endpoint must be available over WSS.
```

error

Reproduce

  1. Go to '...'
  2. Click on '...'
  3. Scroll down to '...'
  4. See error '...'

Expected behavior

Context

Browser Output
Paste the output from your browser Javascript console here.
bollwyvl commented 5 months ago

Please try pyodide_http, included in the standard pyodide distribution for a few releases:

import pyodide_http
pyodide_http.patch_all()

... prior to making any http requests.

911432 commented 5 months ago
import pyodide_http
pyodide_http.patch_all()
import pandas as pd
df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')
df.info()

It loads normally now. Resolved — thank you!