read_csv error - Githubissues

911432 commented 5 months ago

Description

Open JupyterLite.
Choose Python (Pyodide).

Enter the following Python code and run it:

import pandas as pd
df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')

Then the following error occurs:


---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
File /lib/python311.zip/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1347 try:
-> 1348     h.request(req.get_method(), req.selector, req.data, headers,
1349               encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error

File /lib/python311.zip/http/client.py:1283, in HTTPConnection.request(self, method, url, body, headers, encode_chunked) 1282 """Send a complete request to the server.""" -> 1283 self._send_request(method, url, body, headers, encode_chunked)

File /lib/python311.zip/http/client.py:1329, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked) 1328 body = _encode(body, 'body') -> 1329 self.endheaders(body, encode_chunked=encode_chunked)

File /lib/python311.zip/http/client.py:1278, in HTTPConnection.endheaders(self, message_body, encode_chunked) 1277 raise CannotSendHeader() -> 1278 self._send_output(message_body, encode_chunked=encode_chunked)

File /lib/python311.zip/http/client.py:1038, in HTTPConnection._send_output(self, message_body, encode_chunked) 1037 del self._buffer[:] -> 1038 self.send(msg) 1040 if message_body is not None: 1041 1042 # create a consistent interface to message_body

File /lib/python311.zip/http/client.py:976, in HTTPConnection.send(self, data) 975 if self.auto_open: --> 976 self.connect() 977 else:

File /lib/python311.zip/http/client.py:1448, in HTTPSConnection.connect(self) 1446 "Connect to a host on a given (SSL) port." -> 1448 super().connect() 1450 if self._tunnel_host:

File /lib/python311.zip/http/client.py:942, in HTTPConnection.connect(self) 941 sys.audit("http.client.connect", self, self.host, self.port) --> 942 self.sock = self._create_connection( 943 (self.host,self.port), self.timeout, self.source_address) 944 # Might fail in OSs that don't implement TCP_NODELAY

File /lib/python311.zip/socket.py:851, in create_connection(address, timeout, source_address, all_errors) 850 if not all_errors: --> 851 raise exceptions[0] 852 raise ExceptionGroup("create_connection failed", exceptions)

File /lib/python311.zip/socket.py:836, in create_connection(address, timeout, source_address, all_errors) 835 sock.bind(source_address) --> 836 sock.connect(sa) 837 # Break explicitly a reference cycle

OSError: [Errno 23] Host is unreachable

During handling of the above exception, another exception occurred:

URLError Traceback (most recent call last) Cell In[8], line 2 1 import pandas as pd ----> 2 df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')

File /lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs)

File /lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs) 325 if len(args) > num_allow_args: 326 warnings.warn( 327 msg.format(arguments=_format_argument_list(allow_args)), 328 FutureWarning, 329 stacklevel=find_stack_level(), 330 ) --> 331 return func(*args, **kwargs)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options) 935 kwds_defaults = _refine_defaults_read( 936 dialect, 937 delimiter, (...) 946 defaults={"delimiter": ","}, 947 ) 948 kwds.update(kwds_defaults) --> 950 return _read(filepath_or_buffer, kwds)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds) 602 _validate_names(kwds.get("names", None)) 604 # Create the parser. --> 605 parser = TextFileReader(filepath_or_buffer, **kwds) 607 if chunksize or iterator: 608 return parser

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds) 1439 self.options["has_index_names"] = kwds["has_index_names"] 1441 self.handles: IOHandles | None = None -> 1442 self._engine = self._make_engine(f, self.engine)

File /lib/python3.11/site-packages/pandas/io/parsers/readers.py:1735, in TextFileReader._make_engine(self, f, engine) 1733 if "b" not in mode: 1734 mode += "b" -> 1735 self.handles = get_handle( 1736 f, 1737 mode, 1738 encoding=self.options.get("encoding", None), 1739 compression=self.options.get("compression", None), 1740 memory_map=self.options.get("memory_map", False), 1741 is_text=is_text, 1742 errors=self.options.get("encoding_errors", "strict"), 1743 storage_options=self.options.get("storage_options", None), 1744 ) 1745 assert self.handles is not None 1746 f = self.handles.handle

File /lib/python3.11/site-packages/pandas/io/common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options) 710 codecs.lookup_error(errors) 712 # open URLs --> 713 ioargs = _get_filepath_or_buffer( 714 path_or_buf, 715 encoding=encoding, 716 compression=compression, 717 mode=mode, 718 storage_options=storage_options, 719 ) 721 handle = ioargs.filepath_or_buffer 722 handles: list[BaseBuffer]

File /lib/python3.11/site-packages/pandas/io/common.py:363, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options) 361 # assuming storage_options is to be interpreted as headers 362 req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) --> 363 with urlopen(req_info) as req: 364 content_encoding = req.headers.get("Content-Encoding", None) 365 if content_encoding == "gzip": 366 # Override compression based on Content-Encoding header

File /lib/python3.11/site-packages/pandas/io/common.py:265, in urlopen(*args, **kwargs) 259 """ 260 Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of 261 the stdlib. 262 """ 263 import urllib.request --> 265 return urllib.request.urlopen(*args, **kwargs)

File /lib/python311.zip/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context) 214 else: 215 opener = _opener --> 216 return opener.open(url, data, timeout)

File /lib/python311.zip/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout) 516 req = meth(req) 518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) --> 519 response = self._open(req, data) 521 # post-process response 522 meth_name = protocol+"_response"

File /lib/python311.zip/urllib/request.py:536, in OpenerDirector._open(self, req, data) 533 return result 535 protocol = req.type --> 536 result = self._call_chain(self.handle_open, protocol, protocol + 537 '_open', req) 538 if result: 539 return result

File /lib/python311.zip/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args) 494 for handler in handlers: 495 func = getattr(handler, meth_name) --> 496 result = func(*args) 497 if result is not None: 498 return result

File /lib/python311.zip/urllib/request.py:1391, in HTTPSHandler.https_open(self, req) 1390 def https_open(self, req): -> 1391 return self.do_open(http.client.HTTPSConnection, req, 1392 context=self._context, check_hostname=self._check_hostname)

File /lib/python311.zip/urllib/request.py:1351, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args) 1348 h.request(req.get_method(), req.selector, req.data, headers, 1349 encode_chunked=req.has_header('Transfer-encoding')) 1350 except OSError as err: # timeout error -> 1351 raise URLError(err) 1352 r = h.getresponse() 1353 except:

URLError: <urlopen error [Errno 23] Host is unreachable>


Also, when I open the Chrome console, the following error appears:
```console
pyodide.asm.js:9 Mixed Content: The page at 'https://jupyterlite.github.io/demo/extensions/@jupyterlite/pyodide-kernel-extension/static/576.c0192b77701147fba206.js?v=c0192b77701147fba206' was loaded over HTTPS, but attempted to connect to the insecure WebSocket endpoint 'ws://raw.githubusercontent.com:443/'. This request has been blocked; this endpoint must be available over WSS.
```

error

Reproduce

Go to '...'
Click on '...'
Scroll down to '...'
See error '...'

Expected behavior

Context

JupyterLite version:
Operating System and version:
Browser and version:

Browser Output

Paste the output from your browser Javascript console here.

bollwyvl commented 5 months ago

Please try pyodide_http, included in the standard pyodide distribution for a few releases:

import pyodide_http
pyodide_http.patch_all()

... prior to making any http requests.

911432 commented 5 months ago

import pyodide_http
pyodide_http.patch_all()
import pandas as pd
df=pd.read_csv('https://raw.githubusercontent.com/YoungjinBD/dataset/main/facebook.csv', encoding='UTF-8')
df.info()

It loads normally now. Resolved.

jupyterlite / pyodide-kernel

read_csv error #109

Description

Reproduce

Expected behavior

Context