koenvo / pyodide-http

Provides patches for widely used http libraries to make them work in Pyodide environments like JupyterLite
MIT License
78 stars 15 forks source link

Error with some module in pandas_datareader #25

Closed anarinsk closed 1 year ago

anarinsk commented 1 year ago

This nice package has solved most of my use cases with JupyterLite, except for the following problem.

Working env

My working environment is the same as the one at the following link:

https://jupyterlite.github.io/demo/lab/index.html

Reproducible code

%pip install -q pyodide-http pandas-datareader requests

import pyodide_http
pyodide_http.patch_all()  # Patch all libraries

import pandas_datareader as pdr
pdr.get_data_fred('GS10')

Error messages

---------------------------------------------------------------------------
JsException                               Traceback (most recent call last)
Cell In[3], line 2
      1 import pandas_datareader as pdr
----> 2 pdr.get_data_fred('GS10')

File /lib/python3.10/site-packages/pandas_datareader/data.py:72, in get_data_fred(*args, **kwargs)
     71 def get_data_fred(*args, **kwargs):
---> 72     return FredReader(*args, **kwargs).read()

File /lib/python3.10/site-packages/pandas_datareader/fred.py:27, in FredReader.read(self)
     18 """Read data
     19 
     20 Returns
   (...)
     24     DataFrame is the outer join of the indicies of each series.
     25 """
     26 try:
---> 27     return self._read()
     28 finally:
     29     self.close()

File /lib/python3.10/site-packages/pandas_datareader/fred.py:62, in FredReader._read(self)
     55             raise IOError(
     56                 "Failed to get the data. Check that "
     57                 "{0!r} is a valid FRED series.".format(name)
     58             )
     59         raise
     61 df = concat(
---> 62     [fetch_data(url, n) for url, n in zip(urls, names)], axis=1, join="outer"
     63 )
     64 return df

File /lib/python3.10/site-packages/pandas_datareader/fred.py:62, in <listcomp>(.0)
     55             raise IOError(
     56                 "Failed to get the data. Check that "
     57                 "{0!r} is a valid FRED series.".format(name)
     58             )
     59         raise
     61 df = concat(
---> 62     [fetch_data(url, n) for url, n in zip(urls, names)], axis=1, join="outer"
     63 )
     64 return df

File /lib/python3.10/site-packages/pandas_datareader/fred.py:41, in FredReader._read.<locals>.fetch_data(url, name)
     39 def fetch_data(url, name):
     40     """Utillity to fetch data"""
---> 41     resp = self._read_url_as_StringIO(url)
     42     data = read_csv(
     43         resp,
     44         index_col=0,
   (...)
     49         na_values=".",
     50     )
     51     try:

File /lib/python3.10/site-packages/pandas_datareader/base.py:119, in _BaseReader._read_url_as_StringIO(self, url, params)
    115 def _read_url_as_StringIO(self, url, params=None):
    116     """
    117     Open url (and retry)
    118     """
--> 119     response = self._get_response(url, params=params)
    120     text = self._sanitize_response(response)
    121     out = StringIO()

File /lib/python3.10/site-packages/pandas_datareader/base.py:155, in _BaseReader._get_response(self, url, params, headers)
    153 last_response_text = ""
    154 for _ in range(self.retry_count + 1):
--> 155     response = self.session.get(
    156         url, params=params, headers=headers, timeout=self.timeout
    157     )
    158     if response.status_code == requests.codes.ok:
    159         return response

File /lib/python3.10/site-packages/requests/sessions.py:600, in Session.get(self, url, **kwargs)
    592 r"""Sends a GET request. Returns :class:`Response` object.
    593 
    594 :param url: URL for the new :class:`Request` object.
    595 :param \*\*kwargs: Optional arguments that ``request`` takes.
    596 :rtype: requests.Response
    597 """
    599 kwargs.setdefault("allow_redirects", True)
--> 600 return self.request("GET", url, **kwargs)

File /lib/python3.10/site-packages/requests/sessions.py:587, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    582 send_kwargs = {
    583     "timeout": timeout,
    584     "allow_redirects": allow_redirects,
    585 }
    586 send_kwargs.update(settings)
--> 587 resp = self.send(prep, **send_kwargs)
    589 return resp

File /lib/python3.10/site-packages/requests/sessions.py:701, in Session.send(self, request, **kwargs)
    698 start = preferred_clock()
    700 # Send the request
--> 701 r = adapter.send(request, **kwargs)
    703 # Total elapsed time of the request (approximately)
    704 elapsed = preferred_clock() - start

File /lib/python3.10/site-packages/pyodide_http/_requests.py:42, in PyodideHTTPAdapter.send(self, request, **kwargs)
     40     pyodide_request.set_body(request.body)
     41 try:
---> 42     resp = send(pyodide_request, stream)
     43 except _StreamingTimeout:
     44     from requests import ConnectTimeout

File /lib/python3.10/site-packages/pyodide_http/_core.py:113, in send(request, stream)
    110 for name, value in request.headers.items():
    111     xhr.setRequestHeader(name, value)
--> 113 xhr.send(to_js(request.body))
    115 headers = dict(Parser().parsestr(xhr.getAllResponseHeaders()))
    117 if _IN_WORKER:

JsException: NetworkError: Failed to execute 'send' on 'XMLHttpRequest': Failed to load 'https://fred.stlouisfed.org/graph/fredgraph.csv?id=GS10'.

Working case

In the same package, pandas-datareader, the following code works well.

from pandas_datareader import wb
matches = wb.search('gdp.*capita.*const')
koenvo commented 1 year ago

Thank you for this report.

The CORS headers are missing from the server's response for this URL, so the browser blocks the request: https://cors-test.codehappy.dev/?url=https%3A%2F%2Ffred.stlouisfed.org%2Fgraph%2Ffredgraph.csv%3Fid%3DGS10&method=get

Also see https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS for more info about CORS.

image
anarinsk commented 1 year ago

Thanks for the answer.