hyriver / pynhd

A part of HyRiver software stack that provides access to NHD+ V2 data through NLDI and WaterData web services
https://docs.hyriver.io
Other
33 stars 8 forks source link

Unicode error with streamcat #76

Closed mhweber closed 3 months ago

mhweber commented 3 months ago

What happened?

With

from pynhd import StreamCat
sc = StreamCat()

I get the following unicode error:

---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\_utils.py:81, in retriever(uid, url, s_kwds, session, read_type, r_kwds, raise_status)
     80 try:
---> 81     return uid, await getattr(response, read_type)(**r_kwds)
     82 except (ClientResponseError, ValueError) as ex:

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\aiohttp_client_cache\response.py:221, in CachedResponse.text(self, encoding, errors)
    220 """Read response payload and decode"""
--> 221 return self._body.decode(encoding or self.encoding, errors=errors)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 33476: invalid start byte

During handling of the above exception, another exception occurred:

UnicodeDecodeError                        Traceback (most recent call last)
Cell In[7], line 2
      1 from pynhd import StreamCat
----> 2 sc = StreamCat()

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\pynhd\nhdplus_derived.py:512, in StreamCat.__init__(self)
    503 self.valid_slopes = tlz.merge_with(
    504     list,
    505     (
   (...)
    508     ),
    509 )
    511 url_vars = f"{self.base_url}/variable_info.csv"
--> 512 names = pd.read_csv(io.StringIO(ar.retrieve_text([url_vars])[0]))
    513 names["METRIC_NAME"] = names["METRIC_NAME"].str.replace(r"\[AOI\]|Slp[12]0", "", regex=True)
    514 names["SLOPE"] = [
    515     ", ".join(self.valid_slopes.get(m.replace("[Year]", "").lower(), []))
    516     for m in names.METRIC_NAME
    517 ]

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\async_retriever.py:498, in retrieve_text(urls, request_kwds, request_method, max_workers, cache_name, timeout, expire_after, ssl, disable, raise_status)
    437 def retrieve_text(
    438     urls: Sequence[StrOrURL],
    439     request_kwds: Sequence[dict[str, Any]] | None = None,
   (...)
    447     raise_status: bool = True,
    448 ) -> list[str]:
    449     r"""Send async requests and get the response as ``text``.
    450 
    451     Parameters
   (...)
    496     '01646500'
    497     """
--> 498     return retrieve(
    499         urls,
    500         "text",
    501         request_kwds,
    502         request_method,
    503         max_workers,
    504         cache_name,
    505         timeout,
    506         expire_after,
    507         ssl,
    508         disable,
    509         raise_status,
    510     )

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\async_retriever.py:431, in retrieve(urls, read_method, request_kwds, request_method, max_workers, cache_name, timeout, expire_after, ssl, disable, raise_status)
    428 loop, new_loop = utils.get_event_loop()
    429 results = (loop.run_until_complete(session(url_kwds=c)) for c in chunked_reqs)
--> 431 resp = [r for _, r in sorted(tlz.concat(results))]
    432 if new_loop:
    433     loop.close()

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\async_retriever.py:429, in <genexpr>(.0)
    427 chunked_reqs = tlz.partition_all(max_workers, inp.url_kwds)
    428 loop, new_loop = utils.get_event_loop()
--> 429 results = (loop.run_until_complete(session(url_kwds=c)) for c in chunked_reqs)
    431 resp = [r for _, r in sorted(tlz.concat(results))]
    432 if new_loop:

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\nest_asyncio.py:98, in _patch_loop.<locals>.run_until_complete(self, future)
     95 if not f.done():
     96     raise RuntimeError(
     97         'Event loop stopped before Future completed.')
---> 98 return f.result()

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\asyncio\futures.py:203, in Future.result(self)
    201 self.__log_traceback = False
    202 if self._exception is not None:
--> 203     raise self._exception.with_traceback(self._exception_tb)
    204 return self._result

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\asyncio\tasks.py:314, in Task.__step_run_and_handle_result(***failed resolving arguments***)
    310 try:
    311     if exc is None:
    312         # We use the `send` method directly, because coroutines
    313         # don't have `__iter__` and `__next__` methods.
--> 314         result = coro.send(None)
    315     else:
    316         result = coro.throw(exc)

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\async_retriever.py:232, in async_session_with_cache(url_kwds, read, r_kwds, request_method, cache_name, timeout, expire_after, ssl, raise_status)
    227 request_func = getattr(session, request_method.lower())
    228 tasks = (
    229     utils.retriever(uid, url, kwds, request_func, read, r_kwds, raise_status)
    230     for uid, url, kwds in url_kwds
    231 )
--> 232 return await asyncio.gather(*tasks)

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\asyncio\tasks.py:385, in Task.__wakeup(self, future)
    383 def __wakeup(self, future):
    384     try:
--> 385         future.result()
    386     except BaseException as exc:
    387         # This may also be a cancellation.
    388         self.__step(exc)

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\asyncio\tasks.py:314, in Task.__step_run_and_handle_result(***failed resolving arguments***)
    310 try:
    311     if exc is None:
    312         # We use the `send` method directly, because coroutines
    313         # don't have `__iter__` and `__next__` methods.
--> 314         result = coro.send(None)
    315     else:
    316         result = coro.throw(exc)

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\async_retriever\_utils.py:84, in retriever(uid, url, s_kwds, session, read_type, r_kwds, raise_status)
     82 except (ClientResponseError, ValueError) as ex:
     83     if raise_status:
---> 84         raise ServiceError(await response.text(), str(response.url)) from ex
     85     return uid, None

File ~\AppData\Local\miniforge3\envs\HyRiver\Lib\site-packages\aiohttp_client_cache\response.py:221, in CachedResponse.text(self, encoding, errors)
    219 async def text(self, encoding: str | None = None, errors: str = 'strict') -> str:
    220     """Read response payload and decode"""
--> 221     return self._body.decode(encoding or self.encoding, errors=errors)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 33476: invalid start byte

What did you expect to happen?

I expected an object I could use to check out attrs like sc.metrics_df, sc.valid_aois

Minimal Complete Verifiable Example

from pynhd import StreamCat
sc = StreamCat()

MVCE confirmation

Relevant log output

No response

Anything else we need to know?

Calling pynhd.streamcat("fert", comids=13212248) raises the same Unicode error.

Environment

SYS INFO
--------
commit: None
python: 3.12.1 | packaged by conda-forge | (main, Dec 23 2023, 07:53:56) [MSC v.1937 64 bit (AMD64)]
python-bits: 64
OS: Windows
OS-release: 11
machine: AMD64
processor: AMD64 Family 25 Model 8 Stepping 2, AuthenticAMD
byteorder: little
LC_ALL: None
LANG: None
LOCALE: English_United States.1252
libhdf5: 1.14.3
libnetcdf: 4.9.2

PACKAGE VERSION
-------------------------------
aiohttp 3.9.5
aiohttp-client-cache 0.11.1
aiosqlite 0.20.0
async-retriever 0.15.2
bottleneck N/A
click 8.1.7
cytoolz 0.12.3
defusedxml 0.7.1
folium 0.17.0
geopandas 1.0.1
h5netcdf 1.3.0
hydrosignatures 0.15.2
joblib 1.4.2
matplotlib 3.8.2
multidict 6.0.5
netcdf4 1.6.5
networkx 3.3
numba N/A
numpy 1.26.4
owslib 0.31.0
pandas 2.2.2
py3dep 0.15.2
py7zr N/A
pyarrow 14.0.2
pydaymet 0.15.2
pyflwdir N/A
pygeohydro 0.15.2
pygeoogc 0.15.2
pygeoutils 0.15.2
pynhd 0.15.2
pynldas2 0.15.2
pyogrio N/A
pyproj 3.6.1
rasterio 1.3.9
requests 2.32.3
requests-cache 1.2.1
rioxarray 0.17.0
scipy 1.14.0
shapely 2.0.2
ujson 5.10.0
url-normalize 1.4.3
urllib3 2.2.2
xarray 2024.7.0
yarl 1.9.4
-------------------------------
mhweber commented 3 months ago

As you pointed out, @cheginit, I was running behind on my version of pynhd. Running `conda install -c conda-forge pynhd=0.17.0` solved the issue!