intake / intake-xarray

Intake plugin for xarray
https://intake-xarray.readthedocs.io/
BSD 2-Clause "Simplified" License
74 stars 36 forks source link

Loading a remote file using RasterIOSource fails #107

Closed samueljackson92 closed 3 years ago

samueljackson92 commented 3 years ago

Hi,

I'm trying to pull some data from Earth Data using intake-xarray. My script fails and reports that the file is not a supported file format. However, it seems to open fine if I manually download and open the file using rioxarray. Below is a minimal script to reproduce the problem:

from netrc import netrc
import intake
from satsearch import Search
import netrc, fsspec, aiohttp
from getpass import getpass

endpoint = "urs.earthdata.nasa.gov"  # NOTE(review): never referenced again in this script
username = input('Username: ')  # credentials are prompted for interactively
password = getpass('Password: ')

fsspec.config.conf['https'] = dict(client_kwargs={'auth': aiohttp.BasicAuth(username, password)})  # store basic-auth client kwargs under fsspec's 'https' config key

results = Search(url='https://cmr.earthdata.nasa.gov/stac/LAADS',  # search filtered by collection, bbox and datetime range
                 collections=['MOD021KM.v6.1'], 
                 bbox = '-74.5,40.2128,-73.5,41.2128' ,
                 datetime='2020-04-23/2021-04-23', 
                )
item_names = results.items()  # despite the name, holds the items themselves (indexed and read via .id below)
print(len(item_names))  # number of items the search returned

intake_items = intake.open_stac_item_collection(item_names)  # presumably provided by the intake-stac plugin; confirm it is installed

file_handle = intake_items[item_names[0].id]['data']  # 'data' entry of the first item, looked up by item id
file_handle.to_dask()  # this call raises the RasterioIOError shown in the traceback below

I was using intake==0.6.2, intake-xarray==0.5.0, and xarray==0.18.2.

Any advice would be greatly appreciated.

I also attach the full stacktrace below:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
    198             try:
--> 199                 file = self._cache[self._key]
    200             except KeyError:

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/lru_cache.py in __getitem__(self, key)
     52         with self._lock:
---> 53             value = self._cache[key]
     54             self._cache.move_to_end(key)

KeyError: [<function open at 0x7efcf007dd30>, ('https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD021KM/2020/114/MOD021KM.A2020114.0230.061.2020114131605.hdf',), 'r', ()]

During handling of the above exception, another exception occurred:

CPLE_OpenFailedError                      Traceback (most recent call last)
rasterio/_base.pyx in rasterio._base.DatasetBase.__init__()

rasterio/_shim.pyx in rasterio._shim.open_dataset()

rasterio/_err.pyx in rasterio._err.exc_wrap_pointer()

CPLE_OpenFailedError: '/vsicurl/https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD021KM/2020/114/MOD021KM.A2020114.0230.061.2020114131605.hdf' not recognized as a supported file format.

During handling of the above exception, another exception occurred:

RasterioIOError                           Traceback (most recent call last)
<ipython-input-2-cc9efadbc009> in <module>
     22 
     23 file_handle = intake_items[item_names[0].id]['data']
---> 24 file_handle.to_dask()

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/intake_xarray/base.py in to_dask(self)
     67     def to_dask(self):
     68         """Return xarray object where variables are dask arrays"""
---> 69         return self.read_chunked()
     70 
     71     def close(self):

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/intake_xarray/base.py in read_chunked(self)
     42     def read_chunked(self):
     43         """Return xarray object (which will have chunks)"""
---> 44         self._load_metadata()
     45         return self._ds
     46 

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/intake/source/base.py in _load_metadata(self)
    234         """load metadata only if needed"""
    235         if self._schema is None:
--> 236             self._schema = self._get_schema()
    237             self.dtype = self._schema.dtype
    238             self.shape = self._schema.shape

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/intake_xarray/raster.py in _get_schema(self)
     99 
    100         if self._ds is None:
--> 101             self._open_dataset()
    102 
    103             ds2 = xr.Dataset({'raster': self._ds})

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/intake_xarray/raster.py in _open_dataset(self)
     87             self._ds = self._open_files(files)
     88         else:
---> 89             self._ds = xr.open_rasterio(files, chunks=self.chunks,
     90                                         **self._kwargs)
     91 

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/rasterio_.py in open_rasterio(filename, parse_coordinates, chunks, cache, lock)
    274 
    275     manager = CachingFileManager(rasterio.open, filename, lock=lock, mode="r")
--> 276     riods = manager.acquire()
    277     if vrt_params is not None:
    278         riods = WarpedVRT(riods, **vrt_params)

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/file_manager.py in acquire(self, needs_lock)
    179             An open file object, as returned by ``opener(*args, **kwargs)``.
    180         """
--> 181         file, _ = self._acquire_with_cache_info(needs_lock)
    182         return file
    183 

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/xarray/backends/file_manager.py in _acquire_with_cache_info(self, needs_lock)
    203                     kwargs = kwargs.copy()
    204                     kwargs["mode"] = self._mode
--> 205                 file = self._opener(*self._args, **kwargs)
    206                 if self._mode == "w":
    207                     # ensure file doesn't get overriden when opened again

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/rasterio/env.py in wrapper(*args, **kwds)
    433 
    434         with env_ctor(session=session):
--> 435             return f(*args, **kwds)
    436 
    437     return wrapper

~/miniconda3/envs/pangeo/lib/python3.9/site-packages/rasterio/__init__.py in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs)
    218         # None.
    219         if mode == 'r':
--> 220             s = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
    221         elif mode == "r+":
    222             s = get_writer_for_path(path, driver=driver)(

rasterio/_base.pyx in rasterio._base.DatasetBase.__init__()

RasterioIOError: '/vsicurl/https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/61/MOD021KM/2020/114/MOD021KM.A2020114.0230.061.2020114131605.hdf' not recognized as a supported file format.
martindurant commented 3 years ago

As far as I know, rasterio cannot support remote files until https://github.com/mapbox/rasterio/pull/2141 is finished up. Add your voice to those asking for the feature!

samueljackson92 commented 3 years ago

That explains it then! Thank you. I shall eagerly await that feature!