intake / intake-thredds

Intake interface to THREDDS data catalogs
https://intake-thredds.readthedocs.io/
Apache License 2.0
11 stars 7 forks source link

issues opening GFS archive #26

Open raybellwaves opened 3 years ago

raybellwaves commented 3 years ago

Thanks for this package. I tried it today but couldn't open a file. I'm able to open the file using xr.open_dataset, so I'm not sure whether intake-thredds expects the data to be in a certain format.

I'm reading in archived GFS forecast data (https://rda.ucar.edu/datasets/ds084.1/#!description). Note that you will need a login to access it (https://stackoverflow.com/questions/66178846/read-in-authorized-opendap-url-using-xarray)

url = "https://rda.ucar.edu/thredds/dodsC/files/g/ds084.1/2020/20200201/gfs.0p25.2020020100.f000.grib2"
ds = xr.open_mfdataset([url])

vs

cat_url = "https://rda.ucar.edu/thredds/catalog/files/g/ds084.1/2020/20200201/catalog.xml"
catalog = intake.open_thredds_cat(cat_url, name="GFS-catalog")
file = list(catalog)[0]
source = catalog[file]
ds = source().to_dask()

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-25-c6e6fbd68981> in <module>
----> 1 ds = source().to_dask()
      2 ds

~/miniconda/envs/main/lib/python3.8/site-packages/intake_xarray/base.py in to_dask(self)
     67     def to_dask(self):
     68         """Return xarray object where variables are dask arrays"""
---> 69         return self.read_chunked()
     70 
     71     def close(self):

~/miniconda/envs/main/lib/python3.8/site-packages/intake_xarray/base.py in read_chunked(self)
     42     def read_chunked(self):
     43         """Return xarray object (which will have chunks)"""
---> 44         self._load_metadata()
     45         return self._ds
     46 

~/miniconda/envs/main/lib/python3.8/site-packages/intake/source/base.py in _load_metadata(self)
    234         """load metadata only if needed"""
    235         if self._schema is None:
--> 236             self._schema = self._get_schema()
    237             self.dtype = self._schema.dtype
    238             self.shape = self._schema.shape

~/miniconda/envs/main/lib/python3.8/site-packages/intake_xarray/base.py in _get_schema(self)
     16 
     17         if self._ds is None:
---> 18             self._open_dataset()
     19 
     20             metadata = {

~/miniconda/envs/main/lib/python3.8/site-packages/intake_xarray/opendap.py in _open_dataset(self)
     92         import xarray as xr
     93         store = self._get_store()
---> 94         self._ds = xr.open_dataset(store, chunks=self.chunks, **self._kwargs)

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime, decode_timedelta)
    555 
    556     with close_on_error(store):
--> 557         ds = maybe_decode_store(store, chunks)
    558 
    559     # Ensure source filename always stored in dataset object (GH issue #2550)

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/api.py in maybe_decode_store(store, chunks)
    451 
    452     def maybe_decode_store(store, chunks):
--> 453         ds = conventions.decode_cf(
    454             store,
    455             mask_and_scale=mask_and_scale,

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/conventions.py in decode_cf(obj, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables, use_cftime, decode_timedelta)
    637         encoding = obj.encoding
    638     elif isinstance(obj, AbstractDataStore):
--> 639         vars, attrs = obj.load()
    640         extra_coords = set()
    641         close = obj.close

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/common.py in load(self)
    111         """
    112         variables = FrozenDict(
--> 113             (_decode_variable_name(k), v) for k, v in self.get_variables().items()
    114         )
    115         attributes = FrozenDict(self.get_attrs())

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/pydap_.py in get_variables(self)
     97 
     98     def get_variables(self):
---> 99         return FrozenDict(
    100             (k, self.open_store_variable(self.ds[k])) for k in self.ds.keys()
    101         )

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/core/utils.py in FrozenDict(*args, **kwargs)
    451 
    452 def FrozenDict(*args, **kwargs) -> Frozen:
--> 453     return Frozen(dict(*args, **kwargs))
    454 
    455 

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/pydap_.py in <genexpr>(.0)
     98     def get_variables(self):
     99         return FrozenDict(
--> 100             (k, self.open_store_variable(self.ds[k])) for k in self.ds.keys()
    101         )
    102 

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/backends/pydap_.py in open_store_variable(self, var)
     94     def open_store_variable(self, var):
     95         data = indexing.LazilyOuterIndexedArray(PydapArrayWrapper(var))
---> 96         return Variable(var.dimensions, data, _fix_attributes(var.attributes))
     97 
     98     def get_variables(self):

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/core/variable.py in __init__(self, dims, data, attrs, encoding, fastpath)
    340         """
    341         self._data = as_compatible_data(data, fastpath=fastpath)
--> 342         self._dims = self._parse_dimensions(dims)
    343         self._attrs = None
    344         self._encoding = None

~/miniconda/envs/main/lib/python3.8/site-packages/xarray/core/variable.py in _parse_dimensions(self, dims)
    601         dims = tuple(dims)
    602         if len(dims) != self.ndim:
--> 603             raise ValueError(
    604                 "dimensions %s must have the same length as the "
    605                 "number of data dimensions, ndim=%s" % (dims, self.ndim)

ValueError: dimensions ('height_above_ground_layer',) must have the same length as the number of data dimensions, ndim=2
aaronspring commented 3 years ago

Try specifying driver=netcdf (see https://github.com/intake/intake-xarray/blob/master/intake_xarray/netcdf.py). OPeNDAP is the default and is what was used above. Also try adding all the kwargs you need for your xr.open_dataset example.

aaronspring commented 3 years ago

My suggestion won’t work because this server does not offer HTTPServer access, which most other THREDDS servers do provide.

https://rda.ucar.edu/thredds/catalog/files/g/ds084.1/2020/20200201/catalog.html?dataset=files/g/ds084.1/2020/20200201/gfs.0p25.2020020118.f150.grib2

aaronspring commented 3 years ago

Have you tried with decode_cf=False?