ecmwf / earthkit-data

A format-agnostic Python interface for geospatial data
Apache License 2.0
53 stars 12 forks source link

TypeError processing DatetimeJulian coordinates #399

Status: Closed (malmans2 closed this issue 3 months ago)

malmans2 commented 3 months ago

What happened?

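The snippet below shows earthkit-data crashing with a TypeError when the NetCDF reader processes a coordinate whose values are cftime DatetimeJulian objects: the coordinate conversion calls float() on each value, which fails for DatetimeJulian.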

What are the steps to reproduce the bug?

import earthkit.data
dataset = "satellite-humidity-profiles"
request = {
    "variable": "all",
    "product_type": "radio_occultation_data",
    "year": 2007,
    "month": "01",
}
earthkit.data.from_source("cds", dataset, request)  # TypeError

Version

0.7.0

Platform (OS and architecture)

Linux eqc-quality-tools.eqc.compute.cci1.ecmwf.int 5.14.0-362.8.1.el9_3.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Nov 8 17:36:32 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

Relevant log output

2024-06-03 12:21:53,670 INFO Welcome to the CDS
2024-06-03 12:21:53,671 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-humidity-profiles
2024-06-03 12:21:53,750 INFO Request is completed
2024-06-03 12:21:53,751 INFO Downloading https://download-0005-clone.copernicus-climate.eu/cache-compute-0005/cache/data3/dataset-satellite-humidity-profiles-47d24ba0-e481-4f86-bc3d-5838d138dc3e.zip to /data/wp5/.tmp/tmpnjokjzlp/cds-retriever-52b47ff6a52780826a19957ec1d268f266aef501297594a26ed18599adca2910.cache.tmp (51.9K)
2024-06-03 12:21:53,764 INFO Download rate 3.8M/s

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 14
      5 collection_id = "satellite-humidity-profiles"
      7 request = {
      8     "variable": "all",
      9     "product_type": "radio_occultation_data",
     10     "year": 2007,
     11     "month": "01",
     12 }
---> 14 earthkit.data.from_source("cds", collection_id, request)

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/sources/__init__.py:155, in from_source(name, lazily, *args, **kwargs)
    153 while src is not prev:
    154     prev = src
--> 155     src = src.mutate()
    156 return src

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/sources/file.py:77, in FileSource.mutate(self)
     73     return FileIndexedSource(self.path, filter=filter, merger=self.merger, **kw)
     75 # Give a chance to directories and zip files
     76 # to return a multi-source
---> 77 source = self._reader.mutate_source()
     78 if source not in (None, self):
     79     source._parent = self

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/sources/file.py:96, in FileSource._reader(self)
     93 @property
     94 def _reader(self):
     95     if self._reader_ is None:
---> 96         self._reader_ = reader(
     97             self, self.path, content_type=self.content_type, parts=self.parts
     98         )
     99     return self._reader_

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/__init__.py:184, in reader(source, path, **kwargs)
    180     magic = f.read(n_bytes)
    182 LOG.debug("Looking for a reader for %s (%s)", path, magic)
--> 184 return _find_reader(
    185     "reader",
    186     source,
    187     path,
    188     magic=magic,
    189     **kwargs,
    190 )

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/__init__.py:132, in _find_reader(method_name, source, path_or_data, **kwargs)
    130         reader = r(source, path_or_data, deeper_check=deeper_check, **kwargs)
    131         if reader is not None:
--> 132             return reader.mutate()
    134 return _unknown(method_name, source, path_or_data, **kwargs)

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/zip.py:74, in ZIPReader.mutate(self)
     71 if self._mutate:
     72     return self._mutate
---> 74 return super().mutate()

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/archive.py:51, in ArchiveReader.mutate(self)
     49 def mutate(self):
     50     if os.path.isdir(self.path):
---> 51         return find_reader(self.source, self.path).mutate()
     53     return self

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/__init__.py:169, in reader(source, path, **kwargs)
    166 if os.path.isdir(path):
    167     from .directory import DirectoryReader
--> 169     return DirectoryReader(source, path).mutate()
    170 LOG.debug("Reader for %s", path)
    172 if not os.path.exists(path):

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/directory.py:64, in DirectoryReader.mutate(self)
     62 def mutate(self):
     63     if len(self._content) == 1:
---> 64         return find_reader(self.source, self._content[0])
     65     return self

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/__init__.py:184, in reader(source, path, **kwargs)
    180     magic = f.read(n_bytes)
    182 LOG.debug("Looking for a reader for %s (%s)", path, magic)
--> 184 return _find_reader(
    185     "reader",
    186     source,
    187     path,
    188     magic=magic,
    189     **kwargs,
    190 )

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/__init__.py:130, in _find_reader(method_name, source, path_or_data, **kwargs)
    126 for deeper_check in (False, True):
    127     # We do two passes, the second one
    128     # allow the plugin to look deeper in the buffer
    129     for name, r in _readers(method_name).items():
--> 130         reader = r(source, path_or_data, deeper_check=deeper_check, **kwargs)
    131         if reader is not None:
    132             return reader.mutate()

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/__init__.py:88, in reader(source, path, magic, deeper_check, **kwargs)
     86 if _match_magic(magic, deeper_check):
     87     fs = NetCDFFieldListReader(source, path)
---> 88     if fs.has_fields():
     89         return fs
     90     else:

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/fieldlist.py:167, in XArrayFieldListCore.has_fields(self)
    165 def has_fields(self):
    166     if self._fields is None:
--> 167         return get_fields_from_ds(
    168             DataSet(self.xr_dataset),
    169             self.array_backend,
    170             field_type=self.FIELD_TYPE,
    171             check_only=True,
    172         )
    173     else:
    174         return len(self._fields)

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/fieldlist.py:127, in get_fields_from_ds(ds, array_backend, field_type, check_only)
    124         use = True
    126     if not use:
--> 127         coordinates.append(OtherCoordinate(c, coord in info))
    129 if not (has_lat and has_lon):
    130     # self.log.info("NetCDFReader: skip %s (Not a 2 field)", name)
    131     continue

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/coords.py:65, in Coordinate.__init__(self, variable, info)
     63     self.values = [self.convert(variable.values)]
     64 else:
---> 65     self.values = [self.convert(t) for t in variable.values.flatten()]

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/coords.py:65, in <listcomp>(.0)
     63     self.values = [self.convert(variable.values)]
     64 else:
---> 65     self.values = [self.convert(t) for t in variable.values.flatten()]

File /data/common/miniforge3/envs/wp5/lib/python3.11/site-packages/earthkit/data/readers/netcdf/coords.py:28, in as_level(self, level)
     26 if isinstance(level, str):
     27     return level
---> 28 n = float(level)
     29 if int(n) == n:
     30     return int(n)

TypeError: float() argument must be a string or a real number, not 'cftime._cftime.DatetimeJulian'

Accompanying data

No response

Organisation

B-Open / CADS-EQC

sandorkertesz commented 3 months ago

@malmans2 Thank you for reporting this issue. It has now been fixed in #403.