boutproject / xBOUT

Collects BOUT++ data from parallelized simulations into xarray.
https://xbout.readthedocs.io/en/latest/
Apache License 2.0

xbout datasets cannot be stored #290

Closed: dschwoerer closed this issue 1 year ago

dschwoerer commented 1 year ago

Reproducing example:

In [16]: import xbout

In [17]: db = xbout.open_boutdataset("/u/dave/soft/BOUT-dev/prod/build-cont-2d/examples/2Dturbulence_multigrid/data/BOUT.dmp.0.nc")

In [18]: db.to_netcdf("bout.nc")
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[18], line 1
----> 1 db.to_netcdf("bout.nc")

File /usr/lib/python3.11/site-packages/xarray/core/dataset.py:1917, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1914     encoding = {}
   1915 from xarray.backends.api import to_netcdf
-> 1917 return to_netcdf(  # type: ignore  # mypy cannot resolve the overloads:(
   1918     self,
   1919     path,
   1920     mode=mode,
   1921     format=format,
   1922     group=group,
   1923     engine=engine,
   1924     encoding=encoding,
   1925     unlimited_dims=unlimited_dims,
   1926     compute=compute,
   1927     multifile=False,
   1928     invalid_netcdf=invalid_netcdf,
   1929 )

File /usr/lib/python3.11/site-packages/xarray/backends/api.py:1169, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1167 # validate Dataset keys, DataArray names, and attr keys/values
   1168 _validate_dataset_names(dataset)
-> 1169 _validate_attrs(dataset, invalid_netcdf=invalid_netcdf and engine == "h5netcdf")
   1171 try:
   1172     store_open = WRITEABLE_STORES[engine]

File /usr/lib/python3.11/site-packages/xarray/backends/api.py:187, in _validate_attrs(dataset, invalid_netcdf)
    185 # Check attrs on the dataset itself
    186 for k, v in dataset.attrs.items():
--> 187     check_attr(k, v, valid_types)
    189 # Check attrs on each variable within the dataset
    190 for variable in dataset.variables.values():

File /usr/lib/python3.11/site-packages/xarray/backends/api.py:179, in _validate_attrs.<locals>.check_attr(name, value, valid_types)
    173     raise TypeError(
    174         f"Invalid name for attr: {name!r} must be a string for "
    175         "serialization to netCDF files"
    176     )
    178 if not isinstance(value, valid_types):
--> 179     raise TypeError(
    180         f"Invalid value for attr {name!r}: {value!r}. For serialization to "
    181         "netCDF files, its value must be of one of the following types: "
    182         f"{', '.join([vtype.__name__ for vtype in valid_types])}"
    183     )

TypeError: Invalid value for attr 'metadata': {'BOUT_VERSION': 5.0, 'MXG': 2, 'MXSUB': 256, 'MYG': 0, 'MYSUB': 1, 'MZ': 256, 'MZG': 0, 'MZSUB': 256, 'NXPE': 1, 'NYPE': 1, 'NZPE': 1, 'ZMAX': 15.91549431, 'ZMIN': 0.0, 'has_fftw': 1, 'has_gettext': 1, 'has_lapack': 1, 'has_netcdf': 1, 'has_petsc': 0, 'has_pretty_function': 1, 'has_pvode': 1, 'has_scorep': 0, 'has_slepc': 0, 'has_sundials': 0, 'hist_hi': 0, 'iteration': -1, 'ixseps1': 260, 'ixseps2': 260, 'jyseps1_1': -1, 'jyseps1_2': 0, 'jyseps2_1': 0, 'jyseps2_2': 0, 'nx': 260, 'ny': 1, 'ny_inner': 0, 'nz': 256, 'run_id': '0ac63dca-34c8-49f3-bd9d-a59542ba97be', 'run_restart_from': 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', 'tt': 0.0, 'use_backtrace': 1, 'use_color': 1, 'use_openmp': 0, 'use_output_debug': 0, 'use_sigfpe': 0, 'use_signal': 1, 'use_track': 1, 'zperiod': 0, 'bout_tdim': 't', 'bout_xdim': 'x', 'bout_ydim': 'y', 'bout_zdim': 'z', 'keep_xboundaries': 1, 'keep_yboundaries': 0, 'is_restart': 0, 'fine_interpolation_factor': 8}. For serialization to netCDF files, its value must be of one of the following types: str, Number, ndarray, number, list, tuple
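
The root cause: xarray's attribute validation only accepts str, Number, ndarray, list or tuple values, while xBOUT stores the BOUT++ run metadata as a nested dict in ds.attrs['metadata']. A distilled reproduction in plain xarray (no BOUT++ output needed; the filename is illustrative):

import xarray as xr

ds = xr.Dataset({"n": ("x", [1.0, 2.0])})
ds.attrs["metadata"] = {"BOUT_VERSION": 5.0}  # dict-valued attr, as xBOUT sets it

# Raises the same TypeError: dict is not an accepted netCDF attr type.
ds.to_netcdf("fails.nc")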

It would be nice if I could store the dataset, so that I can, for example, easily add data to it later.
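
A manual workaround (a sketch only, not xBOUT API): flatten dict-valued attributes into JSON strings before writing, since str is an accepted attribute type. Any other non-serializable attrs the dataset carries would need similar treatment:

import json

db_flat = db.copy()
# Flatten dict-valued attrs (e.g. 'metadata') into JSON strings;
# other non-serializable attr values would need similar handling.
for key, val in list(db_flat.attrs.items()):
    if isinstance(val, dict):
        db_flat.attrs[key] = json.dumps(val)

db_flat.to_netcdf("bout.nc")  # the dataset-level attrs now pass validation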

dschwoerer commented 1 year ago

As @johnomotani pointed out, db.bout.save() exists to work around this limitation.
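
For completeness, a usage sketch of that workaround (the paths are illustrative; see the xBOUT docs for the full signature of save()):

import xbout

db = xbout.open_boutdataset("data/BOUT.dmp.*.nc")
db.bout.save("bout.nc")  # writes the Dataset, converting the metadata so it survives the round trip
db2 = xbout.open_boutdataset("bout.nc")  # per the xBOUT docs, saved datasets can be re-opened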