Closed hscannell closed 3 years ago
I'm getting errors when trying to save my xarray.Dataset to netcdf or zarr.
ds.to_netcdf('qg_checkpoint.nc')
'''---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-25-6ddaf2cb0cd1> in <module>
----> 1 ds.to_netcdf('qg_checkpoint.nc')
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/core/dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
1687 from ..backends.api import to_netcdf
1688
-> 1689 return to_netcdf(
1690 self,
1691 path,
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
1105 # TODO: allow this work (setting up the file for writing array data)
1106 # to be parallelized with dask
-> 1107 dump_to_store(
1108 dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
1109 )
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
1152 variables, attrs = encoder(variables, attrs)
1153
-> 1154 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
1155
1156
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
253 self.set_attributes(attributes)
254 self.set_dimensions(variables, unlimited_dims=unlimited_dims)
--> 255 self.set_variables(
256 variables, check_encoding_set, writer, unlimited_dims=unlimited_dims
257 )
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
291 name = _encode_variable_name(vn)
292 check = vn in check_encoding_set
--> 293 target, source = self.prepare_variable(
294 name, v, check, unlimited_dims=unlimited_dims
295 )
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/scipy_.py in prepare_variable(self, name, variable, check_encoding, unlimited_dims)
216 # incremental writes.
217 if name not in self.ds.variables:
--> 218 self.ds.createVariable(name, data.dtype, variable.dims)
219 scipy_var = self.ds.variables[name]
220 for k, v in variable.attrs.items():
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/scipy/io/netcdf.py in createVariable(self, name, type, dimensions)
389 typecode, size = type.char, type.itemsize
390 if (typecode, size) not in REVERSE:
--> 391 raise ValueError("NetCDF 3 does not support type %s" % type)
392
393 data = empty(shape_, dtype=type.newbyteorder("B")) # convert to big endian always for NetCDF 3
ValueError: NetCDF 3 does not support type complex128'''
ds.to_zarr('qg_checkpoint.zarr')
'''---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-26-e4ba79cabe8a> in <module>
----> 1 ds.to_zarr('qg_checkpoint.zarr')
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/core/dataset.py in to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region)
1788 encoding = {}
1789
-> 1790 return to_zarr(
1791 self,
1792 store=store,
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region)
1473 writer = ArrayWriter()
1474 # TODO: figure out how to properly handle unlimited_dims
-> 1475 dump_to_store(dataset, zstore, writer, encoding=encoding)
1476 writes = writer.sync(compute=compute)
1477
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
1152 variables, attrs = encoder(variables, attrs)
1153
-> 1154 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
1155
1156
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/zarr.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
452 self.set_attributes(attributes)
453 self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims)
--> 454 self.set_variables(
455 variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims
456 )
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/zarr.py in set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
528
529 region = tuple(write_region[dim] for dim in dims)
--> 530 writer.add(v.data, zarr_array, region)
531
532 def close(self):
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in add(self, source, target, region)
142 else:
143 if region:
--> 144 target[region] = source
145 else:
146 target[...] = source
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in __setitem__(self, selection, value)
1120
1121 fields, selection = pop_fields(selection)
-> 1122 self.set_basic_selection(selection, value, fields=fields)
1123
1124 def set_basic_selection(self, selection, value, fields=None):
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in set_basic_selection(self, selection, value, fields)
1213 # handle zero-dimensional arrays
1214 if self._shape == ():
-> 1215 return self._set_basic_selection_zd(selection, value, fields=fields)
1216 else:
1217 return self._set_basic_selection_nd(selection, value, fields=fields)
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in _set_basic_selection_zd(self, selection, value, fields)
1466 selection = ensure_tuple(selection)
1467 if selection not in ((), (Ellipsis,)):
-> 1468 err_too_many_indices(selection, self._shape)
1469
1470 # check fields
~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/errors.py in err_too_many_indices(selection, shape)
66
67 def err_too_many_indices(selection, shape):
---> 68 raise IndexError('too many indices for array; expected {}, got {}'
69 .format(len(shape), len(selection)))
70
IndexError: too many indices for array; expected 0, got 1'''
What's the difference between the first and second example? You showed the same code, but different errors.
ds.to_netcdf()
vs.
ds.to_zarr()
yes obvious now 🤪
The netCDF error suggests this is related to the complex data type. I found this SO issue which suggested using the engine='h5netcdf'
option. That seemed to work for this example
# Minimal reproducer: round-trip a complex-valued Dataset through netCDF.
# The default engine rejects complex dtypes; the h5netcdf engine accepts them.
import xarray as xr

dataset = xr.DataArray([1j], name='foo').to_dataset()
try:
    dataset.to_netcdf('test_complex.nc', mode='w')
except TypeError:
    # Default engine cannot encode complex128 -- this failure is expected.
    pass
dataset.to_netcdf('test_complex.nc', engine='h5netcdf', mode='w')
roundtripped = xr.open_dataset('test_complex.nc', engine='h5netcdf')
assert roundtripped.identical(dataset)
Ah OK, that explains the netCDF error, but I'm still puzzled by the zarr one.
Converting pyqg output to an
xarray.Dataset
is what we want to do. There is a PR open here. Can we do this for floater too? It needs some TLC.