ocean-transport / lcs-ml

Lagrangian Coherent Structure identification for machine learning

Figure out best way to save pyqg model #18

Closed. hscannell closed this issue 3 years ago.

hscannell commented 3 years ago

Converting the pyqg model state to an xarray.Dataset is what we want to do. There is a PR open here. Can we do this for floater too? It needs some TLC.
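
For context, here is a minimal sketch of doing the conversion by hand (attribute names are assumed from pyqg's documentation; the open PR presumably adds a built-in `to_dataset` so this wouldn't be needed):

```python
# Sketch only: build an xarray.Dataset from a pyqg model's state by hand.
# The attributes q, u, v, x, y are assumptions based on pyqg's docs.
import pyqg
import xarray as xr

m = pyqg.QGModel(tmax=8 * 3600)  # short run just for illustration
m.run()

ds = xr.Dataset(
    {
        "q": (("lev", "y", "x"), m.q),  # potential vorticity
        "u": (("lev", "y", "x"), m.u),  # zonal velocity
        "v": (("lev", "y", "x"), m.v),  # meridional velocity
    },
    # m.x and m.y are 2D meshgrids; take one row/column as 1D coords
    coords={"x": m.x[0, :], "y": m.y[:, 0]},
)
```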

hscannell commented 3 years ago

I'm getting errors when trying to save my xarray.Dataset to netcdf or zarr.

```python
ds.to_netcdf('qg_checkpoint.nc')
```

```
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-25-6ddaf2cb0cd1> in <module>
----> 1 ds.to_netcdf('qg_checkpoint.nc')

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/core/dataset.py in to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   1687         from ..backends.api import to_netcdf
   1688 
-> 1689         return to_netcdf(
   1690             self,
   1691             path,

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1105         # TODO: allow this work (setting up the file for writing array data)
   1106         # to be parallelized with dask
-> 1107         dump_to_store(
   1108             dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1109         )

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1152         variables, attrs = encoder(variables, attrs)
   1153 
-> 1154     store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
   1155 
   1156 

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    253         self.set_attributes(attributes)
    254         self.set_dimensions(variables, unlimited_dims=unlimited_dims)
--> 255         self.set_variables(
    256             variables, check_encoding_set, writer, unlimited_dims=unlimited_dims
    257         )

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
    291             name = _encode_variable_name(vn)
    292             check = vn in check_encoding_set
--> 293             target, source = self.prepare_variable(
    294                 name, v, check, unlimited_dims=unlimited_dims
    295             )

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/scipy_.py in prepare_variable(self, name, variable, check_encoding, unlimited_dims)
    216         # incremental writes.
    217         if name not in self.ds.variables:
--> 218             self.ds.createVariable(name, data.dtype, variable.dims)
    219         scipy_var = self.ds.variables[name]
    220         for k, v in variable.attrs.items():

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/scipy/io/netcdf.py in createVariable(self, name, type, dimensions)
    389         typecode, size = type.char, type.itemsize
    390         if (typecode, size) not in REVERSE:
--> 391             raise ValueError("NetCDF 3 does not support type %s" % type)
    392 
    393         data = empty(shape_, dtype=type.newbyteorder("B"))  # convert to big endian always for NetCDF 3

ValueError: NetCDF 3 does not support type complex128
```

```python
ds.to_zarr('qg_checkpoint.zarr')
```

```
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-26-e4ba79cabe8a> in <module>
----> 1 ds.to_zarr('qg_checkpoint.zarr')

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/core/dataset.py in to_zarr(self, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region)
   1788             encoding = {}
   1789 
-> 1790         return to_zarr(
   1791             self,
   1792             store=store,

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in to_zarr(dataset, store, chunk_store, mode, synchronizer, group, encoding, compute, consolidated, append_dim, region)
   1473     writer = ArrayWriter()
   1474     # TODO: figure out how to properly handle unlimited_dims
-> 1475     dump_to_store(dataset, zstore, writer, encoding=encoding)
   1476     writes = writer.sync(compute=compute)
   1477 

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/api.py in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1152         variables, attrs = encoder(variables, attrs)
   1153 
-> 1154     store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)
   1155 
   1156 

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/zarr.py in store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    452             self.set_attributes(attributes)
    453             self.set_dimensions(variables_encoded, unlimited_dims=unlimited_dims)
--> 454         self.set_variables(
    455             variables_encoded, check_encoding_set, writer, unlimited_dims=unlimited_dims
    456         )

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/zarr.py in set_variables(self, variables, check_encoding_set, writer, unlimited_dims)
    528 
    529             region = tuple(write_region[dim] for dim in dims)
--> 530             writer.add(v.data, zarr_array, region)
    531 
    532     def close(self):

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/xarray/backends/common.py in add(self, source, target, region)
    142         else:
    143             if region:
--> 144                 target[region] = source
    145             else:
    146                 target[...] = source

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in __setitem__(self, selection, value)
   1120 
   1121         fields, selection = pop_fields(selection)
-> 1122         self.set_basic_selection(selection, value, fields=fields)
   1123 
   1124     def set_basic_selection(self, selection, value, fields=None):

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in set_basic_selection(self, selection, value, fields)
   1213         # handle zero-dimensional arrays
   1214         if self._shape == ():
-> 1215             return self._set_basic_selection_zd(selection, value, fields=fields)
   1216         else:
   1217             return self._set_basic_selection_nd(selection, value, fields=fields)

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/core.py in _set_basic_selection_zd(self, selection, value, fields)
   1466         selection = ensure_tuple(selection)
   1467         if selection not in ((), (Ellipsis,)):
-> 1468             err_too_many_indices(selection, self._shape)
   1469 
   1470         # check fields

~/opt/anaconda3/envs/lcs-ml/lib/python3.8/site-packages/zarr/errors.py in err_too_many_indices(selection, shape)
     66 
     67 def err_too_many_indices(selection, shape):
---> 68     raise IndexError('too many indices for array; expected {}, got {}'
     69                      .format(len(shape), len(selection)))
     70 

IndexError: too many indices for array; expected 0, got 1
```

rabernat commented 3 years ago

What's the difference between the first and second example? You showed the same code, but different errors.

hscannell commented 3 years ago
`ds.to_netcdf()` vs. `ds.to_zarr()`

rabernat commented 3 years ago

yes obvious now 🤪

rabernat commented 3 years ago

The netCDF error suggests this is related to the complex data type. I found this SO issue, which suggests using the `engine='h5netcdf'` option. That seemed to work for this example:

```python
import xarray as xr

ds = xr.DataArray([1j], name='foo').to_dataset()
try:
    # the default engine rejects complex data (TypeError or ValueError,
    # depending on which backend is installed; the scipy backend above
    # raised ValueError)
    ds.to_netcdf('test_complex.nc', mode='w')
except (TypeError, ValueError):
    # expected that
    pass
ds.to_netcdf('test_complex.nc', engine='h5netcdf', mode='w')
ds1 = xr.open_dataset('test_complex.nc', engine='h5netcdf')
assert ds1.identical(ds)
```
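
An alternative workaround (not from this thread, just a common trick) is to split the complex field into separate real and imaginary float variables, which any netCDF engine can write. A minimal sketch:

```python
# Split complex data into two float variables so the file can be written
# (and read back) with any netCDF engine, including netCDF-3 backends.
import xarray as xr

ds = xr.DataArray([1j], name='foo').to_dataset()
ds_split = xr.Dataset({'foo_re': ds.foo.real, 'foo_im': ds.foo.imag})
ds_split.to_netcdf('test_complex_split.nc', mode='w')

# reassemble on read
ds2 = xr.open_dataset('test_complex_split.nc')
foo = ds2.foo_re + 1j * ds2.foo_im
```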
hscannell commented 3 years ago

Ah OK, that explains the netCDF error, but I'm still puzzled by the zarr one.
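
One clue: the zarr traceback bottoms out in zarr's `_set_basic_selection_zd`, which handles zero-dimensional arrays, so a scalar (0-d) variable in the dataset is a likely suspect. A minimal sketch of checking for and working around that, assuming that diagnosis is right:

```python
# Sketch only, assuming the failure comes from zero-dimensional (scalar)
# variables in the pyqg-derived dataset.
scalars = [name for name, var in ds.variables.items() if var.ndim == 0]
print(scalars)  # inspect which variables are 0-d

# drop them (or move them into attrs) before writing
ds.drop_vars(scalars).to_zarr('qg_checkpoint.zarr', mode='w')
```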