Based on #833, I am trying to read a 2D array from a nested structure. The file is an AnnData file, and I was hoping to do this without extracting the array into a new HDF5 file.
In [6]: f = vaex.open(filename, group="/obsm/spatial/")
[12/06/22 20:19:07] ERROR error opening <filename>
results in:
complete traceback
```python
Traceback (most recent call last):
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/__ini
t__.py", line 244, in open
ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs)
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/datas
et.py", line 81, in open
return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs)
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/datas
et.py", line 1457, in open
return cls(path, *args, **kwargs)
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/
dataset.py", line 71, in __init__
self._load()
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/
dataset.py", line 214, in _load
self._load_columns(self.h5file[self.group])
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/
dataset.py", line 291, in _load_columns
h5columns = h5data if self._version == 1 else h5data['columns']
File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d
ataset.py", line 760, in __getitem__
return self.fields(names, _prior_dtype=new_dtype)[args]
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d
ataset.py", line 418, in fields
return FieldsWrapper(self, _prior_dtype, names)
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d
ataset.py", line 254, in __init__
self.read_dtype = readtime_dtype(prior_dtype, names)
File
"/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d
ataset.py", line 273, in readtime_dtype
raise ValueError("Field names only allowed for compound types")
ValueError: Field names only allowed for compound types
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[6], line 1
----> 1 f = vaex.open("atlas_brain_638850.hdf5", group="/obsm/spatial/")
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/__init__.py:244, in open(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs)
242 ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs)
243 else:
--> 244 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs)
245 df = vaex.from_dataset(ds)
246 if df is None:
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/dataset.py:81, in open(path, fs_options, fs, *args, **kwargs)
79 if opener.quick_test(path, fs_options=fs_options, fs=fs):
80 if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs):
---> 81 return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs)
83 # otherwise try all openers
84 for opener in opener_classes:
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/dataset.py:1457, in DatasetFile.open(cls, path, *args, **kwargs)
1455 @classmethod
1456 def open(cls, path, *args, **kwargs):
-> 1457 return cls(path, *args, **kwargs)
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:71, in Hdf5MemoryMapped.__init__(self, path, write, fs_options, fs, nommap, group, _fingerprint)
69 self.group = group
70 self._version = 1
---> 71 self._load()
72 if not write: # in write mode, call freeze yourself, so the hashes are computed
73 self._freeze()
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:214, in Hdf5MemoryMapped._load(self)
212 else:
213 self._version = 2
--> 214 self._load_columns(self.h5file[self.group])
216 if "properties" in self.h5file:
217 self._load_variables(self.h5file["/properties"]) # old name, kept for portability
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:291, in Hdf5MemoryMapped._load_columns(self, h5data, first)
289 self.description = ensure_string(h5data.attrs["description"])
290 # hdf5, or h5py doesn't keep the order of columns, so manually track that, also enables reordering later
--> 291 h5columns = h5data if self._version == 1 else h5data['columns']
292 if "column_order" in h5columns.attrs:
293 column_order = ensure_string(h5columns.attrs["column_order"]).split(",")
File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper()
File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper()
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:760, in Dataset.__getitem__(self, args, new_dtype)
758 names = names[0] # Read with simpler dtype of this field
759 args = tuple(x for x in args if not isinstance(x, str))
--> 760 return self.fields(names, _prior_dtype=new_dtype)[args]
762 if new_dtype is None:
763 new_dtype = self.dtype
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:418, in Dataset.fields(self, names, _prior_dtype)
416 if _prior_dtype is None:
417 _prior_dtype = self.dtype
--> 418 return FieldsWrapper(self, _prior_dtype, names)
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:254, in FieldsWrapper.__init__(self, dset, prior_dtype, names)
252 self.extract_field = names
253 names = [names]
--> 254 self.read_dtype = readtime_dtype(prior_dtype, names)
File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:273, in readtime_dtype(basetype, names)
271 """Make a NumPy compound dtype with a subset of available fields"""
272 if basetype.names is None: # Names provided, but not compound
--> 273 raise ValueError("Field names only allowed for compound types")
275 for name in names: # Check all names are legal
276 if not name in basetype.names:
ValueError: Field names only allowed for compound types
```
Description
Based on #833, I am trying to read a 2D array from a nested structure. The file is an AnnData file, and I was hoping to do this without extracting the array into a new HDF5 file.
Reading this file with vaex (`vaex.open(filename, group="/obsm/spatial/")`)
results in the following error.
Complete traceback:
```python Traceback (most recent call last): File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/__ini t__.py", line 244, in open ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/datas et.py", line 81, in open return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/datas et.py", line 1457, in open return cls(path, *args, **kwargs) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/ dataset.py", line 71, in __init__ self._load() File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/ dataset.py", line 214, in _load self._load_columns(self.h5file[self.group]) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/ dataset.py", line 291, in _load_columns h5columns = h5data if self._version == 1 else h5data['columns'] File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d ataset.py", line 760, in __getitem__ return self.fields(names, _prior_dtype=new_dtype)[args] File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d ataset.py", line 418, in fields return FieldsWrapper(self, _prior_dtype, names) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d ataset.py", line 254, in __init__ self.read_dtype = readtime_dtype(prior_dtype, names) File "/Users/satra/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/d ataset.py", line 273, in readtime_dtype raise ValueError("Field names only allowed for compound types") ValueError: Field names only allowed for compound 
types --------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[6], line 1 ----> 1 f = vaex.open("atlas_brain_638850.hdf5", group="/obsm/spatial/") File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/__init__.py:244, in open(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs) 242 ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs) 243 else: --> 244 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) 245 df = vaex.from_dataset(ds) 246 if df is None: File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/dataset.py:81, in open(path, fs_options, fs, *args, **kwargs) 79 if opener.quick_test(path, fs_options=fs_options, fs=fs): 80 if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs): ---> 81 return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) 83 # otherwise try all openers 84 for opener in opener_classes: File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/dataset.py:1457, in DatasetFile.open(cls, path, *args, **kwargs) 1455 @classmethod 1456 def open(cls, path, *args, **kwargs): -> 1457 return cls(path, *args, **kwargs) File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:71, in Hdf5MemoryMapped.__init__(self, path, write, fs_options, fs, nommap, group, _fingerprint) 69 self.group = group 70 self._version = 1 ---> 71 self._load() 72 if not write: # in write mode, call freeze yourself, so the hashes are computed 73 self._freeze() File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:214, in Hdf5MemoryMapped._load(self) 212 else: 213 self._version = 2 --> 214 self._load_columns(self.h5file[self.group]) 216 if "properties" in self.h5file: 217 self._load_variables(self.h5file["/properties"]) # old name, kept for portability File 
~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/vaex/hdf5/dataset.py:291, in Hdf5MemoryMapped._load_columns(self, h5data, first) 289 self.description = ensure_string(h5data.attrs["description"]) 290 # hdf5, or h5py doesn't keep the order of columns, so manually track that, also enables reordering later --> 291 h5columns = h5data if self._version == 1 else h5data['columns'] 292 if "column_order" in h5columns.attrs: 293 column_order = ensure_string(h5columns.attrs["column_order"]).split(",") File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper() File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper() File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:760, in Dataset.__getitem__(self, args, new_dtype) 758 names = names[0] # Read with simpler dtype of this field 759 args = tuple(x for x in args if not isinstance(x, str)) --> 760 return self.fields(names, _prior_dtype=new_dtype)[args] 762 if new_dtype is None: 763 new_dtype = self.dtype File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:418, in Dataset.fields(self, names, _prior_dtype) 416 if _prior_dtype is None: 417 _prior_dtype = self.dtype --> 418 return FieldsWrapper(self, _prior_dtype, names) File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:254, in FieldsWrapper.__init__(self, dset, prior_dtype, names) 252 self.extract_field = names 253 names = [names] --> 254 self.read_dtype = readtime_dtype(prior_dtype, names) File ~/software/mambaforge/envs/cubiekb/lib/python3.10/site-packages/h5py/_hl/dataset.py:273, in readtime_dtype(basetype, names) 271 """Make a NumPy compound dtype with a subset of available fields""" 272 if basetype.names is None: # Names provided, but not compound --> 273 raise ValueError("Field names only allowed for compound types") 275 for name in names: # Check all names are legal 276 if not name in basetype.names: ValueError: Field names only allowed 
for compound types
```

Software information
Vaex version (`import vaex; vaex.__version__`): {'vaex-core': '4.16.0', 'vaex-viz': '0.5.4', 'vaex-hdf5': '0.14.1', 'vaex-server': '0.8.1', 'vaex-astro': '0.9.3', 'vaex-jupyter': '0.8.1', 'vaex-ml': '0.18.1'}
Vaex was installed via: conda-forge
OS: macOS (ARM)