Open mtvector opened 3 months ago
I think this is related to both https://github.com/scverse/anndata/issues/1577 and https://github.com/scverse/anndata/issues/679. I cannot reproduce this, though:
from scanpy.datasets import pbmc3k_processed
adata = pbmc3k_processed()
adata.obs['louvain'] = adata.obs['louvain'].astype('string')
adata.write_h5ad('foo.h5ad')
works for me.
But
In [40]: ad.write_h5ad('foo.h5ad', adata, convert_strings_to_categoricals=False)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[40], line 1
----> 1 ad.write_h5ad('foo.h5ad', adata, convert_strings_to_categoricals=False)
AttributeError: module 'anndata' has no attribute 'write_h5ad'
In [41]: ad._io.write_h5ad('foo.h5ad', adata, convert_strings_to_categoricals=False)
---------------------------------------------------------------------------
IORegistryError Traceback (most recent call last)
Cell In[41], line 1
----> 1 ad._io.write_h5ad('foo.h5ad', adata, convert_strings_to_categoricals=False)
File ~/Projects/Theis/anndata/src/anndata/_io/h5ad.py:103, in write_h5ad(filepath, adata, as_dense, convert_strings_to_categoricals, dataset_kwargs, **kwargs)
101 elif adata.raw is not None:
102 write_elem(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
--> 103 write_elem(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
104 write_elem(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
105 write_elem(f, "obsm", dict(adata.obsm), dataset_kwargs=dataset_kwargs)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:432, in write_elem(store, k, elem, dataset_kwargs)
408 def write_elem(
409 store: GroupStorageType,
410 k: str,
(...)
413 dataset_kwargs: Mapping[str, Any] = MappingProxyType({}),
414 ) -> None:
415 """
416 Write an element to a storage group using anndata encoding.
417
(...)
430 E.g. for zarr this would be `chunks`, `compressor`.
431 """
--> 432 Writer(_REGISTRY).write_elem(store, k, elem, dataset_kwargs=dataset_kwargs)
File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:247, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
245 raise ValueError("No element found in args.")
246 try:
--> 247 return func(*args, **kwargs)
248 except Exception as e:
249 path = _get_display_path(store)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:357, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
354 write_func = self.find_write_func(dest_type, elem, modifiers)
356 if self.callback is None:
--> 357 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
358 return self.callback(
359 write_func,
360 store,
(...)
364 iospec=self.registry.get_spec(elem),
365 )
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:73, in write_spec.<locals>.decorator.<locals>.wrapper(g, k, *args, **kwargs)
71 @wraps(func)
72 def wrapper(g: GroupStorageType, k: str, *args, **kwargs):
---> 73 result = func(g, k, *args, **kwargs)
74 g[k].attrs.setdefault("encoding-type", spec.encoding_type)
75 g[k].attrs.setdefault("encoding-version", spec.encoding_version)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/methods.py:863, in write_dataframe(f, key, df, _writer, dataset_kwargs)
858 _writer.write_elem(
859 group, index_name, df.index._values, dataset_kwargs=dataset_kwargs
860 )
861 for colname, series in df.items():
862 # TODO: this should write the "true" representation of the series (i.e. the underlying array or ndarray depending)
--> 863 _writer.write_elem(
864 group, colname, series._values, dataset_kwargs=dataset_kwargs
865 )
File ~/Projects/Theis/anndata/src/anndata/_io/utils.py:247, in report_write_key_on_error.<locals>.func_wrapper(*args, **kwargs)
245 raise ValueError("No element found in args.")
246 try:
--> 247 return func(*args, **kwargs)
248 except Exception as e:
249 path = _get_display_path(store)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:354, in Writer.write_elem(self, store, k, elem, dataset_kwargs, modifiers)
351 elif k in store:
352 del store[k]
--> 354 write_func = self.find_write_func(dest_type, elem, modifiers)
356 if self.callback is None:
357 return write_func(store, k, elem, dataset_kwargs=dataset_kwargs)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:321, in Writer.find_write_func(self, dest_type, elem, modifiers)
317 return self.registry.get_write(
318 dest_type, pattern, modifiers, writer=self
319 )
320 # Raises IORegistryError
--> 321 return self.registry.get_write(dest_type, type(elem), modifiers, writer=self)
File ~/Projects/Theis/anndata/src/anndata/_io/specs/registry.py:137, in IORegistry.get_write(self, dest_type, src_type, modifiers, writer)
134 dest_type = h5py.Group
136 if (dest_type, src_type, modifiers) not in self.write:
--> 137 raise IORegistryError._from_write_parts(dest_type, src_type, modifiers)
138 internal = self.write[(dest_type, src_type, modifiers)]
139 return partial(internal, _writer=writer)
IORegistryError: No method registered for writing <class 'pandas.core.arrays.string_.StringArray'> into <class 'h5py._hl.group.Group'>
Error raised while writing key 'louvain' of <class 'h5py._hl.group.Group'> to /obs
errors on main, which is why I linked the issue. When I use 0.10.8, I can't reproduce your specific problem either (or any problem at all; the `write_h5ad` call works). Could you share a clearer reproducer?
This issue has been automatically marked as stale because it has not had recent activity. Please add a comment if you want to keep the issue open. Thank you for your contributions!
Please make sure these conditions are met
Report
I'm getting the following error when I attempt to write an h5ad file from an AnnData object:
Traceback:
This occurs for an adata like this:
The .obs types are as follows:
And it seems that all the `string[python]`-typed columns yield this problem.
Any insight you could give would be very helpful. Maybe I'm missing something obvious? Thanks!
Versions