Starlitnightly / omicverse

A Python library for multi-omics analysis, including bulk, single-cell and spatial RNA-seq.
https://starlitnightly.github.io/omicverse/
GNU General Public License v3.0
431 stars 46 forks source link

Can't implicitly convert non-string objects to strings #16

Closed amaukisumi closed 5 months ago

amaukisumi commented 1 year ago

I get an error when saving an h5ad file. Do you have any suggestions or a solution for this?

TypeError: Can't implicitly convert non-string objects to strings Above error raised while writing key 'names' of <class 'h5py._hl.group.Group'> to /

Traceback

```python --------------------------------------------------------------------------- TypeError Traceback (most recent call last) File ~/.local/lib/python3.8/site-packages/anndata/_io/utils.py:214, in report_write_key_on_error..func_wrapper(elem, key, val, *args, **kwargs) 213 try: --> 214 return func(elem, key, val, *args, **kwargs) 215 except Exception as e: File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:175, in write_elem(f, k, elem, modifiers, *args, **kwargs) 174 else: --> 175 _REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:24, in write_spec..decorator..wrapper(g, k, *args, **kwargs) 22 @wraps(func) 23 def wrapper(g, k, *args, **kwargs): ---> 24 result = func(g, k, *args, **kwargs) 25 g[k].attrs.setdefault("encoding-type", spec.encoding_type) File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/methods.py:400, in write_recarray(f, k, elem, dataset_kwargs) 397 @_REGISTRY.register_write(H5Group, (np.ndarray, "V"), IOSpec("rec-array", "0.2.0")) 398 @_REGISTRY.register_write(H5Group, np.recarray, IOSpec("rec-array", "0.2.0")) 399 def write_recarray(f, k, elem, dataset_kwargs=MappingProxyType({})): --> 400 f.create_dataset(k, data=_to_hdf5_vlen_strings(elem), **dataset_kwargs) File ~/.local/lib/python3.8/site-packages/h5py/_hl/group.py:183, in Group.create_dataset(self, name, shape, dtype, data, **kwds) 181 group = self.require_group(parent_path) --> 183 dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds) 184 dset = dataset.Dataset(dsid) File ~/.local/lib/python3.8/site-packages/h5py/_hl/dataset.py:168, in make_new_dset(parent, shape, dtype, data, name, chunks, compression, shuffle, fletcher32, maxshape, compression_opts, fillvalue, scaleoffset, track_times, external, track_order, dcpl, dapl, efile_prefix, virtual_prefix, allow_unknown_filter, rdcc_nslots, rdcc_nbytes, rdcc_w0) 167 if (data is not None) and (not 
isinstance(data, Empty)): --> 168 dset_id.write(h5s.ALL, h5s.ALL, data) 170 return dset_id File h5py/_objects.pyx:54, in h5py._objects.with_phil.wrapper() File h5py/_objects.pyx:55, in h5py._objects.with_phil.wrapper() File h5py/h5d.pyx:280, in h5py.h5d.DatasetID.write() File h5py/_proxy.pyx:145, in h5py._proxy.dset_rw() File h5py/_conv.pyx:444, in h5py._conv.str2vlen() File h5py/_conv.pyx:110, in h5py._conv.generic_converter() File h5py/_conv.pyx:249, in h5py._conv.conv_str2vlen() TypeError: Can't implicitly convert non-string objects to strings The above exception was the direct cause of the following exception: TypeError Traceback (most recent call last) Cell In[86], line 1 ----> 1 adata.write("20230721.h5ad") File ~/.local/lib/python3.8/site-packages/anndata/_core/anndata.py:1918, in AnnData.write_h5ad(self, filename, compression, compression_opts, force_dense, as_dense) 1915 if filename is None: 1916 filename = self.filename -> 1918 _write_h5ad( 1919 Path(filename), 1920 self, 1921 compression=compression, 1922 compression_opts=compression_opts, 1923 force_dense=force_dense, 1924 as_dense=as_dense, 1925 ) 1927 if self.isbacked: 1928 self.file.filename = filename File ~/.local/lib/python3.8/site-packages/anndata/_io/h5ad.py:105, in write_h5ad(filepath, adata, force_dense, as_dense, dataset_kwargs, **kwargs) 103 write_elem(f, "varp", dict(adata.varp), dataset_kwargs=dataset_kwargs) 104 write_elem(f, "layers", dict(adata.layers), dataset_kwargs=dataset_kwargs) --> 105 write_elem(f, "uns", dict(adata.uns), dataset_kwargs=dataset_kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/utils.py:214, in report_write_key_on_error..func_wrapper(elem, key, val, *args, **kwargs) 211 @wraps(func) 212 def func_wrapper(elem, key, val, *args, **kwargs): 213 try: --> 214 return func(elem, key, val, *args, **kwargs) 215 except Exception as e: 216 if "Above error raised while writing key" in format(e): File 
~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:175, in write_elem(f, k, elem, modifiers, *args, **kwargs) 171 _REGISTRY.get_writer(dest_type, (t, elem.dtype.kind), modifiers)( 172 f, k, elem, *args, **kwargs 173 ) 174 else: --> 175 _REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:24, in write_spec..decorator..wrapper(g, k, *args, **kwargs) 22 @wraps(func) 23 def wrapper(g, k, *args, **kwargs): ---> 24 result = func(g, k, *args, **kwargs) 25 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 26 g[k].attrs.setdefault("encoding-version", spec.encoding_version) File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/methods.py:281, in write_mapping(f, k, v, dataset_kwargs) 279 g = f.create_group(k) 280 for sub_k, sub_v in v.items(): --> 281 write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/utils.py:214, in report_write_key_on_error..func_wrapper(elem, key, val, *args, **kwargs) 211 @wraps(func) 212 def func_wrapper(elem, key, val, *args, **kwargs): 213 try: --> 214 return func(elem, key, val, *args, **kwargs) 215 except Exception as e: 216 if "Above error raised while writing key" in format(e): File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:175, in write_elem(f, k, elem, modifiers, *args, **kwargs) 171 _REGISTRY.get_writer(dest_type, (t, elem.dtype.kind), modifiers)( 172 f, k, elem, *args, **kwargs 173 ) 174 else: --> 175 _REGISTRY.get_writer(dest_type, t, modifiers)(f, k, elem, *args, **kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/registry.py:24, in write_spec..decorator..wrapper(g, k, *args, **kwargs) 22 @wraps(func) 23 def wrapper(g, k, *args, **kwargs): ---> 24 result = func(g, k, *args, **kwargs) 25 g[k].attrs.setdefault("encoding-type", spec.encoding_type) 26 g[k].attrs.setdefault("encoding-version", spec.encoding_version) 
File ~/.local/lib/python3.8/site-packages/anndata/_io/specs/methods.py:281, in write_mapping(f, k, v, dataset_kwargs) 279 g = f.create_group(k) 280 for sub_k, sub_v in v.items(): --> 281 write_elem(g, sub_k, sub_v, dataset_kwargs=dataset_kwargs) File ~/.local/lib/python3.8/site-packages/anndata/_io/utils.py:220, in report_write_key_on_error..func_wrapper(elem, key, val, *args, **kwargs) 218 else: 219 parent = _get_parent(elem) --> 220 raise type(e)( 221 f"{e}\n\n" 222 f"Above error raised while writing key {key!r} of {type(elem)} " 223 f"to {parent}" 224 ) from e TypeError: Can't implicitly convert non-string objects to strings Above error raised while writing key 'names' of to / ```

Starlitnightly commented 1 year ago

Hi,

This error is usually caused by a column in the obs or var DataFrame that does not hold a single type of value, such as False mixed with NaN. From your error message we can see that the offending key is 'names', so it is probably adata.obs['names'] or adata.var['names'] that contains values of different types.

Solution: You can delete the column, replace all NaNs with .fillna(0), or set the column's type explicitly with .astype(str).

amaukisumi commented 1 year ago

Many thanks for your reply!

I checked obs and var, but there was no adata.obs['names'] or adata.var['names'], so I checked the other data again and ultimately found the source of the problem in adata.uns. When I ran sc.tl.filter_rank_genes_groups(), a recarray object with a mix of NaNs and strings was generated in adata.uns['dea_leiden_res1_filtered']['names'], like this:

```python rec.array([(nan, nan, 'S100A9', 'GNLY', 'S100A8', nan, nan, 'MS4A1', 'GZMK', nan, 'MS4A1', nan, 'JCHAIN', nan, 'CALD1', nan, 'EPAS1', nan, nan, 'FCGR3A', nan, nan, 'KIT', 'DCN', 'GPM6B', nan, 'MPIG6B', 'ALAS2'), ('TCF7', nan, 'S100A8', 'NKG7', 'S100A9', 'NKG7', nan, 'BANK1', nan, nan, nan, nan, nan, nan, 'IGFBP7', nan, 'A2M', nan, nan, 'MS4A7', nan, nan, 'AREG', 'C1S', 'PLP1', nan, 'TUBB1', 'HBA2'), (nan, nan, nan, 'KLRD1', 'C5AR1', nan, 'IGSF6', nan, 'CRTAM', 'IGHG4', 'BANK1', nan, 'IGHA1', nan, 'NOTCH3', nan, nan, nan, 'ENSG00000225885', nan, nan, nan, nan, 'MMP2', 'L1CAM', nan, 'NRGN', 'HBB'), (nan, 'TBC1D4', 'LYZ', 'KLRF1', nan, 'GZMA', nan, nan, nan, 'IGHG3', 'PAX5', nan, 'MZB1', 'PRDX4', 'TNS1', nan, 'CAV1', nan, 'RGS13', nan, nan, nan, nan, 'CCDC80', 'ERBB3', nan, 'ITGA2B', 'SLC4A1'), (nan, nan, 'MNDA', 'PRF1', 'MNDA', nan, nan, nan, 'CCL4', 'IGHG1', 'IGHM', nan, nan, 'IGHG2', 'SPARC', 'IGLC2', 'IGFBP4', nan, 'MS4A1', 'TCF7L2', nan, nan, nan, 'COL3A1', 'NRXN1', 'NREP', 'ITGB3', 'HBA1'), (nan, nan, 'FCN1', 'IL2RB', nan, nan, nan, nan, nan, nan, 'CD79A', nan, 'DERL3', 'IGHG3', 'TIMP3', nan, 'NFIB', nan, 'CD22', nan, nan, nan, nan, 'COL6A1', 'SCN7A', nan, 'PPBP', 'TRIM58'), (nan, nan, 'CSTA', 'GZMA', nan, 'CD8A', nan, nan, nan, nan, nan, nan, nan, 'SDC1', 'NR2F2', 'DERL3', 'IGFBP7', nan, 'PAX5', 'PELATON', nan, nan, nan, 'SELENOP', 'CHL1', nan, nan, 'DMTN'), ..., (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, 
nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan), (nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan)], dtype=[('0', 'O'), ('1', 'O'), ('2', 'O'), ('3', 'O'), ('4', 'O'), ('5', 'O'), ('6', 'O'), ('7', 'O'), ('8', 'O'), ('9', 'O'), ('10', 'O'), ('11', 'O'), ('12', 'O'), ('13', 'O'), ('14', 'O'), ('15', 'O'), ('16', 'O'), ('17', 'O'), ('18', 'O'), ('19', 'O'), ('20', 'O'), ('21', 'O'), ('22', 'O'), ('23', 'O'), ('24', 'O'), ('25', 'O'), ('26', 'O'), ('27', 'O')]) ```

The above error occurs when trying to save as .h5ad at such a point. Once I deleted adata.uns['dea_leiden_res1_filtered']['names'], it worked fine. Anyway, thanks again.