scverse / scanpy

Single-cell analysis in Python. Scales to >1M cells.
https://scanpy.readthedocs.io
BSD 3-Clause "New" or "Revised" License
1.9k stars 597 forks source link

Reading an h5ad file not working anymore after I run the rank_genes_groups function (used to work fine before python module update) #937

Closed kousaa closed 4 years ago

kousaa commented 4 years ago

Yesterday I moved to a new server and I had to install miniconda3, Jupiter and all the necessary modules for my scRNA-seq analysis including scanpy

I can read fine an h5ad file and run various steps with scanpy and I can then save the object as an h5ad file and read it back without a problem. However, if I run the rank_genes_groups function, even though I can perfectly fine save my object as an h5ad file I get an error when I am attempting to read it back.

I have to say that this exact piece of code used to work with my older modules before updating it. Also, some people seem to have spotted a similar error in the newest numpy package: https://github.com/numpy/numpy/issues/13431

# I have already read in an Ann data object from an h5ad existing file
sc.tl.pca(adata, n_comps=30, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=15)
sc.tl.umap(adata)

k = 15
communities, graph, Q = sc.external.tl.phenograph(pd.DataFrame(adata.obsm['X_pca']),k=k)
adata.obs['PhenoGraph_clusters'] = pd.Categorical(communities)
adata.uns['PhenoGraph_Q'] = Q
adata.uns['PhenoGraph_k'] = k

path_to_h5ad_file = '~/test.h5ad'
adata.write_h5ad(path_to_h5ad_file) # works

# but if I run
sc.tl.rank_genes_groups(adata, n_genes=21515,groupby='PhenoGraph_clusters', method='wilcoxon')
rcParams['figure.figsize'] = 4,4
rcParams['axes.grid'] = True
sc.pl.rank_genes_groups(adata)
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(5)

path_to_h5ad_file = '~/test.h5ad' # works
adata.write_h5ad(path_to_h5ad_file) # gives ERROR bellow
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-23-cb0bc3c267ae> in <module>
----> 1 adata = sc.read(path_to_h5ad_file)

~/miniconda3/lib/python3.7/site-packages/scanpy/readwrite.py in read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, **kwargs)
     95             filename, backed=backed, sheet=sheet, ext=ext,
     96             delimiter=delimiter, first_column_names=first_column_names,
---> 97             backup_url=backup_url, cache=cache, **kwargs,
     98         )
     99     # generate filename and read to dict

~/miniconda3/lib/python3.7/site-packages/scanpy/readwrite.py in _read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, suppress_cache_warning, **kwargs)
    497     if ext in {'h5', 'h5ad'}:
    498         if sheet is None:
--> 499             return read_h5ad(filename, backed=backed)
    500         else:
    501             logg.debug(f'reading sheet {sheet} from file {filename}')

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in read_h5ad(filename, backed, chunk_size)
    445     else:
    446         # load everything into memory
--> 447         constructor_args = _read_args_from_h5ad(filename=filename, chunk_size=chunk_size)
    448         X = constructor_args[0]
    449         dtype = None

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in _read_args_from_h5ad(adata, filename, mode, chunk_size)
    484             d[key] = None
    485         else:
--> 486             _read_key_value_from_h5(f, d, key, chunk_size=chunk_size)
    487     # backwards compat: save X with the correct name
    488     if 'X' not in d:

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in _read_key_value_from_h5(f, d, key, key_write, chunk_size)
    508         d[key_write] = OrderedDict() if key == 'uns' else {}
    509         for k in f[key].keys():
--> 510             _read_key_value_from_h5(f, d[key_write], key + '/' + k, k, chunk_size)
    511         return
    512 

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in _read_key_value_from_h5(f, d, key, key_write, chunk_size)
    508         d[key_write] = OrderedDict() if key == 'uns' else {}
    509         for k in f[key].keys():
--> 510             _read_key_value_from_h5(f, d[key_write], key + '/' + k, k, chunk_size)
    511         return
    512 

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in _read_key_value_from_h5(f, d, key, key_write, chunk_size)
    542         return key, value
    543 
--> 544     key, value = postprocess_reading(key, value)
    545     d[key_write] = value
    546     return

~/miniconda3/lib/python3.7/site-packages/anndata/readwrite/read.py in postprocess_reading(key, value)
    539             new_dtype = [((dt[0], 'U{}'.format(int(int(dt[1][2:])/4)))
    540                           if dt[1][1] == 'S' else dt) for dt in value.dtype.descr]
--> 541             value = value.astype(new_dtype)
    542         return key, value
    543 

ValueError: invalid shape in fixed-type tuple.

Versions:

scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.10 numpy==1.17.4 scipy==1.3.3 pandas==0.25.3 scikit-learn==0.21.3 statsmodels==0.10.2 python-igraph==0.7.1 louvain==0.6.1

ivirshup commented 4 years ago

This looks like a duplicate of #832 which is fixed on anndata master.