scverse / scanpy

Single-cell analysis in Python. Scales to >1M cells.
https://scanpy.readthedocs.io
BSD 3-Clause "New" or "Revised" License
1.87k stars 594 forks source link

Dendrogram returns an error. #2125

Closed ddiez closed 2 years ago

ddiez commented 2 years ago

Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.

Minimal code sample (that we can copy&paste without having any data)

# Minimal reproduction; the environment it was observed in is listed under "Versions" (pandas==1.4.0).
import scanpy as sc
sc.logging.print_header()
adata = sc.datasets.pbmc68k_reduced()
# The ValueError shown in the traceback below is raised by this call.
sc.tl.dendrogram(adata, groupby='bulk_labels')
sc.pl.dendrogram(adata)
markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ']
sc.pl.dotplot(adata, markers, groupby='bulk_labels', dendrogram=True)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [3], in <module>
      2 sc.logging.print_header()
      3 adata = sc.datasets.pbmc68k_reduced()
----> 4 sc.tl.dendrogram(adata, groupby='bulk_labels')
      5 sc.pl.dendrogram(adata)
      6 markers = ['C1QA', 'PSAP', 'CD79A', 'CD79B', 'CST3', 'LYZ']

File ~/miniconda3/envs/scanpy/lib/python3.8/site-packages/scanpy/tools/_dendrogram.py:139, in dendrogram(adata, groupby, n_pcs, use_rep, var_names, use_raw, cor_method, linkage_method, optimal_ordering, key_added, inplace)
    136 from scipy.spatial import distance
    138 corr_matrix = mean_df.T.corr(method=cor_method)
--> 139 corr_condensed = distance.squareform(1 - corr_matrix)
    140 z_var = sch.linkage(
    141     corr_condensed, method=linkage_method, optimal_ordering=optimal_ordering
    142 )
    143 dendro_info = sch.dendrogram(z_var, labels=list(categories), no_plot=True)

File ~/miniconda3/envs/scanpy/lib/python3.8/site-packages/scipy/spatial/distance.py:2362, in squareform(X, force, checks)
   2360     raise ValueError('The matrix argument must be square.')
   2361 if checks:
-> 2362     is_valid_dm(X, throw=True, name='X')
   2364 # One-side of the dimensions is set here.
   2365 d = s[0]

File ~/miniconda3/envs/scanpy/lib/python3.8/site-packages/scipy/spatial/distance.py:2443, in is_valid_dm(D, tol, throw, name, warning)
   2441 if not (D[range(0, s[0]), range(0, s[0])] == 0).all():
   2442     if name:
-> 2443         raise ValueError(('Distance matrix \'%s\' diagonal must '
   2444                           'be zero.') % name)
   2445     else:
   2446         raise ValueError('Distance matrix diagonal must be zero.')

ValueError: Distance matrix 'X' diagonal must be zero.

Versions

scanpy==1.8.2 anndata==0.7.8 umap==0.5.2 numpy==1.21.5 scipy==1.7.3 pandas==1.4.0 scikit-learn==1.0.2 statsmodels==0.13.1 python-igraph==0.9.9 pynndescent==0.5.6
MeiYihan416 commented 2 years ago

I hit the same error when using sc.pl.dotplot.

did you fix that?

ddiez commented 2 years ago

As mentioned here: https://github.com/theislab/scanpy/pull/2129#issuecomment-1039247646 the issue is fixed with pandas 1.4.1. I checked and it is indeed solved.

scottgigante-immunai commented 2 years ago

Temporary fix for anyone looking in case an upgrade is not yet possible:

import numpy as np
import scipy.spatial.distance as ssd
from contextlib import contextmanager

# Captured once, before any patching, so the wrapper below always delegates to
# the real SciPy implementation and never recurses into itself while
# ``ssd.squareform`` is monkey-patched.
_squareform = ssd.squareform


def squareform_force_zero_diagonal(X, *args, **kwargs):
    """Call ``scipy.spatial.distance.squareform`` after zeroing the diagonal.

    SciPy rejects a square distance matrix whose diagonal is not exactly
    zero. This wrapper forces the diagonal to zero so that tiny
    floating-point residue on the diagonal no longer triggers
    ``ValueError: Distance matrix 'X' diagonal must be zero.``

    Parameters
    ----------
    X
        Either a square (2-D) distance matrix -- NumPy array, pandas
        DataFrame or anything ``numpy.array`` accepts -- or a condensed
        (1-D) distance vector. 1-D input is forwarded unchanged.
    *args, **kwargs
        Forwarded verbatim to the original ``squareform``.

    Returns
    -------
    Whatever the original ``squareform`` returns for the cleaned input.

    Notes
    -----
    2-D input is copied before the diagonal is overwritten, so the caller's
    matrix is never modified.
    """
    if np.ndim(X) == 2:
        # ``np.array`` copies by default and also converts a DataFrame to a
        # plain ndarray, which is what ``squareform`` works on internally.
        X = np.array(X)
        np.fill_diagonal(X, 0)
    return _squareform(X, *args, **kwargs)


@contextmanager
def patch_squareform():
    """Temporarily replace ``scipy.spatial.distance.squareform``.

    Inside the ``with`` block, ``ssd.squareform`` is
    :func:`squareform_force_zero_diagonal`. On exit -- including when the
    body raises -- the attribute is reset to whatever it was on entry.
    """
    previous = ssd.squareform
    ssd.squareform = squareform_force_zero_diagonal
    try:
        yield
    finally:
        ssd.squareform = previous

import scanpy as sc
adata = sc.datasets.pbmc68k_reduced()
# Keep the patch scoped to the failing call only; squareform is restored when the block exits.
with patch_squareform():
    sc.tl.dendrogram(adata, groupby='bulk_labels')
jolespin commented 1 year ago

Getting this issue as well even when I explicitly specify a 0 diagonal:

from scipy.sparse import csr_matrix

# Zero the diagonal of the stored neighbor-distance matrix and write it back as sparse.
A = adata_ns.obsp["distances"].todense()
np.fill_diagonal(A, 0)
adata_ns.obsp["distances"] = csr_matrix(A)

# NOTE(review): per the traceback below, the matrix rejected by squareform is
# `1 - corr_matrix`, built from `mean_df.T.corr(...)` inside sc.tl.dendrogram.
# It does not appear to be derived from obsp["distances"], so the edit above
# presumably cannot affect this error -- confirm against _dendrogram.py.
sc.tl.dendrogram(adata_ns, groupby='leiden')
sc.pl.dendrogram(adata_ns)

# Input In [251], in <module>
#       4 np.fill_diagonal(A, 0)
#       5 adata_ns.obsp["distances"] = csr_matrix(A)
# ----> 7 sc.tl.dendrogram(adata_ns, groupby='leiden')
#       8 sc.pl.dendrogram(adata_ns)
#       9 sc.pl.dotplot(adata, geatures, groupby='leiden', dendrogram=True)

# File ~/anaconda3/envs/soothsayer_py3.9_env/lib/python3.9/site-packages/scanpy/tools/_dendrogram.py:139, in dendrogram(adata, groupby, n_pcs, use_rep, var_names, use_raw, cor_method, linkage_method, optimal_ordering, key_added, inplace)
#     136 from scipy.spatial import distance
#     138 corr_matrix = mean_df.T.corr(method=cor_method)
# --> 139 corr_condensed = distance.squareform(1 - corr_matrix)
#     140 z_var = sch.linkage(
#     141     corr_condensed, method=linkage_method, optimal_ordering=optimal_ordering
#     142 )
#     143 dendro_info = sch.dendrogram(z_var, labels=list(categories), no_plot=True)

# File ~/anaconda3/envs/soothsayer_py3.9_env/lib/python3.9/site-packages/scipy/spatial/distance.py:2363, in squareform(X, force, checks)
#    2361     raise ValueError('The matrix argument must be square.')
#    2362 if checks:
# -> 2363     is_valid_dm(X, throw=True, name='X')
#    2365 # One-side of the dimensions is set here.
#    2366 d = s[0]

# File ~/anaconda3/envs/soothsayer_py3.9_env/lib/python3.9/site-packages/scipy/spatial/distance.py:2444, in is_valid_dm(D, tol, throw, name, warning)
#    2442 if not (D[range(0, s[0]), range(0, s[0])] == 0).all():
#    2443     if name:
# -> 2444         raise ValueError(('Distance matrix \'%s\' diagonal must '
#    2445                           'be zero.') % name)
#    2446     else:
#    2447         raise ValueError('Distance matrix diagonal must be zero.')

# ValueError: Distance matrix 'X' diagonal must be zero.