KrishnaswamyLab / PHATE

PHATE (Potential of Heat-diffusion for Affinity-based Transition Embedding) is a tool for visualizing high dimensional data.
http://phate.readthedocs.io
Other
472 stars 73 forks source link

AttributeError: 'kNNGraph' object has no attribute '_kernel' #131

Closed josephineyates closed 1 year ago

josephineyates commented 1 year ago

**Describe the bug**

Dear all, thank you for this great tool! I am trying to run PHATE on a private dataset (original size (150, 11,000)) to obtain the visualization. However, when I call `fit_transform` on the operator, I get the error in the title.

**To Reproduce**

```python
import phate
import pandas as pd

X = pd.read_csv("...")
phate_operator = phate.PHATE()

from sklearn.decomposition import PCA
pca = PCA(n_components=50)
X_pca = pca.fit_transform(X.values)

tree_phate = phate_operator.fit_transform(X_pca)
```

**Expected behavior**

I would expect this to work out of the box. I have tried both on the original dataset and on the computed PCA, and the error occurs in both cases.

Actual behavior

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:716, in BaseGraph.K(self)
    715 try:
--> 716     return self._kernel
    717 except AttributeError:

AttributeError: 'kNNGraph' object has no attribute '_kernel'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
Cell In [43], line 1
----> 1 tree_phate = phate_operator.fit_transform(X_pca)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/phate/phate.py:947, in PHATE.fit_transform(self, X, **kwargs)
    926 """Computes the diffusion operator and the position of the cells in the
    927 embedding space
    928 
   (...)
    944     The cells embedded in a lower dimensional space using PHATE
    945 """
    946 with _logger.task("PHATE"):
--> 947     self.fit(X)
    948     embedding = self.transform(**kwargs)
    949 return embedding

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/phate/phate.py:826, in PHATE.fit(self, X)
    824 if self.graph is None:
    825     with _logger.task("graph and diffusion operator"):
--> 826         self.graph = graphtools.Graph(
    827             X,
    828             n_pca=n_pca,
    829             n_landmark=n_landmark,
    830             distance=self.knn_dist,
    831             precomputed=precomputed,
    832             knn=self.knn,
    833             knn_max=self.knn_max,
    834             decay=self.decay,
    835             thresh=1e-4,
    836             n_jobs=self.n_jobs,
    837             verbose=self.verbose,
    838             random_state=self.random_state,
    839             **(self.kwargs)
    840         )
    842 # landmark op doesn't build unless forced
    843 self.diff_op

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/api.py:289, in Graph(data, n_pca, rank_threshold, knn, decay, bandwidth, bandwidth_scale, knn_max, anisotropy, distance, thresh, kernel_symm, theta, precomputed, beta, sample_idx, adaptive_k, n_landmark, n_svd, n_jobs, verbose, random_state, graphtype, use_pygsp, initialize, **kwargs)
    276 # build graph and return
    277 _logger.log_debug(
    278     "Initializing {} with arguments {}".format(
    279         parent_classes,
   (...)
    287     )
    288 )
--> 289 return Graph(**params)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/graphs.py:136, in kNNGraph.__init__(self, data, knn, decay, knn_max, search_multiplier, bandwidth, bandwidth_scale, distance, thresh, n_pca, **kwargs)
    134 self.distance = distance
    135 self.thresh = thresh
--> 136 super().__init__(data, n_pca=n_pca, **kwargs)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:1019, in DataGraph.__init__(self, data, verbose, n_jobs, **kwargs)
   1017 self.verbose = verbose
   1018 _logger.set_level(verbose)
-> 1019 super().__init__(data, **kwargs)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:137, in Data.__init__(self, data, n_pca, rank_threshold, random_state, **kwargs)
    135 self.random_state = random_state
    136 self.data_nu = self._reduce_data()
--> 137 super().__init__(**kwargs)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:503, in BaseGraph.__init__(self, kernel_symm, theta, anisotropy, gamma, initialize, **kwargs)
    501 if initialize:
    502     _logger.log_debug("Initializing kernel...")
--> 503     self.K
    504 else:
    505     _logger.log_debug("Not initializing kernel.")

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:718, in BaseGraph.K(self)
    716     return self._kernel
    717 except AttributeError:
--> 718     self._kernel = self._build_kernel()
    719     return self._kernel

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/base.py:548, in BaseGraph._build_kernel(self)
    534 def _build_kernel(self):
    535     """Private method to build kernel matrix
    536 
    537     Runs public method to build kernel matrix and runs
   (...)
    546     RuntimeWarning : if K is not symmetric
    547     """
--> 548     kernel = self.build_kernel()
    549     kernel = self.symmetrize_kernel(kernel)
    550     kernel = self.apply_anisotropy(kernel)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/graphs.py:264, in kNNGraph.build_kernel(self)
    252 """Build the KNN kernel.
    253 
    254 Build a k nearest neighbors kernel, optionally with alpha decay.
   (...)
    261     with no non-negative entries.
    262 """
    263 knn_max = self.knn_max + 1 if self.knn_max else None
--> 264 K = self.build_kernel_to_data(self.data_nu, knn=self.knn + 1, knn_max=knn_max)
    265 return K

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/graphs.py:424, in kNNGraph.build_kernel_to_data(self, Y, knn, knn_max, bandwidth, bandwidth_scale)
    418 _logger.log_debug(
    419     "knn search to knn_max ({}) on {}".format(
    420         knn_max, len(update_idx)
    421     )
    422 )
    423 # give up - search out to knn_max
--> 424 dist_new, ind_new = knn_tree.kneighbors(
    425     Y[update_idx], n_neighbors=search_knn
    426 )
    427 for i, idx in enumerate(update_idx):
    428     distances[idx] = dist_new[i]

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/sklearn/neighbors/_base.py:763, in KNeighborsMixin.kneighbors(self, X, n_neighbors, return_distance)
    756 use_pairwise_distances_reductions = (
    757     self._fit_method == "brute"
    758     and PairwiseDistancesArgKmin.is_usable_for(
    759         X if X is not None else self._fit_X, self._fit_X, self.effective_metric_
    760     )
    761 )
    762 if use_pairwise_distances_reductions:
--> 763     results = PairwiseDistancesArgKmin.compute(
    764         X=X,
    765         Y=self._fit_X,
    766         k=n_neighbors,
    767         metric=self.effective_metric_,
    768         metric_kwargs=self.effective_metric_params_,
    769         strategy="auto",
    770         return_distance=return_distance,
    771     )
    773 elif (
    774     self._fit_method == "brute" and self.metric == "precomputed" and issparse(X)
    775 ):
    776     results = _kneighbors_from_graph(
    777         X, n_neighbors=n_neighbors, return_distance=return_distance
    778     )

File sklearn/metrics/_pairwise_distances_reduction.pyx:698, in sklearn.metrics._pairwise_distances_reduction.PairwiseDistancesArgKmin.compute()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/sklearn/utils/fixes.py:151, in threadpool_limits(limits, user_api)
    149     return controller.limit(limits=limits, user_api=user_api)
    150 else:
--> 151     return threadpoolctl.threadpool_limits(limits=limits, user_api=user_api)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:171, in threadpool_limits.__init__(self, limits, user_api)
    167 def __init__(self, limits=None, user_api=None):
    168     self._limits, self._user_api, self._prefixes = \
    169         self._check_params(limits, user_api)
--> 171     self._original_info = self._set_threadpool_limits()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:268, in threadpool_limits._set_threadpool_limits(self)
    265 if self._limits is None:
    266     return None
--> 268 modules = _ThreadpoolInfo(prefixes=self._prefixes,
    269                           user_api=self._user_api)
    270 for module in modules:
    271     # self._limits is a dict {key: num_threads} where key is either
    272     # a prefix or a user_api. If a module matches both, the limit
    273     # corresponding to the prefix is chosed.
    274     if module.prefix in self._limits:

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:340, in _ThreadpoolInfo.__init__(self, user_api, prefixes, modules)
    337     self.user_api = [] if user_api is None else user_api
    339     self.modules = []
--> 340     self._load_modules()
    341     self._warn_if_incompatible_openmp()
    342 else:

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:371, in _ThreadpoolInfo._load_modules(self)
    369 """Loop through loaded libraries and store supported ones"""
    370 if sys.platform == "darwin":
--> 371     self._find_modules_with_dyld()
    372 elif sys.platform == "win32":
    373     self._find_modules_with_enum_process_module_ex()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:428, in _ThreadpoolInfo._find_modules_with_dyld(self)
    425 filepath = filepath.decode("utf-8")
    427 # Store the module if it is supported and selected
--> 428 self._make_module_from_path(filepath)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:515, in _ThreadpoolInfo._make_module_from_path(self, filepath)
    513 if prefix in self.prefixes or user_api in self.user_api:
    514     module_class = globals()[module_class]
--> 515     module = module_class(filepath, prefix, user_api, internal_api)
    516     self.modules.append(module)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:606, in _Module.__init__(self, filepath, prefix, user_api, internal_api)
    604 self.internal_api = internal_api
    605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
    607 self.num_threads = self.get_num_threads()
    608 self._get_extra_info()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:646, in _OpenBLASModule.get_version(self)
    643 get_config = getattr(self._dynlib, "openblas_get_config",
    644                      lambda: None)
    645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
    647 if config[0] == b"OpenBLAS":
    648     return config[1].decode("utf-8")

AttributeError: 'NoneType' object has no attribute 'split'

System information:

Output of phate.__version__:

1.0.10

Output of pd.show_versions():

```
INSTALLED VERSIONS
------------------
commit : ca60aab7340d9989d9428e11a51467658190bb6b
python : 3.8.13.final.0
python-bits : 64
OS : Darwin
OS-release : 22.1.0
Version : Darwin Kernel Version 22.1.0: Sun Oct 9 20:14:30 PDT 2022; root:xnu-8792.41.9~2/RELEASE_ARM64_T8103
machine : x86_64
processor : i386
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.UTF-8
pandas : 1.4.4
numpy : 1.23.5
pytz : 2022.6
dateutil : 2.8.2
setuptools : 63.4.2
pip : 22.2.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : 3.0.7
lxml.etree : 4.9.2
html5lib : None
pymysql : 1.0.2
psycopg2 : None
jinja2 : 3.1.2
IPython : 8.6.0
pandas_datareader: None
bs4 : 4.11.1
bottleneck : 1.3.5
brotli :
fastparquet : None
fsspec : None
gcsfs : None
markupsafe : 2.1.1
matplotlib : 3.5.3
numba : 0.56.4
numexpr : 2.8.3
odfpy : None
openpyxl : 3.0.10
pandas_gbq : None
pyarrow : None
pyreadstat : None
pyxlsb : None
s3fs : None
scipy : 1.10.0
snappy : None
sqlalchemy : None
tables : None
tabulate : 0.9.0
xarray : 2022.12.0
xlrd : None
xlwt : None
zstandard : None
```

Thank you!
scottgigante commented 1 year ago

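Your traceback points to an error in your `torch-gpu` environment that is raised from scikit-learn's `KNeighborsMixin.kneighbors` (the final failure is in `threadpoolctl`, in `_OpenBLASModule.get_version`), not from PHATE or graphtools. Please open an issue with scikit-learn.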

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/graphtools/graphs.py:424, in kNNGraph.build_kernel_to_data(self, Y, knn, knn_max, bandwidth, bandwidth_scale)
    418 _logger.log_debug(
    419     "knn search to knn_max ({}) on {}".format(
    420         knn_max, len(update_idx)
    421     )
    422 )
    423 # give up - search out to knn_max
--> 424 dist_new, ind_new = knn_tree.kneighbors(
    425     Y[update_idx], n_neighbors=search_knn
    426 )
    427 for i, idx in enumerate(update_idx):
    428     distances[idx] = dist_new[i]

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/sklearn/neighbors/_base.py:763, in KNeighborsMixin.kneighbors(self, X, n_neighbors, return_distance)
    756 use_pairwise_distances_reductions = (
    757     self._fit_method == "brute"
    758     and PairwiseDistancesArgKmin.is_usable_for(
    759         X if X is not None else self._fit_X, self._fit_X, self.effective_metric_
    760     )
    761 )
    762 if use_pairwise_distances_reductions:
--> 763     results = PairwiseDistancesArgKmin.compute(
    764         X=X,
    765         Y=self._fit_X,
    766         k=n_neighbors,
    767         metric=self.effective_metric_,
    768         metric_kwargs=self.effective_metric_params_,
    769         strategy="auto",
    770         return_distance=return_distance,
    771     )
    773 elif (
    774     self._fit_method == "brute" and self.metric == "precomputed" and issparse(X)
    775 ):
    776     results = _kneighbors_from_graph(
    777         X, n_neighbors=n_neighbors, return_distance=return_distance
    778     )

File sklearn/metrics/_pairwise_distances_reduction.pyx:698, in sklearn.metrics._pairwise_distances_reduction.PairwiseDistancesArgKmin.compute()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/sklearn/utils/fixes.py:151, in threadpool_limits(limits, user_api)
    149     return controller.limit(limits=limits, user_api=user_api)
    150 else:
--> 151     return threadpoolctl.threadpool_limits(limits=limits, user_api=user_api)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:171, in threadpool_limits.__init__(self, limits, user_api)
    167 def __init__(self, limits=None, user_api=None):
    168     self._limits, self._user_api, self._prefixes = \
    169         self._check_params(limits, user_api)
--> 171     self._original_info = self._set_threadpool_limits()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:268, in threadpool_limits._set_threadpool_limits(self)
    265 if self._limits is None:
    266     return None
--> 268 modules = _ThreadpoolInfo(prefixes=self._prefixes,
    269                           user_api=self._user_api)
    270 for module in modules:
    271     # self._limits is a dict {key: num_threads} where key is either
    272     # a prefix or a user_api. If a module matches both, the limit
    273     # corresponding to the prefix is chosed.
    274     if module.prefix in self._limits:

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:340, in _ThreadpoolInfo.__init__(self, user_api, prefixes, modules)
    337     self.user_api = [] if user_api is None else user_api
    339     self.modules = []
--> 340     self._load_modules()
    341     self._warn_if_incompatible_openmp()
    342 else:

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:371, in _ThreadpoolInfo._load_modules(self)
    369 """Loop through loaded libraries and store supported ones"""
    370 if sys.platform == "darwin":
--> 371     self._find_modules_with_dyld()
    372 elif sys.platform == "win32":
    373     self._find_modules_with_enum_process_module_ex()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:428, in _ThreadpoolInfo._find_modules_with_dyld(self)
    425 filepath = filepath.decode("utf-8")
    427 # Store the module if it is supported and selected
--> 428 self._make_module_from_path(filepath)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:515, in _ThreadpoolInfo._make_module_from_path(self, filepath)
    513 if prefix in self.prefixes or user_api in self.user_api:
    514     module_class = globals()[module_class]
--> 515     module = module_class(filepath, prefix, user_api, internal_api)
    516     self.modules.append(module)

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:606, in _Module.__init__(self, filepath, prefix, user_api, internal_api)
    604 self.internal_api = internal_api
    605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
    607 self.num_threads = self.get_num_threads()
    608 self._get_extra_info()

File ~/opt/anaconda3/envs/torch-gpu/lib/python3.8/site-packages/threadpoolctl.py:646, in _OpenBLASModule.get_version(self)
    643 get_config = getattr(self._dynlib, "openblas_get_config",
    644                      lambda: None)
    645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
    647 if config[0] == b"OpenBLAS":
    648     return config[1].decode("utf-8")

AttributeError: 'NoneType' object has no attribute 'split'