ShobiStassen / VIA

trajectory inference
https://pyvia.readthedocs.io/en/latest/
MIT License
76 stars 20 forks source link

Error in v0.run_VIA() #42

Closed yrushil closed 1 year ago

yrushil commented 1 year ago

Hi All, I am running the VIA code on scRNA-seq data. When running the line v0.run_VIA() I get the error below. I am having trouble resolving this. Below is my code prior and the error message. Code

ncomps=80
knn=30
v0_random_seed=4
root_user = [3723] #the index of a cell belonging to the HSC cell type
dataset = ''

'''
#NOTE1, if you decide to choose a cell type as a root, then you need to set the dataset as 'group'
#root_user=['HSC1']
#dataset = 'group'# 'humanCD34'
#NOTE2, if rna-velocity is available, considering using it to compute the root automatically- see RNA velocity tutorial
'''

v0 = via.VIA(data=adata.obsm['X_pca'][:, 0:ncomps], true_label=adata.obs['ident'],
             jac_std_global=0.15, cluster_graph_pruning_std=0.15,
             knn=knn, too_big_factor=0.3, root_user=root_user,
             dataset=dataset, random_seed=v0_random_seed, embedding = embedding)

v0.run_VIA()

Error

2023-05-17 23:11:08.680298  From root 16,  the Terminal state 25 is reached 590 times.
2023-05-17 23:11:08.729189  Terminal clusters corresponding to unique lineages are {33: 'Tumor c3', 7: 'Tumor c2', 14: 'Tumor c5', 17: 'Tumor c5', 21: 'Tumor c2', 22: 'Tumor c2', 24: 'Tumor c2', 25: 'Tumor c2'}
2023-05-17 23:11:08.729276  Begin projection of pseudotime and lineage likelihood
2023-05-17 23:11:09.427198  Graph has 1 connected components before pruning
2023-05-17 23:11:09.431525  Graph has 10 connected components after pruning
2023-05-17 23:11:09.442976  Graph has 1 connected components after reconnecting
2023-05-17 23:11:09.443813  63.1% links trimmed from local pruning relative to start
2023-05-17 23:11:09.443864  72.7% links trimmed from global pruning relative to start
2023-05-17 23:11:09.448086  Start making edgebundle milestone...
2023-05-17 23:11:09.448162  Start finding milestones
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[7], line 19
      7 '''
      8 #NOTE1, if you decide to choose a cell type as a root, then you need to set the dataset as 'group'
      9 #root_user=['HSC1']
     10 #dataset = 'group'# 'humanCD34'
     11 #NOTE2, if rna-velocity is available, considering using it to compute the root automatically- see RNA velocity tutorial
     12 '''
     14 v0 = via.VIA(data=adata.obsm['X_pca'][:, 0:ncomps], true_label=adata.obs['ident'],
     15              jac_std_global=0.15, cluster_graph_pruning_std=0.15,
     16              knn=knn, too_big_factor=0.3, root_user=root_user,
     17              dataset=dataset, random_seed=v0_random_seed, embedding = embedding)
---> 19 v0.run_VIA()

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/pyVIA/core.py:3140, in VIA.run_VIA(self)
   3138 self.knn_struct = _construct_knn(self.data, knn=self.knn, distance=self.distance, num_threads=self.num_threads)
   3139 st = time.time()
-> 3140 self.run_subPARC()
   3141 run_time = time.time() - st
   3142 print(f'{datetime.now()}\tTime elapsed {round(run_time,1)} seconds')

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/pyVIA/core.py:2667, in VIA.run_subPARC(self)
   2665 self.hammerbundle_milestone_dict = None
   2666 if self.embedding is not None:
-> 2667     self.hammerbundle_milestone_dict = make_edgebundle_milestone(embedding=self.embedding,
   2668                                                              sc_graph=self.ig_full_graph,
   2669                                                              n_milestones=n_milestones,
   2670                                                              global_visual_pruning=global_visual_pruning,
   2671                                                              initial_bandwidth=initial_bandwidth, decay=decay,
   2672                                                              weighted=True,
   2673                                                              sc_labels_numeric=self.time_series_labels,
   2674                                                              sc_pt=self.single_cell_pt_markov, terminal_cluster_list=self.terminal_clusters, single_cell_lineage_prob=self.single_cell_bp)
   2676 self.labels = list(self.labels)
   2678 from statistics import mode

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/pyVIA/plotting_via.py:140, in make_edgebundle_milestone(embedding, sc_graph, via_object, sc_pt, initial_bandwidth, decay, n_milestones, milestone_labels, sc_labels_numeric, weighted, global_visual_pruning, terminal_cluster_list, single_cell_lineage_prob, random_state)
    138 print(f'{datetime.now()}\tStart finding milestones')
    139 from sklearn.cluster import KMeans
--> 140 kmeans = KMeans(n_clusters=n_milestones, random_state=random_state).fit(embedding)
    141 milestone_labels = kmeans.labels_.flatten().tolist()
    142 print(f'{datetime.now()}\tEnd milestones')

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py:1468, in KMeans.fit(self, X, y, sample_weight)
   1465     print("Initialization complete")
   1467 # run a k-means once
-> 1468 labels, inertia, centers, n_iter_ = kmeans_single(
   1469     X,
   1470     sample_weight,
   1471     centers_init,
   1472     max_iter=self.max_iter,
   1473     verbose=self.verbose,
   1474     tol=self._tol,
   1475     n_threads=self._n_threads,
   1476 )
   1478 # determine if these results are the best so far
   1479 # we chose a new run if it has a better inertia and the clustering is
   1480 # different from the best so far (it's possible that the inertia is
   1481 # slightly better even if the clustering is the same with potentially
   1482 # permuted labels, due to rounding errors)
   1483 if best_inertia is None or (
   1484     inertia < best_inertia
   1485     and not _is_same_clustering(labels, best_labels, self.n_clusters)
   1486 ):

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py:679, in _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter, verbose, tol, n_threads)
    675 strict_convergence = False
    677 # Threadpoolctl context to limit the number of threads in second level of
    678 # nested parallelism (i.e. BLAS) to avoid oversubscription.
--> 679 with threadpool_limits(limits=1, user_api="blas"):
    680     for i in range(max_iter):
    681         lloyd_iter(
    682             X,
    683             sample_weight,
   (...)
    689             n_threads,
    690         )

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/sklearn/utils/fixes.py:151, in threadpool_limits(limits, user_api)
    149     return controller.limit(limits=limits, user_api=user_api)
    150 else:
--> 151     return threadpoolctl.threadpool_limits(limits=limits, user_api=user_api)

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:171, in threadpool_limits.__init__(self, limits, user_api)
    167 def __init__(self, limits=None, user_api=None):
    168     self._limits, self._user_api, self._prefixes = \
    169         self._check_params(limits, user_api)
--> 171     self._original_info = self._set_threadpool_limits()

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:268, in threadpool_limits._set_threadpool_limits(self)
    265 if self._limits is None:
    266     return None
--> 268 modules = _ThreadpoolInfo(prefixes=self._prefixes,
    269                           user_api=self._user_api)
    270 for module in modules:
    271     # self._limits is a dict {key: num_threads} where key is either
    272     # a prefix or a user_api. If a module matches both, the limit
    273     # corresponding to the prefix is chosed.
    274     if module.prefix in self._limits:

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:340, in _ThreadpoolInfo.__init__(self, user_api, prefixes, modules)
    337     self.user_api = [] if user_api is None else user_api
    339     self.modules = []
--> 340     self._load_modules()
    341     self._warn_if_incompatible_openmp()
    342 else:

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:371, in _ThreadpoolInfo._load_modules(self)
    369 """Loop through loaded libraries and store supported ones"""
    370 if sys.platform == "darwin":
--> 371     self._find_modules_with_dyld()
    372 elif sys.platform == "win32":
    373     self._find_modules_with_enum_process_module_ex()

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:428, in _ThreadpoolInfo._find_modules_with_dyld(self)
    425 filepath = filepath.decode("utf-8")
    427 # Store the module if it is supported and selected
--> 428 self._make_module_from_path(filepath)

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:515, in _ThreadpoolInfo._make_module_from_path(self, filepath)
    513 if prefix in self.prefixes or user_api in self.user_api:
    514     module_class = globals()[module_class]
--> 515     module = module_class(filepath, prefix, user_api, internal_api)
    516     self.modules.append(module)

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:606, in _Module.__init__(self, filepath, prefix, user_api, internal_api)
    604 self.internal_api = internal_api
    605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
--> 606 self.version = self.get_version()
    607 self.num_threads = self.get_num_threads()
    608 self._get_extra_info()

File ~/opt/miniconda3/envs/scanpy_env_A/lib/python3.9/site-packages/threadpoolctl.py:646, in _OpenBLASModule.get_version(self)
    643 get_config = getattr(self._dynlib, "openblas_get_config",
    644                      lambda: None)
    645 get_config.restype = ctypes.c_char_p
--> 646 config = get_config().split()
    647 if config[0] == b"OpenBLAS":
    648     return config[1].decode("utf-8")

AttributeError: 'NoneType' object has no attribute 'split'
MinatoKobashi commented 1 year ago

This is perhaps a problem in the dependecies of sklearn. You may need to update or downgrade the dependencies (numpy / threadpoolctl). Check out the thread for more info: https://github.com/scikit-learn/scikit-learn/issues/24238

You can print the versions using:

import sklearn
print(sklearn.show_versions())
yrushil commented 1 year ago

Thank you! This resolved my issue