Starlitnightly / omicverse

A Python library for multi-omics analysis, including bulk, single-cell, and spatial RNA-seq analysis.
https://starlitnightly.github.io/omicverse/
GNU General Public License v3.0
454 stars 55 forks source link

cnmf_obj.k_selection_plot() get ValueError: Number of labels is 2. Valid values are 2 to n_samples - 1 (inclusive) #174

Closed Roger-GOAT closed 3 weeks ago

Roger-GOAT commented 4 weeks ago

好全面的工具!非常棒! Great work! I have an issue with cnmf_obj.k_selection_plot():

import numpy as np
## Initialize the cnmf object that will be used to run analyses
cnmf_obj = ov.single.cNMF(adata,components=np.arange(2,12), n_iter=20, seed=14, num_highvar_genes=3000,
                          output_dir='example_dg/cNMF', name='dg_cNMF')
cnmf_obj.factorize(worker_i=0, total_workers=20)
cnmf_obj.combine(skip_missing_files=True)
Combining factorizations for k=2.
Missing file: example_dg/cNMF/dg_cNMF/cnmf_tmp/dg_cNMF.spectra.k_2.iter_1.df.npz. Skipping.
Missing file: example_dg/cNMF/dg_cNMF/cnmf_tmp/dg_cNMF.spectra.k_2.iter_2.df.npz. Skipping.
......
Missing file: example_dg/cNMF/dg_cNMF/cnmf_tmp/dg_cNMF.spectra.k_11.iter_18.df.npz. Skipping.
Missing file: example_dg/cNMF/dg_cNMF/cnmf_tmp/dg_cNMF.spectra.k_11.iter_19.df.npz. Skipping.
cnmf_obj.k_selection_plot()

ValueError Traceback (most recent call last) Cell In[37], line 1 ----> 1 cnmf_obj.k_selection_plot()

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/omicverse/externel/cnmf/cnmf.py:952, in cNMF.k_selection_plot(self, close_fig) 950 norm_counts = sc.read(self.paths['normalized_counts']) 951 for k in sorted(set(run_params.n_components)): --> 952 stats.append(self.consensus(k, skip_density_and_return_after_stats=True, 953 show_clustering=False, close_clustergram_fig=True, 954 norm_counts=norm_counts).stats) 956 stats = pd.DataFrame(stats) 957 stats.reset_index(drop = True, inplace = True)

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/omicverse/externel/cnmf/cnmf.py:772, in cNMF.consensus(self, k, density_threshold, local_neighborhood_size, show_clustering, skip_density_and_return_after_stats, close_clustergram_fig, refit_usage, normalize_tpm_spectra, norm_counts) 769 rf_usages = pd.DataFrame(rf_usages, index=norm_counts.obs.index, columns=median_spectra.index)
771 if skip_density_and_return_after_stats: --> 772 silhouette = silhouette_score(l2_spectra.values, kmeans_cluster_labels, metric='euclidean') 774 # Compute prediction error as a frobenius norm 775 rf_pred_norm_counts = rf_usages.dot(median_spectra)

File ~/.local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py:213, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs) 207 try: 208 with config_context( 209 skip_parameter_validation=( 210 prefer_skip_nested_validation or global_skip_validation 211 ) 212 ): --> 213 return func(*args, **kwargs) 214 except InvalidParameterError as e: 215 # When the function is just a wrapper around an estimator, we allow 216 # the function to delegate validation to the estimator, but we replace 217 # the name of the estimator by the name of the function in the error 218 # message to avoid confusion. 219 msg = re.sub( 220 r"parameter of \w+ must be", 221 f"parameter of {func.__qualname__} must be", 222 str(e), 223 )

File ~/.local/lib/python3.10/site-packages/sklearn/metrics/cluster/_unsupervised.py:140, in silhouette_score(X, labels, metric, sample_size, random_state, **kwds) 138 else: 139 X, labels = X[indices], labels[indices] --> 140 return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))

File ~/.local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py:186, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs) 184 global_skip_validation = get_config()["skip_parameter_validation"] 185 if global_skip_validation: --> 186 return func(*args, **kwargs) 188 func_sig = signature(func) 190 # Map args/kwargs to the function signature

File ~/.local/lib/python3.10/site-packages/sklearn/metrics/cluster/_unsupervised.py:297, in silhouette_samples(X, labels, metric, **kwds) 295 n_samples = len(labels) 296 label_freqs = np.bincount(labels) --> 297 check_number_of_labels(len(le.classes_), n_samples) 299 kwds["metric"] = metric 300 reduce_func = functools.partial( 301 _silhouette_reduce, labels=labels, label_freqs=label_freqs 302 )

File ~/.local/lib/python3.10/site-packages/sklearn/metrics/cluster/_unsupervised.py:37, in check_number_of_labels(n_labels, n_samples) 26 """Check that number of labels are valid. 27 28 Parameters (...) 34 Number of samples. 35 """ 36 if not 1 < n_labels < n_samples: ---> 37 raise ValueError( 38 "Number of labels is %d. Valid values are 2 to n_samples - 1 (inclusive)" 39 % n_labels 40 )

ValueError: Number of labels is 2. Valid values are 2 to n_samples - 1 (inclusive)

If I go on to the next step:

selected_K = 7
density_threshold = 2.00
cnmf_obj.consensus(k=selected_K, 
                   density_threshold=density_threshold, 
                   show_clustering=True, 
                   close_clustergram_fig=False)

RuntimeError Traceback (most recent call last) Cell In[39], line 1 ----> 1 cnmf_obj.consensus(k=selected_K, 2 density_threshold=density_threshold, 3 show_clustering=True, 4 close_clustergram_fig=False)

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/omicverse/externel/cnmf/cnmf.py:755, in cNMF.consensus(self, k, density_threshold, local_neighborhood_size, show_clustering, skip_density_and_return_after_stats, close_clustergram_fig, refit_usage, normalize_tpm_spectra, norm_counts) 753 l2_spectra = l2_spectra.loc[density_filter, :] 754 if l2_spectra.shape[0] == 0: --> 755 raise RuntimeError("Zero components remain after density filtering. Consider increasing density threshold") 757 kmeans_model = KMeans(n_clusters=k, n_init=10, random_state=1) 758 kmeans_model.fit(l2_spectra)

RuntimeError: Zero components remain after density filtering. Consider increasing density threshold

Starlitnightly commented 3 weeks ago

You may need to increase the density threshold to avoid this.