f-hamidlab / nuclearpy

MIT License
0 stars 0 forks source link

NaN values. #9

Closed Marcel-Salier closed 2 years ago

Marcel-Salier commented 2 years ago

This is something that we ran into before with Emilio: there are some NaN values that we have to discard before clustering and running UMAP:

computing neighbors

ValueError Traceback (most recent call last) Input In [65], in <cell line: 1>() ----> 1 obj.findNeighbours(method = "umap") 2 obj.findClusters(method = "leiden", res=0.6) 3 obj.runDimReduc(method = "umap")

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/ngtools/analyzer.py:967, in Analyzor.findNeighbours(self, method, n, use_rep) 949 def findNeighbours(self, method = "umap", n = 30, use_rep = "X"): 950 """ 951 Finds neighbours of cells 952 (...) 965 966 """ --> 967 sc.pp.neighbors(self.adata, n_neighbors=n, use_rep=use_rep, method=method)

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/scanpy/neighbors/__init__.py:139, in neighbors(adata, n_neighbors, n_pcs, use_rep, knn, random_state, method, metric, metric_kwds, key_added, copy) 137 adata._init_as_actual(adata.copy()) 138 neighbors = Neighbors(adata) --> 139 neighbors.compute_neighbors( 140 n_neighbors=n_neighbors, 141 knn=knn, 142 n_pcs=n_pcs, 143 use_rep=use_rep, 144 method=method, 145 metric=metric, 146 metric_kwds=metric_kwds, 147 random_state=random_state, 148 ) 150 if key_added is None: 151 key_added = 'neighbors'

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/scanpy/neighbors/__init__.py:775, in Neighbors.compute_neighbors(self, n_neighbors, knn, n_pcs, use_rep, method, random_state, write_knn_indices, metric, metric_kwds) 773 use_dense_distances = (metric == 'euclidean' and X.shape[0] < 8192) or not knn 774 if use_dense_distances: --> 775 _distances = pairwise_distances(X, metric=metric, **metric_kwds) 776 knn_indices, knn_distances = _get_indices_distances_from_dense_matrix( 777 _distances, n_neighbors 778 ) 779 if knn:

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/metrics/pairwise.py:1989, in pairwise_distances(X, Y, metric, n_jobs, force_all_finite, **kwds) 1986 return distance.squareform(distance.pdist(X, metric=metric, **kwds)) 1987 func = partial(distance.cdist, metric=metric, **kwds) -> 1989 return _parallel_pairwise(X, Y, func, n_jobs, **kwds)

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/metrics/pairwise.py:1530, in _parallel_pairwise(X, Y, func, n_jobs, **kwds) 1527 X, Y, dtype = _return_float_dtype(X, Y) 1529 if effective_n_jobs(n_jobs) == 1: -> 1530 return func(X, Y, **kwds) 1532 # enforce a threading backend to prevent data communication overhead 1533 fd = delayed(_dist_wrapper)

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/metrics/pairwise.py:302, in euclidean_distances(X, Y, Y_norm_squared, squared, X_norm_squared) 226 def euclidean_distances( 227 X, Y=None, *, Y_norm_squared=None, squared=False, X_norm_squared=None 228 ): 229 """ 230 Compute the distance matrix between each pair from a vector array X and Y. 231 (...) 300 [1.41421356]]) 301 """ --> 302 X, Y = check_pairwise_arrays(X, Y) 304 if X_norm_squared is not None: 305 X_norm_squared = check_array(X_norm_squared, ensure_2d=False)

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/metrics/pairwise.py:147, in check_pairwise_arrays(X, Y, precomputed, dtype, accept_sparse, force_all_finite, copy) 144 dtype = dtype_float 146 if Y is X or Y is None: --> 147 X = Y = check_array( 148 X, 149 accept_sparse=accept_sparse, 150 dtype=dtype, 151 copy=copy, 152 force_all_finite=force_all_finite, 153 estimator=estimator, 154 ) 155 else: 156 X = check_array( 157 X, 158 accept_sparse=accept_sparse, (...) 162 estimator=estimator, 163 )

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/utils/validation.py:899, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name) 893 raise ValueError( 894 "Found array with dim %d. %s expected <= 2." 895 % (array.ndim, estimator_name) 896 ) 898 if force_all_finite: --> 899 _assert_all_finite( 900 array, 901 input_name=input_name, 902 estimator_name=estimator_name, 903 allow_nan=force_all_finite == "allow-nan", 904 ) 906 if ensure_min_samples > 0: 907 n_samples = _num_samples(array)

File /opt/anaconda3/envs/ngtools/lib/python3.10/site-packages/sklearn/utils/validation.py:146, in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name) 124 if ( 125 not allow_nan 126 and estimator_name (...) 130 # Improve the error message on how to handle missing values in 131 # scikit-learn. 132 msg_err += ( 133 f"\n{estimator_name} does not accept missing values" 134 " encoded as NaN natively. For supervised learning, you might want" (...) 144 "#estimators-that-handle-nan-values" 145 ) --> 146 raise ValueError(msg_err) 148 # for object dtype data, we only check for NaNs (GH-13254) 149 elif X.dtype == np.dtype("object") and not allow_nan:

ValueError: Input contains NaN.

Marcel-Salier commented 2 years ago

Please leave it for now; we can solve it during the week. I'm just posting issues before I forget. Thanks!

fursham-h commented 2 years ago

I see, which variable has missing values?

Marcel-Salier commented 2 years ago

I cannot say, but it sometimes happens with the dots. I think we could replace NaN with 0 before running this.

fursham-h commented 2 years ago

The latest segmentador code should have no missing values in any nuclear features. Will reopen this if it still recurs.