simonwm / tacco

TACCO: Transfer of Annotations to Cells and their COmbinations
BSD 3-Clause "New" or "Revised" License
44 stars 1 forks source link

Problems when annotating the spatial data #10

Closed AkkSeannn closed 1 year ago

AkkSeannn commented 1 year ago

I ran into a problem when using tc.tl.annotate; the code is: tc.tl.annotate(adata_st1, adata_sc, annotation_key='fine',result_key='fine',multi_center=10,);

Both the reference scRNA-seq data and the spatial transcriptome data were read with Scanpy, and the error is:

ValueError Traceback (most recent call last) Input In [19], in <cell line: 1>() ----> 1 tc.tl.annotate(adata_st1, adata_sc, annotation_key='fine',result_key='fine',multi_center=10,)

File ~/.local/lib/python3.9/site-packages/tacco/tools/_annotate.py:802, in annotate(adata, reference, annotation_key, result_key, counts_location, method, bisections, bisection_divisor, platform_iterations, normalize_to, annotation_prior, multi_center, multi_center_amplitudes, reconstruction_key, max_annotation, min_counts_per_gene, min_counts_per_cell, min_cells_per_gene, min_genes_per_cell, remove_constant_genes, remove_zero_cells, min_log2foldchange, min_expression, remove_mito, n_hvg, skip_checks, assume_valid_counts, return_reference, gene_keys, verbose, **kw_args) 800 print('\n'.join(method_construction_info[::-1])) 801 start = time.time() --> 802 cell_type = _method(tdata, reference, annotation_key, annotation_prior, verbose) 803 if verbose > 0: 804 print(f'Finished annotation in {np.round(time.time() - start, 2)} seconds.')

File ~/.local/lib/python3.9/site-packages/tacco/tools/_annotate.py:328, in platform_normalize_annotation_method.._method(adata, reference, annotation_key, annotation_prior, verbose) 325 # renormalize profiles as they have been denormalized by platform noramlization 326 reference.varm[annotation_key] /= reference.varm[annotation_key].sum(axis=0).to_numpy() --> 328 cell_type = annotation_method(adata, reference, annotation_key, annotation_prior, verbose) 329 return cell_type

File ~/.local/lib/python3.9/site-packages/tacco/tools/_annotate.py:408, in multi_center_annotation_method.._method(adata, reference, annotation_key, annotation_prior, verbose) 406 utils.log1p(preped) 407 sc.pp.scale(preped) --> 408 sc.pp.pca(preped, random_state=42, n_comps=min(10,min(preped.shape[0],preped.shape[1])-1)) 410 new_cats = [] 411 for cat, df in reference.obs.groupby(annotation_key):

File ~/.local/lib/python3.9/site-packages/scanpy/preprocessing/_pca.py:188, in pca(data, n_comps, zero_center, svd_solver, random_state, return_info, use_highly_variable, dtype, copy, chunked, chunksize) 184 X = X.toarray() 185 pca = PCA( 186 n_components=n_comps, svd_solver=svd_solver, random_state=random_state 187 ) --> 188 Xpca = pca.fit_transform(X) 189 elif issparse(X) and zero_center: 190 from sklearn.decomposition import PCA

File ~/anaconda3/lib/python3.9/site-packages/sklearn/decomposition/_pca.py:407, in PCA.fit_transform(self, X, y) 385 def fit_transform(self, X, y=None): 386 """Fit the model with X and apply the dimensionality reduction on X. 387 388 Parameters (...) 405 C-ordered array, use 'np.ascontiguousarray'. 406 """ --> 407 U, S, Vt = self._fit(X) 408 U = U[:, : self.ncomponents] 410 if self.whiten: 411 # X_new = X V / S sqrt(n_samples) = U * sqrt(n_samples)

File ~/anaconda3/lib/python3.9/site-packages/sklearn/decomposition/_pca.py:430, in PCA._fit(self, X) 424 if issparse(X): 425 raise TypeError( 426 "PCA does not support sparse input. See " 427 "TruncatedSVD for a possible alternative." 428 ) --> 430 X = self._validate_data( 431 X, dtype=[np.float64, np.float32], ensure_2d=True, copy=self.copy 432 ) 434 # Handle n_components==None 435 if self.n_components is None:

File ~/anaconda3/lib/python3.9/site-packages/sklearn/base.py:566, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, check_params) 564 raise ValueError("Validation should be done on X, y or both.") 565 elif not no_val_X and no_val_y: --> 566 X = check_array(X, check_params) 567 out = X 568 elif no_val_X and not no_val_y:

File ~/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:800, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator) 794 raise ValueError( 795 "Found array with dim %d. %s expected <= 2." 796 % (array.ndim, estimator_name) 797 ) 799 if force_all_finite: --> 800 _assert_all_finite(array, allow_nan=force_all_finite == "allow-nan") 802 if ensure_min_samples > 0: 803 n_samples = _num_samples(array)

File ~/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:114, in _assert_all_finite(X, allow_nan, msg_dtype) 107 if ( 108 allow_nan 109 and np.isinf(X).any() 110 or not allow_nan 111 and not np.isfinite(X).all() 112 ): 113 type_err = "infinity" if allow_nan else "NaN, infinity" --> 114 raise ValueError( 115 msg_err.format( 116 type_err, msg_dtype if msg_dtype is not None else X.dtype 117 ) 118 ) 119 # for object dtype data, we only check for NaNs (GH-13254) 120 elif X.dtype == np.dtype("object") and not allow_nan:

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

How can I solve it? Thanks.

AkkSeannn commented 1 year ago

The problem was solved by assigning the raw counts (adata_sc.raw.X) back to the reference data's .X before annotation, since tc.tl.annotate expects raw (non-normalized, non-log-transformed) counts.