theislab / diffxpy

Differential expression analysis for single-cell RNA-seq data.
https://diffxpy.rtfd.io
BSD 3-Clause "New" or "Revised" License
191 stars 23 forks source link

RuntimeError: Cannot convert a sparse array to dense automatically. To manually densify, use the todense method. #217

Open SimaDubnov opened 2 years ago

SimaDubnov commented 2 years ago

Hi! I get this error when running a very simple wald test. I saw that you solved the same problem two years ago but didn't publish how exactly. Could you please guide me here too?

Here is the whole error:

RuntimeError Traceback (most recent call last) Input In [26], in <cell line: 1>() ----> 1 test=de.test.wald(data=data_Ast, formula_loc='~1 + AD', factor_loc_totest='AD')

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/diffxpy/testing/tests.py:717, in wald(data, factor_loc_totest, coef_to_test, formula_loc, formula_scale, as_numeric, init_a, init_b, gene_names, sample_description, dmat_loc, dmat_scale, constraints_loc, constraints_scale, noise_model, size_factors, batch_size, backend, train_args, training_strategy, quick_scale, dtype, **kwargs) 714 col_indices = np.array([np.where(constraints_loc_temp[x, :] == 1)[0][0] for x in col_indices]) 716 # Fit model. --> 717 model = _fit( 718 noise_model=noise_model, 719 data=data, 720 design_loc=design_loc, 721 design_scale=design_scale, 722 design_loc_names=design_loc_names, 723 design_scale_names=design_scale_names, 724 constraints_loc=constraints_loc, 725 constraints_scale=constraints_scale, 726 init_a=init_a, 727 init_b=init_b, 728 gene_names=gene_names, 729 size_factors=size_factors, 730 batch_size=batch_size, 731 backend=backend, 732 train_args=train_args, 733 training_strategy=training_strategy, 734 quick_scale=quick_scale, 735 dtype=dtype, 736 **kwargs, 737 ) 739 # Prepare differential expression test. 740 de_test = DifferentialExpressionTestWald( 741 model_estim=model, 742 col_indices=col_indices, 743 noise_model=noise_model, 744 sample_description=sample_description 745 )

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/diffxpy/testing/tests.py:222, in _fit(noise_model, data, design_loc, design_scale, design_loc_names, design_scale_names, constraints_loc, constraints_scale, init_model, init_a, init_b, gene_names, size_factors, batch_size, backend, training_strategy, quick_scale, train_args, close_session, dtype) 219 else: 220 raise ValueError('backend="%s" not recognized.' % backend) --> 222 estim = Estimator( 223 input_data=input_data, 224 init_a=init_a, 225 init_b=init_b, 226 dtype=dtype, 227 **constructor_args 228 ) 229 estim.initialize() 231 # Assemble backend specific key word arguments to training function:

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/train/numpy/glm_nb/estimator.py:59, in Estimator.__init__(self, input_data, init_a, init_b, batch_size, quick_scale, dtype, **kwargs) 19 def __init__( 20 self, 21 input_data: InputDataGLM, (...) 27 **kwargs 28 ): 29 """ 30 Performs initialisation and creates a new estimator. 31 (...) 57 :param dtype: Numerical precision. 58 """ ---> 59 init_a, init_b, train_loc, train_scale = init_par( 60 input_data=input_data, 61 init_a=init_a, 62 init_b=init_b, 63 init_model=None 64 ) 65 self._train_loc = train_loc 66 self._train_scale = train_scale

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/models/glm_nb/utils.py:120, in init_par(input_data, init_a, init_b, init_model) 117 init_a = "standard" if not one_hot else "closed_form" 119 if init_a.lower() == "closed_form": --> 120 groupwise_means, init_a, rmsd_a = closedform_nb_glm_logmu( 121 x=input_data.x, 122 design_loc=input_data.design_loc, 123 constraints_loc=input_data.constraints_loc, 124 size_factors=input_data.size_factors, 125 link_fn=lambda mu: np.log(mu+np.nextafter(0, 1, dtype=mu.dtype)) 126 ) 128 # train mu, if the closed-form solution is inaccurate 129 train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0)

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/models/glm_nb/utils.py:30, in closedform_nb_glm_logmu(x, design_loc, constraints_loc, size_factors, link_fn, inv_link_fn) 10 def closedform_nb_glm_logmu( 11 x: Union[np.ndarray, scipy.sparse.csr_matrix], 12 design_loc: np.ndarray, (...) 16 inv_link_fn=np.exp 17 ): 18 r""" 19 Calculates a closed-form solution for the mu parameters of negative-binomial GLMs. 20 (...) 28 :return: tuple: (groupwise_means, mu, rmsd) 29 """ ---> 30 return closedform_glm_mean( 31 x=x, 32 dmat=design_loc, 33 constraints=constraints_loc, 34 size_factors=size_factors, 35 link_fn=link_fn, 36 inv_link_fn=inv_link_fn 37 )

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/models/base_glm/utils.py:118, in closedform_glm_mean(x, dmat, constraints, size_factors, link_fn, inv_link_fn) 115 else: 116 return link_fn(groupwise_means) --> 118 linker_groupwise_means, mu, rmsd, rank, s = groupwise_solve_lm( 119 dmat=dmat, 120 apply_fun=apply_fun, 121 constraints=constraints 122 ) 123 if inv_link_fn is not None: 124 return inv_link_fn(linker_groupwise_means), mu, rmsd

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/utils/linalg.py:93, in groupwise_solve_lm(dmat, apply_fun, constraints) 89 logger.error("model is not full rank!") 91 # Get group-wise means in linker space based on group assignments 92 # based on unique rows of design matrix: ---> 93 params = apply_fun(inverse_idx) 95 # Use least-squares solver to compute model parameterization 96 # accounting for dependent parameters, ie. degrees of freedom 97 # of the model which appear as groups in the design matrix (...) 100 # <X, <theta, H> = means -> <X, theta>, H> = means -> lstsqs for theta 101 # (This is faster and more accurate than using matrix inversion.) 102 logger.debug(" ** Solve lstsq problem")

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/batchglm/models/base_glm/utils.py:109, in closedform_glm_mean.<locals>.apply_fun(grouping) 108 def apply_fun(grouping): --> 109 groupwise_means = np.asarray(np.vstack([ 110 np.mean(x[np.where(grouping == g)[0], :], axis=0) 111 for g in np.unique(grouping) 112 ])) 113 if link_fn is None: 114 return groupwise_means

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/dask/array/core.py:1642, in Array.__array__(self, dtype, **kwargs) 1640 x = x.astype(dtype) 1641 if not isinstance(x, np.ndarray): -> 1642 x = np.array(x) 1643 return x

File ~/.conda/envs/scRNAseq/lib/python3.10/site-packages/sparse/_sparse_array.py:229, in SparseArray.__array__(self, *args, **kwargs) 226 from ._settings import AUTO_DENSIFY 228 if not AUTO_DENSIFY: --> 229 raise RuntimeError( 230 "Cannot convert a sparse array to dense automatically. " 231 "To manually densify, use the todense method." 232 ) 234 return np.asarray(self.todense(), *args, **kwargs)

RuntimeError: Cannot convert a sparse array to dense automatically. To manually densify, use the todense method.

merelkuijs commented 1 year ago

Hi, I looked at this issue and, thanks to Zethson's comment there, I was able to resolve it by explicitly installing the following package versions: dask==2021.4.0 and sparse==0.9.1.