theislab / diffxpy

Differential expression analysis for single-cell RNA-seq data.
https://diffxpy.rtfd.io
BSD 3-Clause "New" or "Revised" License

Cannot handle sparse matrices #145

Closed: LuckyMD closed this issue 4 years ago

LuckyMD commented 4 years ago

Hey! I'm running a simple model with de.test.wald() and get the following error with sparse input matrices.

This is on diffxpy 0.7.3 and batchglm 0.7.3, with sparse 0.9.1 and dask 1.1.0.

The call is:

    sex_test_tmp = de.test.wald(
        data=adata_tmp.X,
        formula_loc="~ 1 + sex + dataset + total_counts",
        as_numeric=['total_counts'],
        factor_loc_totest=["sex"],
        sample_description=adata_tmp.obs,
        gene_names=adata_tmp.var_names,
        noise_model='nb',
        dtype="float64"
    )

And the error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-16-e09783913589> in <module>
     41         gene_names=adata_tmp.var_names,
     42         noise_model='nb',
---> 43         dtype="float64"
     44     )
     45 

~/.local/lib/python3.6/site-packages/diffxpy/testing/tests.py in wald(data, factor_loc_totest, coef_to_test, formula_loc, formula_scale, as_numeric, init_a, init_b, gene_names, sample_description, dmat_loc, dmat_scale, constraints_loc, constraints_scale, noise_model, size_factors, batch_size, backend, train_args, training_strategy, quick_scale, dtype, **kwargs)
    734         quick_scale=quick_scale,
    735         dtype=dtype,
--> 736         **kwargs,
    737     )
    738 

~/.local/lib/python3.6/site-packages/diffxpy/testing/tests.py in _fit(noise_model, data, design_loc, design_scale, design_loc_names, design_scale_names, constraints_loc, constraints_scale, init_model, init_a, init_b, gene_names, size_factors, batch_size, backend, training_strategy, quick_scale, train_args, close_session, dtype)
    225         init_b=init_b,
    226         dtype=dtype,
--> 227         **constructor_args
    228     )
    229     estim.initialize()

~/.local/lib/python3.6/site-packages/batchglm/train/numpy/glm_nb/estimator.py in __init__(self, input_data, init_a, init_b, batch_size, quick_scale, dtype, **kwargs)
     65             init_a=init_a,
     66             init_b=init_b,
---> 67             init_model=None
     68         )
     69         init_a = init_a.astype(dtype)

~/.local/lib/python3.6/site-packages/batchglm/train/numpy/glm_nb/estimator.py in init_par(self, input_data, init_a, init_b, init_model)
    153 
    154                     init_a = np.zeros([input_data.num_loc_params, input_data.num_features])
--> 155                     init_a[0, :] = np.log(overall_means)
    156                     self._train_loc = True
    157 

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/array/core.py in __array__(self, dtype, **kwargs)
   1026 
   1027     def __array__(self, dtype=None, **kwargs):
-> 1028         x = self.compute()
   1029         if dtype and x.dtype != dtype:
   1030             x = x.astype(dtype)

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
    154         dask.base.compute
    155         """
--> 156         (result,) = compute(self, traverse=False, **kwargs)
    157         return result
    158 

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
    396     keys = [x.__dask_keys__() for x in collections]
    397     postcomputes = [x.__dask_postcompute__() for x in collections]
--> 398     results = schedule(dsk, keys, **kwargs)
    399     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    400 

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     74     results = get_async(pool.apply_async, len(pool._pool), dsk, result,
     75                         cache=cache, get_id=_thread_get_id,
---> 76                         pack_exception=pack_exception, **kwargs)
     77 
     78     # Cleanup pools associated to dead threads

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    457                         _execute_task(task, data)  # Re-execute locally
    458                     else:
--> 459                         raise_exception(exc, tb)
    460                 res, worker_id = loads(res_info)
    461                 state['cache'][key] = res

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/compatibility.py in reraise(exc, tb)
    110         if exc.__traceback__ is not tb:
    111             raise exc.with_traceback(tb)
--> 112         raise exc
    113 
    114     import pickle as cPickle

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    228     try:
    229         task, data = loads(task_info)
--> 230         result = _execute_task(task, data)
    231         id = get_id()
    232         result = dumps((result, id))

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in <listcomp>(.0)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in <listcomp>(.0)
    116     elif istask(arg):
    117         func, args = arg[0], arg[1:]
--> 118         args2 = [_execute_task(a, cache) for a in args]
    119         return func(*args2)
    120     elif not ishashable(arg):

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    117         func, args = arg[0], arg[1:]
    118         args2 = [_execute_task(a, cache) for a in args]
--> 119         return func(*args2)
    120     elif not ishashable(arg):
    121         return arg

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/optimization.py in __call__(self, *args)
    940                              % (len(self.inkeys), len(args)))
    941         return core.get(self.dsk, self.outkey,
--> 942                         dict(zip(self.inkeys, args)))
    943 
    944     def __reduce__(self):

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in get(dsk, out, cache)
    147     for key in toposort(dsk):
    148         task = dsk[key]
--> 149         result = _execute_task(task, cache)
    150         cache[key] = result
    151     result = _execute_task(out, cache)

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    117         func, args = arg[0], arg[1:]
    118         args2 = [_execute_task(a, cache) for a in args]
--> 119         return func(*args2)
    120     elif not ishashable(arg):
    121         return arg

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/compatibility.py in apply(func, args, kwargs)
     91     def apply(func, args, kwargs=None):
     92         if kwargs:
---> 93             return func(*args, **kwargs)
     94         else:
     95             return func(*args)

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/array/reductions.py in mean_chunk(x, sum, numel, dtype, **kwargs)
    330 
    331 def mean_chunk(x, sum=chunk.sum, numel=numel, dtype='f8', **kwargs):
--> 332     n = numel(x, dtype=dtype, **kwargs)
    333     total = sum(x, dtype=dtype, **kwargs)
    334     empty = empty_lookup.dispatch(type(n))

~/anaconda3/envs/cov19/lib/python3.6/site-packages/dask/array/reductions.py in numel(x, **kwargs)
    321 def numel(x, **kwargs):
    322     """ A reduction to count the number of elements """
--> 323     return chunk.sum(np.ones_like(x), **kwargs)
    324 
    325 

~/anaconda3/envs/cov19/lib/python3.6/site-packages/numpy/core/numeric.py in ones_like(a, dtype, order, subok)
    286 
    287     """
--> 288     res = empty_like(a, dtype=dtype, order=order, subok=subok)
    289     multiarray.copyto(res, 1, casting='unsafe')
    290     return res

~/.local/lib/python3.6/site-packages/sparse/_sparse_array.py in __array__(self, **kwargs)
    221         if not AUTO_DENSIFY:
    222             raise RuntimeError(
--> 223                 "Cannot convert a sparse array to dense automatically. "
    224                 "To manually densify, use the todense method."
    225             )

RuntimeError: Cannot convert a sparse array to dense automatically. To manually densify, use the todense method.
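The failure surfaces when dask's mean reduction calls np.ones_like on a pydata/sparse chunk, and pydata/sparse refuses to densify implicitly (the AUTO_DENSIFY flag visible at the bottom of the traceback). If the matrix fits in memory, one way to sidestep this is to densify the expression matrix before calling diffxpy. A minimal sketch, not part of the original report, using the adata_tmp object from the call above:

    import numpy as np
    import scipy.sparse

    # Workaround sketch (not from this thread): densify the expression matrix
    # up front so batchglm never routes it through a pydata/sparse-backed dask
    # array. Assumes adata_tmp.X fits in memory as a dense array.
    if scipy.sparse.issparse(adata_tmp.X):
        adata_tmp.X = np.asarray(adata_tmp.X.todense())

pydata/sparse also reads a SPARSE_AUTO_DENSIFY environment variable (set to 1 before the package is imported) that flips the AUTO_DENSIFY flag, but that only silences the symptom; the actual cause here turned out to be a version conflict, as noted below.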
davidsebfischer commented 4 years ago

Resolved in private communication; the issue was a version conflict between numpy and dask.

tinakeshav commented 2 years ago

I have the same problem. Could you kindly let me know what the version conflict was?

davidsebfischer commented 2 years ago

Did you check the pip installation messages when installing dask? That would be a good starting point.
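For anyone landing here later: a quick way to surface such conflicts is pip's built-in dependency checker together with printing the installed versions of the packages in the traceback. A minimal diagnostic sketch (the package list is taken from this thread; the exact compatible versions are not stated here):

    import subprocess
    import sys

    # `pip check` reports installed packages whose dependencies are incompatible.
    subprocess.run([sys.executable, "-m", "pip", "check"], check=False)

    # Print the versions of the packages involved in the traceback above.
    for name in ("numpy", "dask", "sparse", "diffxpy", "batchglm"):
        module = __import__(name)
        print(name, getattr(module, "__version__", "unknown"))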

Keyi-Li commented 1 year ago

Could you please share how you solved the version conflict?