theislab / diffxpy

Differential expression analysis for single-cell RNA-seq data.
https://diffxpy.rtfd.io
BSD 3-Clause "New" or "Revised" License
179 stars 23 forks source link

ValueError: array must not contain infs or NaNs #201

Open Zethson opened 3 years ago

Zethson commented 3 years ago

Hey,

import diffxpy.api as de

de_w_test = de.test.wald(
    data=leukocytes_only_raw.X,
    formula_loc="~ 1 + condition",
    factor_loc_totest="condition",
    gene_names=leukocytes_only_raw.var_names,
    sample_description=leukocytes_only_raw.obs,
)

results in

╭──────────────────────────── Traceback (most recent call last) ────────────────────────────╮
│ <ipython-input-75-467c1e316174>:3 in <module>                                             │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/diffxpy/test │
│ ing/tests.py:717 in wald                                                                  │
│                                                                                           │
│    714 │   col_indices = np.array([np.where(constraints_loc_temp[x, :] == 1)[0][0] for x  │
│    715 │                                                                                  │
│    716 │   # Fit model.                                                                   │
│ ❱  717 │   model = _fit(                                                                  │
│    718 │   │   noise_model=noise_model,                                                   │
│    719 │   │   data=data,                                                                 │
│    720 │   │   design_loc=design_loc,                                                     │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/diffxpy/test │
│ ing/tests.py:222 in _fit                                                                  │
│                                                                                           │
│    219 │   else:                                                                          │
│    220 │   │   raise ValueError('backend="%s" not recognized.' % backend)                 │
│    221 │                                                                                  │
│ ❱  222 │   estim = Estimator(                                                             │
│    223 │   │   input_data=input_data,                                                     │
│    224 │   │   init_a=init_a,                                                             │
│    225 │   │   init_b=init_b,                                                             │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/batchglm/tra │
│ in/numpy/glm_nb/estimator.py:59 in __init__                                               │
│                                                                                           │
│    56 │   │   │   Useful in scenarios where fitting the exact `scale` is not absolutely n │
│    57 │   │   :param dtype: Numerical precision.                                          │
│    58 │   │   """                                                                         │
│ ❱  59 │   │   init_a, init_b, train_loc, train_scale = init_par(                          │
│    60 │   │   │   input_data=input_data,                                                  │
│    61 │   │   │   init_a=init_a,                                                          │
│    62 │   │   │   init_b=init_b,                                                          │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/batchglm/mod │
│ els/glm_nb/utils.py:129 in init_par                                                       │
│                                                                                           │
│   126 │   │   │   │   )                                                                   │
│   127 │   │   │   │                                                                       │
│   128 │   │   │   │   # train mu, if the closed-form solution is inaccurate               │
│ ❱ 129 │   │   │   │   train_loc = not (np.all(np.abs(rmsd_a) < 1e-20) or rmsd_a.size == 0 │
│   130 │   │   │   │                                                                       │
│   131 │   │   │   │   if input_data.size_factors is not None:                             │
│   132 │   │   │   │   │   if np.any(input_data.size_factors != 1):                        │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/array/c │
│ ore.py:1622 in __bool__                                                                   │
│                                                                                           │
│   1619 │   │   │   │   "Use a.any() or a.all().".format(self.__class__.__name__)          │
│   1620 │   │   │   )                                                                      │
│   1621 │   │   else:                                                                      │
│ ❱ 1622 │   │   │   return bool(self.compute())                                            │
│   1623 │                                                                                  │
│   1624 │   __nonzero__ = __bool__  # python 2                                             │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/base.py │
│ :283 in compute                                                                           │
│                                                                                           │
│    280 │   │   --------                                                                   │
│    281 │   │   dask.base.compute                                                          │
│    282 │   │   """                                                                        │
│ ❱  283 │   │   (result,) = compute(self, traverse=False, **kwargs)                        │
│    284 │   │   return result                                                              │
│    285 │                                                                                  │
│    286 │   def __await__(self):                                                           │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/base.py │
│ :565 in compute                                                                           │
│                                                                                           │
│    562 │   │   keys.append(x.__dask_keys__())                                             │
│    563 │   │   postcomputes.append(x.__dask_postcompute__())                              │
│    564 │                                                                                  │
│ ❱  565 │   results = schedule(dsk, keys, **kwargs)                                        │
│    566 │   return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])          │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/threade │
│ d.py:76 in get                                                                            │
│                                                                                           │
│   73 │   │   │   │   atexit.register(pool.close)                                          │
│   74 │   │   │   │   pools[thread][num_workers] = pool                                    │
│   75 │                                                                                    │
│ ❱ 76 │   results = get_async(                                                             │
│   77 │   │   pool.apply_async,                                                            │
│   78 │   │   len(pool._pool),                                                             │
│   79 │   │   dsk,                                                                         │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/local.p │
│ y:487 in get_async                                                                        │
│                                                                                           │
│   484 │   │   │   │   │   │   task = dsk[key]                                             │
│   485 │   │   │   │   │   │   _execute_task(task, data)  # Re-execute locally             │
│   486 │   │   │   │   │   else:                                                           │
│ ❱ 487 │   │   │   │   │   │   raise_exception(exc, tb)                                    │
│   488 │   │   │   │   res, worker_id = loads(res_info)                                    │
│   489 │   │   │   │   state["cache"][key] = res                                           │
│   490 │   │   │   │   finish_task(dsk, key, state, results, keyorder.get)                 │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/local.p │
│ y:317 in reraise                                                                          │
│                                                                                           │
│   314 def reraise(exc, tb=None):                                                          │
│   315 │   if exc.__traceback__ is not tb:                                                 │
│   316 │   │   raise exc.with_traceback(tb)                                                │
│ ❱ 317 │   raise exc                                                                       │
│   318                                                                                     │
│   319                                                                                     │
│   320 def identity(x):                                                                    │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/local.p │
│ y:222 in execute_task                                                                     │
│                                                                                           │
│   219 │   """                                                                             │
│   220 │   try:                                                                            │
│   221 │   │   task, data = loads(task_info)                                               │
│ ❱ 222 │   │   result = _execute_task(task, data)                                          │
│   223 │   │   id = get_id()                                                               │
│   224 │   │   result = dumps((result, id))                                                │
│   225 │   │   failed = False                                                              │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/dask/core.py │
│ :121 in _execute_task                                                                     │
│                                                                                           │
│   118 │   │   # Note: Don't assign the subtask results to a variable. numpy detects       │
│   119 │   │   # temporaries by their reference count and can execute certain              │
│   120 │   │   # operations in-place.                                                      │
│ ❱ 121 │   │   return func(*(_execute_task(a, cache) for a in args))                       │
│   122 │   elif not ishashable(arg):                                                       │
│   123 │   │   return arg                                                                  │
│   124 │   elif arg in cache:                                                              │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/scipy/linalg │
│ /basic.py:334 in solve_triangular                                                         │
│                                                                                           │
│    331 │   │   │    'versions of SciPy.', DeprecationWarning, stacklevel=2)               │
│    332 │                                                                                  │
│    333 │   a1 = _asarray_validated(a, check_finite=check_finite)                          │
│ ❱  334 │   b1 = _asarray_validated(b, check_finite=check_finite)                          │
│    335 │   if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:                           │
│    336 │   │   raise ValueError('expected square matrix')                                 │
│    337 │   if a1.shape[0] != b1.shape[0]:                                                 │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/scipy/_lib/_ │
│ util.py:262 in _asarray_validated                                                         │
│                                                                                           │
│   259 │   │   if np.ma.isMaskedArray(a):                                                  │
│   260 │   │   │   raise ValueError('masked arrays are not supported')                     │
│   261 │   toarray = np.asarray_chkfinite if check_finite else np.asarray                  │
│ ❱ 262 │   a = toarray(a)                                                                  │
│   263 │   if not objects_ok:                                                              │
│   264 │   │   if a.dtype is np.dtype('O'):                                                │
│   265 │   │   │   raise ValueError('object arrays are not supported')                     │
│                                                                                           │
│ /home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/numpy/lib/fu │
│ nction_base.py:485 in asarray_chkfinite                                                   │
│                                                                                           │
│    482 │   """                                                                            │
│    483 │   a = asarray(a, dtype=dtype, order=order)                                       │
│    484 │   if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():         │
│ ❱  485 │   │   raise ValueError(                                                          │
│    486 │   │   │   "array must not contain infs or NaNs")                                 │
│    487 │   return a                                                                       │
╰───────────────────────────────────────────────────────────────────────────────────────────╯
ValueError: array must not contain infs or NaNs

I already checked that leukocytes_raw.X does not contain any infs or nans.

Got any idea?

Zethson commented 3 years ago

Maybe a result of:

/home/lukas/miniconda3/envs/single_cell_analysis/lib/python3.8/site-packages/diffxpy/testing/det.py:1620: RuntimeWarning: divide by zero encountered in log
  self._logfc = np.log(mean_x1) - np.log(mean_x0)

?