loosolab / TF-COMB

Transcription Factor Co-Occurrence using Market Basket analysis
https://tf-comb.readthedocs.io
MIT License
10 stars 1 forks source link

Floating Point Error in #37

Closed oKoch closed 2 years ago

oKoch commented 2 years ago

Hi,

When I use utils.get_threshold(x, "both", percent=0.05), I get a FloatingPointError. Have a look at the following stack trace. The error does not occur every time. Sorry, I can't provide an example data set at the moment. Best regards, and thank you.

---------------------------------------------------------------------------
FloatingPointError                        Traceback (most recent call last)
/tmp/ipykernel_22838/2299144729.py in <module>
----> 1 investigate_differential_analysis_per_tissue(tissues=diff_mb_tissues)

/tmp/ipykernel_22838/1620922113.py in investigate_differential_analysis_per_tissue(tissues)
     33             # compared to the other clusters in the tissue. This TF-pairs could be interesting for further investigation
     34             # ,because they show a significant difference to all the other clusters in the tissue.
---> 35             res = find_specific_tf_cos_for_cluster(df=diff_rules, cluster_name=cluster_name)
     36 
     37             # save res as .pkl

/tmp/ipykernel_22838/2215076161.py in find_specific_tf_cos_for_cluster(df, cluster_name)
     52     # Filtering: log2changes and cosine cols to get tf-pairs that have a log2change,
     53     # that shows a high difference to all other clusters in the tissue and a significant cosine value.
---> 54     significants = get_significant_rules(df=result, cosine_col=cluster_cosine_col_name, cosine_threshold=0.001, log2fc_threshold_percent=0.05)
     55 
     56     print(f'Cluster: {cluster_name}: {significants.shape} ,tf-pairs with significant log2fc-changes in comparison to all the other clusters in tissue')

/tmp/ipykernel_22838/139775648.py in get_significant_rules(df, cosine_col, cosine_threshold, log2fc_threshold_percent)
     60             # calculates the thresholds for the log2foldchange cols,
     61             # positive and negative is possible, so both thresholds are needed.
---> 62             measure_threshold = utils.get_threshold(df[col], "both", percent=log2fc_threshold_percent)
     63             upper_threshold = measure_threshold[1]
     64             lower_threshold = measure_threshold[0]

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/tfcomb/utils.py in get_threshold(data, which, percent, _n_max, verbosity)
    988         for distribution in distributions:
    989                 logger.debug("Fitting data to '{0}'".format(distribution))
--> 990                 params = distribution.fit(data_finite)
    991 
    992                 #Test fit using negative loglikelihood function

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py in fit(self, data, *args, **kwds)
   2494         # especially when the user fixes parameters. Minimizing the sum
   2495         # of squares of the error generalizes to these cases.
-> 2496         vals = optimizer(func, x0, args=(ravel(data),), disp=0)
   2497         obj = func(vals, data)
   2498 

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/optimize/optimize.py in fmin(func, x0, args, xtol, ftol, maxiter, maxfun, full_output, disp, retall, callback, initial_simplex)
    578             'initial_simplex': initial_simplex}
    579 
--> 580     res = _minimize_neldermead(func, x0, args, callback=callback, **opts)
    581     if full_output:
    582         retlist = res['x'], res['fun'], res['nit'], res['nfev'], res['status']

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/optimize/optimize.py in _minimize_neldermead(func, x0, args, callback, maxiter, maxfev, disp, return_all, initial_simplex, xatol, fatol, adaptive, bounds, **unknown_options)
    773             if bounds is not None:
    774                 xe = np.clip(xe, lower_bound, upper_bound)
--> 775             fxe = func(xe)
    776 
    777             if fxe < fxr:

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/optimize/optimize.py in function_wrapper(x, *wrapper_args)
    462     def function_wrapper(x, *wrapper_args):
    463         ncalls[0] += 1
--> 464         return function(np.copy(x), *(wrapper_args + args))
    465 
    466     return ncalls, function_wrapper

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py in _penalized_nnlf(self, theta, x)
   2237         x = asarray((x-loc) / scale)
   2238         n_log_scale = len(x) * log(scale)
-> 2239         return self._nnlf_and_penalty(x, args) + n_log_scale
   2240 
   2241     def _fitstart(self, data, args=None):

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py in _nnlf_and_penalty(self, x, args)
   2218         if n_bad > 0:
   2219             x = argsreduce(~cond0, x)[0]
-> 2220         logpdf = self._logpdf(x, *args)
   2221         finite_logpdf = np.isfinite(logpdf)
   2222         n_bad += np.sum(~finite_logpdf, axis=0)

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_continuous_distns.py in _logpdf(self, x, a, b)
   8117     def _logpdf(self, x, a, b):
   8118         if np.isscalar(a) and np.isscalar(b):
-> 8119             return _truncnorm_logpdf_scalar(x, a, b)
   8120         a, b = np.atleast_1d(a), np.atleast_1d(b)
   8121         if a.size == 1 and b.size == 1:

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_continuous_distns.py in _truncnorm_logpdf_scalar(x, a, b)
   7795         cond_inner = ~condlta & ~condgtb
   7796         if np.any(cond_inner):
-> 7797             _logdelta = _truncnorm_get_logdelta_scalar(a, b)
   7798             np.place(out, cond_inner, _norm_logpdf(x[cond_inner]) - _logdelta)
   7799         return (out[0] if (shp == ()) else out)

~/.conda/envs/tfcomb_env/lib/python3.7/site-packages/scipy/stats/_continuous_distns.py in _truncnorm_get_logdelta_scalar(a, b)
   7774     else:
   7775         sla, slb = _norm_logsf(a), _norm_logsf(b)
-> 7776         logdelta = sla + np.log1p(-np.exp(slb - sla))
   7777     return logdelta
   7778 

FloatingPointError: underflow encountered in exp
msbentsen commented 2 years ago

Hi Oliver, thank you for the issue! I suspect that one of the distributions within get_threshold() did not fit the data, and thus threw the error. I created a try/except now, so please have a look if that solves the issue - it should throw an error but continue with the rest of the fitting without breaking.

oKoch commented 2 years ago

Hi, I checked it; it looks fine to me.