Open lyriccoder opened 4 years ago
I can't run the NMF algorithm. When I run:
%%time
nmf_model = NMF(n_components=20, beta_loss='kullback-leibler', solver='mu').fit(data)
... I see the following error stack:
---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
<timed exec> in <module>

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in fit(self, X, y, **params)
   1310 self
   1311 """
-> 1312 self.fit_transform(X, **params)
   1313 return self
   1314

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in fit_transform(self, X, y, W, H)
   1285 l1_ratio=self.l1_ratio, regularization='both',
   1286 random_state=self.random_state, verbose=self.verbose,
-> 1287 shuffle=self.shuffle)
   1288
   1289 self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha, l1_ratio, regularization, random_state, verbose, shuffle)
   1067 tol, l1_reg_W, l1_reg_H,
   1068 l2_reg_W, l2_reg_H, update_H,
-> 1069 verbose)
   1070
   1071 else:

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _fit_multiplicative_update(X, W, H, beta_loss, max_iter, tol, l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H, update_H, verbose)
    810 if update_H:
    811 delta_H = _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H,
--> 812 l2_reg_H, gamma)
    813 H *= delta_H
    814

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma)
    634 else:
    635 # Numerator
--> 636 WH_safe_X = _special_sparse_dot(W, H, X)
    637 if sp.issparse(X):
    638 WH_safe_X_data = WH_safe_X.data

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _special_sparse_dot(W, H, X)
    178 batch = slice(start, start + batch_size)
    179 dot_vals[batch] = np.multiply(W[ii[batch], :],
--> 180 H.T[jj[batch], :]).sum(axis=1)
    181
    182 WH = sp.coo_matrix((dot_vals, (ii, jj)), shape=X.shape)
FloatingPointError: underflow encountered in multiply
I also have the same error for LatentDirichletAllocation if I choose 448 clusters for 25000 rows:
%%time
lda_model = LatentDirichletAllocation(n_components=448).fit(data_vec)
---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
<timed exec> in <module>

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in fit(self, X, y)
    566 # batch update
    567 self._em_step(X, total_samples=n_samples,
--> 568 batch_update=True, parallel=parallel)
    569
    570 # check perplexity

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _em_step(self, X, total_samples, batch_update, parallel)
    446 # E-step
    447 _, suff_stats = self._e_step(X, cal_sstats=True, random_init=True,
--> 448 parallel=parallel)
    449
    450 # M-step

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _e_step(self, X, cal_sstats, random_init, parallel)
    399 self.mean_change_tol, cal_sstats,
    400 random_state)
--> 401 for idx_slice in gen_even_slices(X.shape[0], n_jobs))
    402
    403 # merge result

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1001 # remaining jobs.
   1002 self._iterating = False
-> 1003 if self.dispatch_one_batch(iterator):
   1004 self._iterating = self._original_iterator is not None
   1005

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    832 return False
    833 else:
--> 834 self._dispatch(tasks)
    835 return True
    836

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    751 with self._lock:
    752 job_idx = len(self._jobs)
--> 753 job = self._backend.apply_async(batch, callback=cb)
    754 # A job can complete so quickly than its callback is
    755 # called before we get here, causing self._jobs to

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    199 def apply_async(self, func, callback=None):
    200 """Schedule a func to be run"""
--> 201 result = ImmediateResult(func)
    202 if callback:
    203 callback(result)

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    580 # Don't delay the application, to avoid keeping the input
    581 # arguments in memory
--> 582 self.results = batch()
    583
    584 def get(self):

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in __call__(self)
    254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
    257
    258 def __len__(self):

d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
    255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
    257
    258 def __len__(self):

d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, max_iters, mean_change_tol, cal_sstats, random_state)
    115
    116 doc_topic_d = (exp_doc_topic_d *
--> 117 np.dot(cnts / norm_phi, exp_topic_word_d.T))
    118 # Note: adds doc_topic_prior to doc_topic_d, in-place.
    119 _dirichlet_expectation_1d(doc_topic_d, doc_topic_prior,

FloatingPointError: underflow encountered in multiply
Could you please help? I am using Python 3.7.5 x64 on Windows 10.
I can't run the NMF algorithm. When I run:
... I see the following error stack:
I also have the same error for LatentDirichletAllocation if I choose 448 clusters for 25000 rows:
Could you please help? I am using Python 3.7.5 x64 on Windows 10.