Open ThomasMeissnerDS opened 10 months ago
Hi,
As part of a Kaggle competition, I wanted to use NGBoost. However, calling `fit` on an `NGBClassifier` fails with:
```
LinAlgError Traceback (most recent call last)
Cell In[58], line 112
     97 base_learner_choice = GradientBoostingRegressor(
     98 max_depth=7,
     99 n_estimators=300,
    100 n_iter_no_change=20,
    101 random_state=seed,
    102 )
    104 model = NGBClassifier(
    105 Dist=k_categorical(int(y_train.nunique())),
    106 n_estimators=300,
   (...)
    109 learning_rate=0.1,
    110 )
--> 112 model.fit(
    113 x_train,
    114 y_train.values.ravel(),
    115 X_val=x_test,
    116 Y_val=y_test.values.ravel(),
    117 sample_weight=classes_weights_sample,
    118 early_stopping_rounds=10,
    119 )
    121 if target_type=='num':
    122 preds.append(model.predict(X_test_temp))

File /opt/conda/lib/python3.10/site-packages/ngboost/ngboost.py:250, in NGBoost.fit(self, X, Y, X_val, Y_val, sample_weight, val_sample_weight, train_loss_monitor, val_loss_monitor, early_stopping_rounds)
    247 self.scalings = []
    248 self.col_idxs = []
--> 250 return self.partial_fit(
    251 X,
    252 Y,
    253 X_val=X_val,
    254 Y_val=Y_val,
    255 sample_weight=sample_weight,
    256 val_sample_weight=val_sample_weight,
    257 train_loss_monitor=train_loss_monitor,
    258 val_loss_monitor=val_loss_monitor,
    259 early_stopping_rounds=early_stopping_rounds,
    260 )

File /opt/conda/lib/python3.10/site-packages/ngboost/ngboost.py:384, in NGBoost.partial_fit(self, X, Y, X_val, Y_val, sample_weight, val_sample_weight, train_loss_monitor, val_loss_monitor, early_stopping_rounds)
    382 loss_list += [train_loss_monitor(D, Y_batch, weight_batch)]
    383 loss = loss_list[-1]
--> 384 grads = D.grad(Y_batch, natural=self.natural_gradient)
    386 proj_grad = self.fit_base(X_batch, grads, weight_batch)
    387 scale = self.line_search(proj_grad, P_batch, Y_batch, weight_batch)

File /opt/conda/lib/python3.10/site-packages/ngboost/scores.py:12, in Score.grad(self, Y, natural)
     10 if natural:
     11 metric = self.metric()
---> 12 grad = np.linalg.solve(metric, grad)
     13 return grad

File <__array_function__ internals>:200, in solve(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/numpy/linalg/linalg.py:386, in solve(a, b)
    384 signature = 'DD->D' if isComplexType(t) else 'dd->d'
    385 extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 386 r = gufunc(a, b, signature=signature, extobj=extobj)
    388 return wrap(r.astype(result_t, copy=False))

File /opt/conda/lib/python3.10/site-packages/numpy/linalg/linalg.py:89, in _raise_linalgerror_singular(err, flag)
     88 def _raise_linalgerror_singular(err, flag):
---> 89 raise LinAlgError("Singular matrix")

LinAlgError: Singular matrix
```
I tried to remove collinear features, but the error kept popping up. Here is the public notebook.
What could be the root cause?
Hi,
As part of a Kaggle competition, I wanted to use NGBoost. However, it fails with `LinAlgError: Singular matrix`.
I tried to remove collinear features, but the error kept popping up. Here is the public notebook.
What could be the root cause?