stanfordmlgroup / ngboost

Natural Gradient Boosting for Probabilistic Prediction
Apache License 2.0

NGBoost is not deterministic when setting 'random_state' #313

Open · CompRhys opened this issue 1 year ago

What is the reason for setting random_state if the algorithm is not deterministic even when the state is set? Fitting the same model twice on the same split with random_state=0 gives slightly different results:

from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

a = fetch_california_housing()
X = a["data"]
Y = a["target"]
# Note: the split itself is unseeded, but both fits below reuse the same
# X_train/Y_train within one session, so the split is not the culprit.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# First fit with a fixed random_state.
ngb = NGBRegressor(random_state=0).fit(X_train, Y_train)
Y_preds = ngb.predict(X_test)
Y_dists = ngb.pred_dist(X_test)
test_MSE = mean_squared_error(Y_preds, Y_test)
print('Test MSE', test_MSE)
test_NLL = -Y_dists.logpdf(Y_test).mean()
print('Test NLL', test_NLL)

# Second, identical fit: with a working random_state this should
# reproduce the numbers above exactly.
ngb = NGBRegressor(random_state=0).fit(X_train, Y_train)
Y_preds = ngb.predict(X_test)
Y_dists = ngb.pred_dist(X_test)
test_MSE = mean_squared_error(Y_preds, Y_test)
print('Test MSE', test_MSE)
test_NLL = -Y_dists.logpdf(Y_test).mean()
print('Test NLL', test_NLL)
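
A more direct check than comparing the printed metrics is to compare the raw prediction arrays from two identically seeded fits. A minimal sketch (ngb1/ngb2 are my names, reusing the split from above; not part of NGBoost's API):

import numpy as np

ngb1 = NGBRegressor(random_state=0).fit(X_train, Y_train)
ngb2 = NGBRegressor(random_state=0).fit(X_train, Y_train)

p1 = ngb1.predict(X_test)
p2 = ngb2.predict(X_test)
# If fitting were deterministic, these arrays would be bitwise identical.
print("identical:", np.array_equal(p1, p2))
print("max abs difference:", np.abs(p1 - p2).max())

The interpreter session below runs the double fit: the per-iteration losses print identically to four decimals, yet the final test MSE and NLL differ.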
Python 3.10.8 (main, Nov 24 2022, 08:09:04) [Clang 14.0.6 ] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from ngboost import NGBRegressor
>>>
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.metrics import mean_squared_error
>>> from sklearn.datasets import fetch_california_housing
>>> a = fetch_california_housing()
>>> X = a["data"]
>>> Y = a["target"]
>>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
>>> ngb = NGBRegressor(random_state=0).fit(X_train, Y_train)
[iter 0] loss=1.5615 val_loss=0.0000 scale=1.0000 norm=1.1047
[iter 100] loss=1.1287 val_loss=0.0000 scale=2.0000 norm=1.5472
[iter 200] loss=0.9066 val_loss=0.0000 scale=1.0000 norm=0.6993
[iter 300] loss=0.7630 val_loss=0.0000 scale=1.0000 norm=0.6764
[iter 400] loss=0.6882 val_loss=0.0000 scale=1.0000 norm=0.6760
>>> Y_preds = ngb.predict(X_test)
>>> Y_dists = ngb.pred_dist(X_test)
>>> test_MSE = mean_squared_error(Y_preds, Y_test)
>>> print('Test MSE', test_MSE)
Test MSE 0.3187727427849739
>>> test_NLL = -Y_dists.logpdf(Y_test).mean()
>>> print('Test NLL', test_NLL)
Test NLL 0.7131524400157387
>>> ngb = NGBRegressor(random_state=0).fit(X_train, Y_train)
[iter 0] loss=1.5615 val_loss=0.0000 scale=1.0000 norm=1.1047
[iter 100] loss=1.1287 val_loss=0.0000 scale=2.0000 norm=1.5472
[iter 200] loss=0.9066 val_loss=0.0000 scale=1.0000 norm=0.6993
[iter 300] loss=0.7630 val_loss=0.0000 scale=1.0000 norm=0.6764
[iter 400] loss=0.6882 val_loss=0.0000 scale=1.0000 norm=0.6760
>>> Y_preds = ngb.predict(X_test)
>>> Y_dists = ngb.pred_dist(X_test)
>>> test_MSE = mean_squared_error(Y_preds, Y_test)
>>> print('Test MSE', test_MSE)
Test MSE 0.3187673980553155
>>> test_NLL = -Y_dists.logpdf(Y_test).mean()
>>> print('Test NLL', test_NLL)
Test NLL 0.7129771828896784
>>>
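
One hypothesis worth ruling out is that some component draws from NumPy's global RNG rather than from the RNG seeded by random_state. A speculative way to test this (nothing below is confirmed NGBoost behaviour):

import numpy as np

# Hypothesis only: if pinning the global RNG makes the two fits agree,
# something is drawing from np.random instead of the seeded random_state.
np.random.seed(0)
ngb1 = NGBRegressor(random_state=0).fit(X_train, Y_train)
np.random.seed(0)
ngb2 = NGBRegressor(random_state=0).fit(X_train, Y_train)
print("max abs difference:",
      np.abs(ngb1.predict(X_test) - ngb2.predict(X_test)).max())

If the difference persists even with the global seed pinned, the nondeterminism presumably enters elsewhere, e.g. in floating-point accumulation order inside the base learners.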