Open CompRhys opened 1 year ago
What is the point of setting `random_state` if the algorithm is not deterministic even when the state is set?
"""Reproduction script: fitting NGBRegressor twice with the same
``random_state`` on identical training data yields slightly different
test metrics, i.e. the fit is not fully deterministic."""
from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing


def _fit_and_report(X_train, X_test, Y_train, Y_test):
    """Fit a seeded NGBRegressor and print test MSE and NLL.

    The seed is fixed at 0 so two successive calls with identical data
    should — if the estimator were deterministic — print identical numbers.
    """
    ngb = NGBRegressor(random_state=0).fit(X_train, Y_train)
    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)
    test_MSE = mean_squared_error(Y_preds, Y_test)
    print('Test MSE', test_MSE)
    # Negative mean log-likelihood of the predictive distribution.
    test_NLL = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL', test_NLL)


data = fetch_california_housing()
X = data["data"]
Y = data["target"]
# Seed the split as well, so the comparison is reproducible across
# separate runs of this script, not just within one process.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Run the identical experiment twice; any difference in the printed
# metrics demonstrates the non-determinism being reported.
_fit_and_report(X_train, X_test, Y_train, Y_test)
_fit_and_report(X_train, X_test, Y_train, Y_test)
Python 3.10.8 (main, Nov 24 2022, 08:09:04) [Clang 14.0.6 ] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> from ngboost import NGBRegressor >>> >>> from sklearn.model_selection import train_test_split >>> from sklearn.metrics import mean_squared_error >>> from sklearn.datasets import fetch_california_housing >>> a = fetch_california_housing() >>> X = a["data"] >>> Y = a["target"] >>> X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) >>> ngb = NGBRegressor(random_state=0).fit(X_train, Y_train) [iter 0] loss=1.5615 val_loss=0.0000 scale=1.0000 norm=1.1047 [iter 100] loss=1.1287 val_loss=0.0000 scale=2.0000 norm=1.5472 [iter 200] loss=0.9066 val_loss=0.0000 scale=1.0000 norm=0.6993 [iter 300] loss=0.7630 val_loss=0.0000 scale=1.0000 norm=0.6764 [iter 400] loss=0.6882 val_loss=0.0000 scale=1.0000 norm=0.6760 >>> Y_preds = ngb.predict(X_test) >>> Y_dists = ngb.pred_dist(X_test) >>> test_MSE = mean_squared_error(Y_preds, Y_test) >>> print('Test MSE', test_MSE) Test MSE 0.3187727427849739 >>> test_NLL = -Y_dists.logpdf(Y_test).mean() >>> print('Test NLL', test_NLL) Test NLL 0.7131524400157387 >>> ngb = NGBRegressor(random_state=0).fit(X_train, Y_train) [iter 0] loss=1.5615 val_loss=0.0000 scale=1.0000 norm=1.1047 [iter 100] loss=1.1287 val_loss=0.0000 scale=2.0000 norm=1.5472 [iter 200] loss=0.9066 val_loss=0.0000 scale=1.0000 norm=0.6993 [iter 300] loss=0.7630 val_loss=0.0000 scale=1.0000 norm=0.6764 [iter 400] loss=0.6882 val_loss=0.0000 scale=1.0000 norm=0.6760 >>> Y_preds = ngb.predict(X_test) >>> Y_dists = ngb.pred_dist(X_test) >>> test_MSE = mean_squared_error(Y_preds, Y_test) >>> print('Test MSE', test_MSE) Test MSE 0.3187673980553155 >>> test_NLL = -Y_dists.logpdf(Y_test).mean() >>> print('Test NLL', test_NLL) Test NLL 0.7129771828896784 >>>
What is the point of setting `random_state` if the algorithm is not deterministic even when the state is set?