Open exalate-issue-sync[bot] opened 1 year ago
Spencer Aiello commented: h2o.performance does appear to be working -- but is there nothing worth reporting in the JSON response?
Here's the raw response:
{"meta":{"schema_version":[3],"schema_name":["ModelMetricsListSchemaV3"],"schema_type":["ModelMetricsList"]},"_exclude_fields":[""],"model":{"__meta":{"schema_version":[3],"schema_name":["ModelKeyV3"],"schema_type":["Key
JIRA Issue Migration Info
Jira Issue: PUBDEV-1807 Assignee: Spencer Butt Reporter: Arno Candel State: Open Fix Version: N/A Attachments: N/A Development PRs: N/A
# Reproduction script: fit three GBMs on the covtype data (imbalanced frame,
# caret-upsampled frame, and balance_classes = TRUE), plot the predicted p1
# histograms on the validation split, then score one model on that split.
library(h2o)
h <- h2o.init()
# NOTE(review): machine-specific path; presumably the root of an h2o-3
# checkout, since the data file below is given relative to it -- adjust locally.
setwd("/users/arno/h2o-3")
covtype <- h2o.uploadFile("smalldata/covtype/covtype.20k.data")

# Turn column 55 into a two-level factor response: whether the value equals 6.
covtype[, 55] <- covtype[, 55] == 6
covtype[, 55] <- as.factor(covtype[, 55])

# Split rows on a uniform random draw: <= 0.8 goes to train, the rest to valid.
s <- h2o.runif(covtype)
train <- covtype[s <= 0.8, ]
valid <- covtype[s > 0.8, ]

# Build an upsampled copy of the training frame with caret's upSample().
library(caret)
cov_df <- as.data.frame(train[, 55])
cov_df$C55 <- as.factor(cov_df$C55)
train_df <- as.data.frame(train[, -55])
train_balanced <- as.h2o(upSample(train_df, cov_df$C55))

################## GBM
# Same hyper-parameters for all three fits; only the training frame and the
# balance_classes flag differ.
hh_imbalanced_gbm <- h2o.gbm(
  x = 1:54, y = 55, ntrees = 10, min_rows = 5, learn_rate = 0.2,
  training_frame = train, distribution = "bernoulli"
)
hh_balanced_gbm <- h2o.gbm(
  x = 1:54, y = 55, ntrees = 10, min_rows = 5, learn_rate = 0.2,
  training_frame = train_balanced, distribution = "bernoulli"
)
hh_balanced_gbm_bc <- h2o.gbm(
  x = 1:54, y = 55, ntrees = 10, min_rows = 5, learn_rate = 0.2,
  training_frame = train, distribution = "bernoulli", balance_classes = TRUE
)

pred_imbalanced_gbm <- predict(hh_imbalanced_gbm, valid)
pred_imbalanced_p1_df_gbm <- as.data.frame(pred_imbalanced_gbm$p1)
hist(pred_imbalanced_p1_df_gbm$p1) # OK

pred_balanced_gbm <- predict(hh_balanced_gbm, valid)
pred_balanced_p1_df_gbm <- as.data.frame(pred_balanced_gbm$p1)
hist(pred_balanced_p1_df_gbm$p1) # OK

pred_balanced_gbm_bc <- predict(hh_balanced_gbm_bc, valid)
pred_balanced_p1_df_gbm_bc <- as.data.frame(pred_balanced_gbm_bc$p1)
hist(pred_balanced_p1_df_gbm_bc$p1)
############################################ LIMITED RANGE ###########################################

# Print the model fitted on the upsampled frame, then score it on valid.
hh_balanced_gbm
h2o.performance(model = hh_balanced_gbm, data = valid)
H2OBinomialModel: gbm Model ID: GBM_model_R_1438225022307_20 Model Summary: number_of_trees model_size_in_bytes min_depth max_depth mean_depth min_leaves max_leaves mean_leaves 1 10.000000 3754.000000 5.000000 5.000000 5.00000 20.000000 32.000000 26.90000
H2OBinomialMetrics: gbm Reported on training data.
MSE: 0.06496919 R^2: 0.7401232 LogLoss: 0.2481969 AUC: 0.9711492 Gini: 0.9422984
Confusion Matrix for F1-optimal threshold: 0 1 Error Rate 0 12530 1757 0.122979 =1757/14287 1 309 13978 0.021628 =309/14287 Totals 12839 15735 0.072303 =2066/28574
Maximum Metrics: metric threshold value idx 1 max f1 0.569247 0.931184 182.000000 2 max f2 0.477460 0.965155 218.000000 3 max f0point5 0.703187 0.922826 130.000000 4 max accuracy 0.676281 0.929131 142.000000 5 max precision 0.914725 1.000000 0.000000 6 max absolute_MCC 0.569247 0.859820 182.000000 7 max min_per_class_accuracy 0.709157 0.921747 128.000000
MSE: NaN R^2: NaN LogLoss: NaN AUC: 0 Gini: 0
NULL NULL