RGF-team / rgf

Home repository for the Regularized Greedy Forest (RGF) library. It includes original implementation from the paper and multithreaded one written in C++, along with various language-specific wrappers.
378 stars 58 forks source link

updated the R-package by adding the warm-start and save_model features #273

Closed mlampros closed 5 years ago

mlampros commented 5 years ago


I've added the warm-start and save_model features. I tested them locally using the following test cases:



#---------------
# RGF Regressor
#---------------

# Warm-start check: fit a small forest, save it to disk, then continue
# training a larger forest that is initialised from the saved model.
library(RGF)

set.seed(1)
x <- matrix(runif(1000), nrow = 100, ncol = 10)
y <- runif(100)

# Initial model, created with max_leaf = 50.
RGF_regr <- RGF_Regressor$new(max_leaf = 50)
RGF_regr$fit(x, y)

# Save into the session's temporary directory so the example runs as-is,
# without first editing a placeholder path.
pth <- file.path(tempdir(), "model_rgf")
RGF_regr$save_model(filename = pth)

preds <- RGF_regr$predict(x)

# Warm start from the saved model; max_leaf is raised from 50 to 150.
RGF_regr1 <- RGF_Regressor$new(max_leaf = 150, init_model = pth)
RGF_regr1$fit(x, y)

preds1 <- RGF_regr1$predict(x)   # there are differences between 'preds' and 'preds1' to observe

#--------------------------------------------------------------------------------------------------------------- ERROR CASE
# IF I GIVE :    RGF_Regressor$new(max_leaf = 50, init_model = pth)     THEN I RECEIVE THE FOLLOWING ERROR :
#-----------------------------------------------------------------------------------------------------------

# Error in py_call_impl(callable, dots$args, dots$keywords) : 
#   Exception: "train": 
#   algorithm=RGF
# train_x_fn=/tmp/rgf/2e3daf71-b125-4b77-a99a-fb74031c0f6e2.train.data.x
# train_y_fn=/tmp/rgf/2e3daf71-b125-4b77-a99a-fb74031c0f6e2.train.data.y
# Log:ON
# model_fn_prefix=/tmp/rgf/2e3daf71-b125-4b77-a99a-fb74031c0f6e2.model
# model_fn_for_warmstart=/home/lampros/Pictures/model_rgf
# --------------------
#   Thu Dec  6 20:26:49 2018: Reading training data ... 
# Thu Dec  6 20:26:49 2018: Start ... #train=100
# --------------------
#   Forest-level: 
#   loss=LS
# max_leaf_forest=50
# max_tree=25
# opt_interval=100
# test_interval=100
# num_tree_search=1
# memory_policy=Generous
# !Input error!: (Detected in AzRgforest::warmup_timer) 
# The model given for warm-start is already over the requested maximum size of the models: #leaf=50, #tree=14 

#-----------------------------------------------------------------------------------------------
# THE ERROR APPEARS BECAUSE max_leaf (THE NUMBER OF LEAVES, NOT TREES) SHOULD BE GREATER THAN THAT OF THE INITIAL MODEL (WHICH IS 50)
#---------------------------------------------------------------------------------------------------------------

#---------------
# RGF Classifier
#---------------

# Warm-start check: fit a small forest on a two-class problem, save it to
# disk, then continue training a larger forest initialised from the saved model.
library(RGF)

set.seed(1)
x <- matrix(runif(1000), nrow = 100, ncol = 10)
y <- sample(1:2, 100, replace = TRUE)   # two class labels: 1 and 2

# Initial model, created with max_leaf = 50.
RGF_class <- RGF_Classifier$new(max_leaf = 50)
RGF_class$fit(x, y)

# Save into the session's temporary directory so the example runs as-is,
# without first editing a placeholder path.
pth <- file.path(tempdir(), "model_rgf")
RGF_class$save_model(filename = pth)

preds <- RGF_class$predict(x)

# Warm start from the saved model; max_leaf is raised from 50 to 150.
RGF_class1 <- RGF_Classifier$new(max_leaf = 150, init_model = pth)
RGF_class1$fit(x, y)

preds1 <- RGF_class1$predict(x)

# Cross-tabulate the two sets of predicted labels.
table(preds, preds1)                  # there are differences between 'preds' and 'preds1' to observe

The tests can be added by @jameslamb, as we agreed in issue #269. @fukatani, @StrikerRUS, this pull request is ready for review.