imbs-hl / ranger

A Fast Implementation of Random Forests
http://imbs-hl.github.io/ranger/
776 stars 194 forks source link

warnings generated running 'Understanding random forests with randomForestExplainer' code #679

Closed dougedmunds closed 1 year ago

dougedmunds commented 1 year ago

Environment: R version 4.3.1 (2023-06-16 ucrt) -- "Beagle Scouts"; Copyright (C) 2023 The R Foundation for Statistical Computing; Platform: x86_64-w64-mingw32/x64 (64-bit). System: Win11 Home, AMD Ryzen 5 2600 Six-Core, 16 GB RAM.

Running the code in the vignette "Understanding random forests with randomForestExplainer" produces warning messages in several locations. I have added the warnings generated by R after the code lines which triggered them.


``

#running code from the vignette
#Understanding random forests with randomForestExplainer

library(randomForest)
library(randomForestExplainer)

data(Boston, package = "MASS")
Boston$chas <- as.logical(Boston$chas)
str(Boston)

set.seed(2017)
forest <- randomForest(medv ~ ., data = Boston, localImp = TRUE)
forest

#min_depth_frame <- min_depth_distribution(forest)
#save(min_depth_frame, file = "min_depth_frame.rda")
load("min_depth_frame.rda")
head(min_depth_frame, n = 10)

plot_min_depth_distribution(min_depth_frame)

plot_min_depth_distribution(min_depth_frame, mean_sample = "relevant_trees", k = 15)

# importance_frame <- measure_importance(forest)
# save(importance_frame, file = "importance_frame.rda")
load("importance_frame.rda")
importance_frame

plot_multi_way_importance(importance_frame, size_measure = "no_of_nodes")

plot_multi_way_importance(importance_frame, x_measure = "mse_increase", 
                          y_measure = "node_purity_increase", 
                          size_measure = "p_value", no_of_labels = 5)
# system generated
# Warning message:
#  Using alpha for a discrete variable is not advised. 

plot_importance_ggpairs(importance_frame)

plot_importance_rankings(importance_frame)
# system generated
#warning messages:
# There were 18 warnings (use warnings() to see them)   

(vars <- important_variables(importance_frame, k = 5, measures = c("mean_min_depth", "no_of_trees")))

# interactions_frame <- min_depth_interactions(forest, vars)

#system generated
#Warning messages:
#  1: `funs()` was deprecated in dplyr 0.8.0.
#ℹ Please use a list of either functions or lambdas:
#  
#  # Simple named list: list(mean = mean, median = median)
#  
#  # Auto named with `tibble::lst()`: tibble::lst(mean, median)
#  
#  # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
#  ℹ The deprecated feature was likely used in the randomForestExplainer package.
#Please report the issue to the authors.
#This warning is displayed once every 8 hours.
#Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. 
#2: There were 8719 warnings in `summarise()`.
#The first warning was:
#  ℹ In argument: `lstat = min(lstat, na.rm = TRUE)`.
#ℹ In group 26: `tree = 2`, `split var = "zn"`.
#Caused by warning in `min()`:
#  ! no non-missing arguments to min; returning Inf
#ℹ Run dplyr::last_dplyr_warnings() to see the 8718 remaining warnings. 

# save(interactions_frame, file = "interactions_frame.rda")
load("interactions_frame.rda")
head(interactions_frame[order(interactions_frame$occurrences, decreasing = TRUE), ])

plot_min_depth_interactions(interactions_frame)

# interactions_frame <- min_depth_interactions(forest, vars, mean_sample = "relevant_trees",
#                                              uncond_mean_sample = "relevant_trees")

#system generated
#Warning messages:
#  There were 8719 warnings in `summarise()`.
#The first warning was:
#  ℹ In argument: `lstat = min(lstat, na.rm = TRUE)`.
#ℹ In group 26: `tree = 2`, `split var = "zn"`.
#Caused by warning in `min()`:
#  ! no non-missing arguments to min; returning Inf
#ℹ Run dplyr::last_dplyr_warnings() to see the 8718 remaining warnings. 

# save(interactions_frame, file = "interactions_frame_relevant.rda")
load("interactions_frame_relevant.rda")
plot_min_depth_interactions(interactions_frame)

plot_predict_interaction(forest, Boston, "rm", "lstat")

explain_forest(forest, interactions = TRUE, data = Boston)

#processing file: Explain_forest_template.Rmd
#|.............................................................................................| 100% [conditional_print]
#
#processing file: ./Explain_forest_template_interactions.Rmd
#
#
#output file: Explain_forest_template.knit.md
#
#"C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS Explain_forest_template.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandocfb4680a66ec.html --lua-filter "C:\Users\DAE\AppData\Local\R\win-library\4.3\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\DAE\AppData\Local\R\win-library\4.3\rmarkdown\rmarkdown\lua\latex-div.lua" --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 3 --variable toc_float=1 --variable toc_selectors=h1,h2,h3 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template "C:\Users\DAE\AppData\Local\R\win-library\4.3\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable theme=bootstrap --mathjax --variable "mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --include-in-header "C:\Users\DAE\AppData\Local\Temp\Rtmp8KMTJN\rmarkdown-strfb47dc2577.html" 
#
#Output created: Your_forest_explained.html
#

``

mnwright commented 1 year ago

This is the `ranger` repository, not `randomForest` or `randomForestExplainer`. Try this one: https://github.com/ModelOriented/randomForestExplainer.