tidymodels / butcher

Reduce the size of model objects saved to disk
https://butcher.tidymodels.org/
Other
131 stars 12 forks source link

add butcher methods for `klaR::rda` and `klaR::NaiveBayes` #248

Closed simonpcouch closed 1 year ago

simonpcouch commented 1 year ago

Related to #234. :)

library(butcher)
library(klaR)
#> Loading required package: MASS

# rda ------------------------------------------------------------------
# Fit a regularized discriminant analysis model (klaR::rda) on simulated data.
#
# `boop` is never used by the model: it is a 1e6-element numeric vector that
# simply lives in this function's environment. Presumably it is there so the
# fitted object drags that environment along (note the large `terms` entry in
# the weigh(mod_fit) output below), giving butcher() something to remove.
# Do not delete it as "dead code" -- the demonstration depends on it.
fit_mod <- function() {
  boop <- runif(1e6)
  rda(
    y ~ x,
    # Four class labels ("a".."d") cycled 1e4 times; x is independent noise.
    data = data.frame(y = rep(letters[1:4], 1e4), x = rnorm(4e4)),
    gamma = 0.05,
    lambda = 0.2
  )
}

# Fit the rda model once, then produce a butchered copy for comparison.
mod_fit <- fit_mod()
mod_res <- butcher(mod_fit)

# Per-component sizes of the original fit (compare with weigh(mod_res) below).
weigh(mod_fit)
#> # A tibble: 12 × 2
#>    object             size
#>    <chr>             <dbl>
#>  1 terms          8.00    
#>  2 call           0.00235 
#>  3 covariances    0.00092 
#>  4 means          0.00084 
#>  5 covpooled      0.000568
#>  6 prior          0.000496
#>  7 regularization 0.000352
#>  8 classes        0.000304
#>  9 error.rate     0.00028 
#> 10 varnames       0.000112
#> 11 converged      0.000056
#> 12 iter           0.000056
weigh(mod_res)
#> # A tibble: 12 × 2
#>    object             size
#>    <chr>             <dbl>
#>  1 terms          0.00332 
#>  2 covariances    0.00092 
#>  3 means          0.00084 
#>  4 covpooled      0.000568
#>  5 prior          0.000496
#>  6 regularization 0.000352
#>  7 classes        0.000304
#>  8 error.rate     0.00028 
#>  9 call           0.000112
#> 10 varnames       0.000112
#> 11 converged      0.000056
#> 12 iter           0.000056

predict(mod_fit, data.frame(x = 1))
#> $class
#> [1] c
#> Levels: a b c d
#> 
#> $posterior
#>              a         b         c         d
#> [1,] 0.2499478 0.2496472 0.2514357 0.2489692
predict(mod_res, data.frame(x = 1))
#> $class
#> [1] c
#> Levels: a b c d
#> 
#> $posterior
#>              a         b         c         d
#> [1,] 0.2499478 0.2496472 0.2514357 0.2489692

# NaiveBayes ------------------------------------------------------------------
# Fit a naive Bayes classifier (klaR::NaiveBayes) on simulated data.
# This redefines the `fit_mod` used for the rda example earlier in the reprex.
#
# `boop` is never used by the model: it is a 1e6-element numeric vector that
# only occupies this function's environment, mirroring the rda example.
# NOTE(review): unlike the rda case, the weigh() output below shows no
# component of comparable size, so it is unclear whether the fit retains it.
fit_mod <- function() {
  boop <- runif(1e6)
  NaiveBayes(
    y ~ x,
    # Same simulated data shape as the rda example, but y is made a factor here.
    data = data.frame(y = as.factor(rep(letters[1:4], 1e4)), x = rnorm(4e4))
  )
}

# Fit the NaiveBayes model once, then produce a butchered copy for comparison.
mod_fit <- fit_mod()
mod_res <- butcher(mod_fit)

# Per-component sizes of the original fit (compare with weigh(mod_res) below).
weigh(mod_fit)
#> # A tibble: 7 × 2
#>   object        size
#>   <chr>        <dbl>
#> 1 x.x       0.320   
#> 2 apriori   0.00118 
#> 3 tables.x  0.00076 
#> 4 call      0.000448
#> 5 levels    0.000304
#> 6 varnames  0.000112
#> 7 usekernel 0.000056
weigh(mod_res)
#> # A tibble: 6 × 2
#>   object        size
#>   <chr>        <dbl>
#> 1 apriori   0.00118 
#> 2 tables.x  0.00076 
#> 3 levels    0.000304
#> 4 call      0.000112
#> 5 varnames  0.000112
#> 6 usekernel 0.000056

predict(mod_fit, data.frame(x = 1))
#> $class
#> [1] a
#> Levels: a b c d
#> 
#> $posterior
#>              a         b         c         d
#> [1,] 0.2524843 0.2474297 0.2491512 0.2509349
predict(mod_res, data.frame(x = 1))
#> $class
#> [1] a
#> Levels: a b c d
#> 
#> $posterior
#>              a         b         c         d
#> [1,] 0.2524843 0.2474297 0.2491512 0.2509349

Created on 2023-01-21 with reprex v2.0.2

github-actions[bot] commented 1 year ago

This pull request has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this pull request.