amices / mice

Multivariate Imputation by Chained Equations
https://amices.org/mice/
GNU General Public License v2.0
428 stars 107 forks source link

Update pool.R #549

Closed KyuriP closed 1 year ago

KyuriP commented 1 year ago

update pool() function to allow for custom df

stefvanbuuren commented 1 year ago

Thanks for your PR. A few comments:

gerkovink commented 1 year ago

@stefvanbuuren We're still working on this; let's leave it for now and I'll re-open later when we're at a more refined state. The goal is to allow for a custom degrees of freedom, much like the custom.t argument.

gerkovink commented 1 year ago
library(mice, warn.conflicts = FALSE)
library(magrittr)
set.seed(123)
imp <- mice(nhanes, printFlag = FALSE)
fit <- with(imp, lm(age ~ bmi))

# regular calculation
pool(fit) %T>% 
  print %>% summary()
#> Class: mipo    m = 5 
#>          term m    estimate        ubar            b           t dfcom       df
#> 1 (Intercept) 5  3.85793713 1.101684619 0.1435955094 1.273999230    23 16.93708
#> 2         bmi 5 -0.07826576 0.001498793 0.0002086205 0.001749137    23 16.64170
#>         riv    lambda       fmi
#> 1 0.1564101 0.1352549 0.2220023
#> 2 0.1670308 0.1431246 0.2303752
#>          term    estimate  std.error statistic       df     p.value
#> 1 (Intercept)  3.85793713 1.12871574  3.417988 16.93708 0.003292004
#> 2         bmi -0.07826576 0.04182269 -1.871371 16.64170 0.078980518

# custom calculation
pool(fit, 
     rule = "reiter2003", 
     custom.t = ".data$b + .data$b/m", 
     custom.df = ".data$m - 1") %T>% 
  print %>% summary()
#> Class: mipo    m = 5 
#>          term m    estimate        ubar            b            t dfcom df
#> 1 (Intercept) 5  3.85793713 1.101684619 0.1435955094 0.1723146113    23  4
#> 2         bmi 5 -0.07826576 0.001498793 0.0002086205 0.0002503446    23  4
#>         riv lambda fmi
#> 1 0.1564101     NA  NA
#> 2 0.1670308     NA  NA
#>          term    estimate  std.error statistic df      p.value
#> 1 (Intercept)  3.85793713 0.41510795  9.293817  4 0.0007457218
#> 2         bmi -0.07826576 0.01582228 -4.946554  4 0.0077803039

# another custom calculation
outsideoffunction <- 14
Rubin_original <- "(.data$m-1)*(1 + (1/((1 + 1 / .data$m) * .data$b / .data$ubar)))^2"
pool(fit, 
     rule = "rubin1987", 
     custom.t = "outsideoffunction", 
     custom.df = "Rubin_original")  %T>% 
  print %>% summary()
#> Error in `summarize()`:
#> ℹ In argument: `fmi = (.data$riv + 2/(.data$df + 3))/(.data$riv + 1)`.
#> ℹ In group 1: `term = (Intercept)`.
#> Caused by error in `.data$df + 3`:
#> ! non-numeric argument to binary operator
#> Backtrace:
#>      ▆
#>   1. ├─... %>% summary()
#>   2. ├─base::summary(.)
#>   3. ├─base::print(.)
#>   4. ├─mice::pool(...)
#>   5. │ └─mice:::pool.fitlist(...) at mice/R/pool.R:164:2
#>   6. │   └─w %>% group_by(!!!syms(grp)) %>% ... at mice/R/pool.R:197:4
#>   7. ├─dplyr::summarize(...)
#>   8. ├─dplyr:::summarise.grouped_df(...)
#>   9. │ └─dplyr:::summarise_cols(.data, dplyr_quosures(...), by, "summarise")
#>  10. │   ├─base::withCallingHandlers(...)
#>  11. │   └─dplyr:::map(quosures, summarise_eval_one, mask = mask)
#>  12. │     └─base::lapply(.x, .f, ...)
#>  13. │       └─dplyr (local) FUN(X[[i]], ...)
#>  14. │         └─mask$eval_all_summarise(quo)
#>  15. │           └─dplyr (local) eval()
#>  16. └─base::.handleSimpleError(...)
#>  17.   └─dplyr (local) h(simpleError(msg, call))
#>  18.     └─dplyr (local) handler(cnd)
#>  19.       └─rlang::abort(message, class = error_class, parent = parent, call = error_call)

Created on 2023-04-26 with reprex v2.0.2

gerkovink commented 1 year ago

@KyuriP See below, we should add to the help that the result of the custom functions must be of class numeric.

library(mice, warn.conflicts = FALSE)
library(magrittr)
set.seed(123)
imp <- mice(nhanes, printFlag = FALSE)
fit <- with(imp, lm(age ~ bmi))

# regular calculation
pool(fit) %T>% 
  print %>% summary()
#> Class: mipo    m = 5 
#>          term m    estimate        ubar            b           t dfcom       df
#> 1 (Intercept) 5  3.85793713 1.101684619 0.1435955094 1.273999230    23 16.93708
#> 2         bmi 5 -0.07826576 0.001498793 0.0002086205 0.001749137    23 16.64170
#>         riv    lambda       fmi
#> 1 0.1564101 0.1352549 0.2220023
#> 2 0.1670308 0.1431246 0.2303752
#>          term    estimate  std.error statistic       df     p.value
#> 1 (Intercept)  3.85793713 1.12871574  3.417988 16.93708 0.003292004
#> 2         bmi -0.07826576 0.04182269 -1.871371 16.64170 0.078980518

# custom calculation
pool(fit, 
     rule = "reiter2003", 
     custom.t = ".data$b + .data$b/m", 
     custom.df = ".data$m - 1") %T>% 
  print %>% summary()
#> Class: mipo    m = 5 
#>          term m    estimate        ubar            b            t dfcom df
#> 1 (Intercept) 5  3.85793713 1.101684619 0.1435955094 0.1723146113    23  4
#> 2         bmi 5 -0.07826576 0.001498793 0.0002086205 0.0002503446    23  4
#>         riv lambda fmi
#> 1 0.1564101     NA  NA
#> 2 0.1670308     NA  NA
#>          term    estimate  std.error statistic df      p.value
#> 1 (Intercept)  3.85793713 0.41510795  9.293817  4 0.0007457218
#> 2         bmi -0.07826576 0.01582228 -4.946554  4 0.0077803039

# another custom calculation
outsideoffunction <- 14
Rubin_original <- "(.data$m-1)*(1 + (1/((1 + 1 / .data$m) * .data$b / .data$ubar)))^2"
pool(fit, 
     rule = "rubin1987", 
     custom.t = outsideoffunction, 
     custom.df = Rubin_original)  %T>% 
  print %>% summary()
#> Class: mipo    m = 5 
#>          term m    estimate        ubar            b  t dfcom       df
#> 1 (Intercept) 5  3.85793713 1.101684619 0.1435955094 14    23 218.6523
#> 2         bmi 5 -0.07826576 0.001498793 0.0002086205 14    23 195.2681
#>         riv       lambda       fmi
#> 1 0.1564101 1.230819e-02 0.1430576
#> 2 0.1670308 1.788175e-05 0.1517682
#>          term    estimate std.error  statistic       df   p.value
#> 1 (Intercept)  3.85793713  3.741657  1.0310771 218.6523 0.3036438
#> 2         bmi -0.07826576  3.741657 -0.0209174 195.2681 0.9833329

Created on 2023-04-26 with reprex v2.0.2