Is it possible to extend ggstats::ggcoef_table for MICE-imputed GEE-GLM (geeglm) model pooled coefficients?

It partially works, but fails to recognize everything. It doesn't recognize odds ratios, doesn't format the coefficients nicely like in the examples.

The model is the GEE estimated logistic regression (geepack::geeglm). There exists a basic tidier:

> tidy(mod)
                                         term estimate std.error statistic p.value        b    df dfcom    fmi lambda  m    riv     ubar
1                                 (Intercept)  -0.2990    0.4300   -0.6953  0.4868 5.74e-03 17876   Inf 0.0327 0.0326 20 0.0337 0.178911
2                                      ArmHD    0.3809    0.6201    0.6142  0.5391 2.18e-02  5378   Inf 0.0598 0.0594 20 0.0632 0.361710
3                          Visit_nOrdMonth 12  -0.0829    0.3678   -0.2253  0.8218 2.10e-02   713   Inf 0.1656 0.1633 20 0.1951 0.113180
4                          Visit_nOrdMonth 20  -0.3074    0.3837   -0.8012  0.4237 3.61e-02   287   Inf 0.2623 0.2572 20 0.3462 0.109375
5                            CurrentSmokerYes  -0.7840    0.7094   -1.1052  0.2691 1.07e-02 38132   Inf 0.0224 0.0223 20 0.0228 0.491951
...

But the results:

> ggstats::ggcoef_table(mod, exponentiate = TRUE)
✖ Unable to identify the list of variables.

This is usually due to an error calling `stats::model.frame(x)`or `stats::model.matrix(x)`.
It could be the case if that type of model does not implement these methods.
Rarely, this error may occur if the model object was created within
a functional programming framework (e.g. using `lappy()`, `purrr::map()`, etc.).

obraz

Pooled model data:

mod <- structure(list(call = pool(object = m_longit), m = 20L, pooled = structure(list(
    term = structure(1:30, levels = c("(Intercept)", "ArmHD ", 
    "Visit_nOrdMonth 12", "Visit_nOrdMonth 20", "CurrentSmokerYes", 
    "BMI_centered", "GenderMale", "Age_centered", "ArmHD :Visit_nOrdMonth 12", 
    "ArmHD :Visit_nOrdMonth 20", "ArmHD :CurrentSmokerYes", 
    "ArmHD :BMI_centered", "ArmHD :GenderMale", 
    "ArmHD :Age_centered", "Visit_nOrdMonth 12:CurrentSmokerYes", 
    "Visit_nOrdMonth 20:CurrentSmokerYes", "Visit_nOrdMonth 12:BMI_centered", 
    "Visit_nOrdMonth 20:BMI_centered", "Visit_nOrdMonth 12:GenderMale", 
    "Visit_nOrdMonth 20:GenderMale", "Visit_nOrdMonth 12:Age_centered", 
    "Visit_nOrdMonth 20:Age_centered", "ArmHD :Visit_nOrdMonth 12:CurrentSmokerYes", 
    "ArmHD :Visit_nOrdMonth 20:CurrentSmokerYes", "ArmHD :Visit_nOrdMonth 12:BMI_centered", 
    "ArmHD :Visit_nOrdMonth 20:BMI_centered", "ArmHD :Visit_nOrdMonth 12:GenderMale", 
    "ArmHD :Visit_nOrdMonth 20:GenderMale", "ArmHD :Visit_nOrdMonth 12:Age_centered", 
    "ArmHD :Visit_nOrdMonth 20:Age_centered"), class = "factor"), 
    m = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
    20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
    20L, 20L, 20L, 20L, 20L, 20L, 20L), estimate = c(-0.29903187595548, 
    0.380872027401296, -0.0828676502765822, -0.307444671001533, 
    -0.783980495389177, 0.035212462157362, 0.250206944263597, 
    -0.017234017916225, 0.0873716935040566, 0.501669357945944, 
    0.629976239010189, 0.0103309021118499, -0.710440238180912, 
    0.0284496288428124, 0.187255608262129, 0.365859459306186, 
    -0.113801927460189, -0.0214189834022172, 0.639079773673284, 
    0.467942998794559, 0.0336432513729626, -0.0032003933188581, 
    -0.0390287655013661, -0.367468618749712, 0.110564691330047, 
    0.0195080672542059, -0.609947802591084, -0.514663582186983, 
    -0.0191275913422143, 0.0229444015254167), ubar = c(0.178910542612104, 
    0.361709639160786, 0.11318010075047, 0.109374942177015, 0.491950601812907, 
    0.00359752616836495, 0.37258391707241, 0.000779654085582352, 
    0.248436936335551, 0.253871527293263, 0.821531331040922, 
    0.00762841036178101, 0.695736347093203, 0.00144871817311475, 
    0.400419307876188, 0.582910240807128, 0.00341763960905729, 
    0.00448395840623119, 0.203718949538353, 0.322473550400124, 
    0.000569997779978852, 0.000608363225145927, 0.646926965308594, 
    0.899047011197042, 0.00617998444713391, 0.00874507037666418, 
    0.440344746613085, 0.615333409679517, 0.00154075485784863, 
    0.00123053826533071), b = c(0.00574228980863424, 0.0217701125420553, 
    0.0210353227321423, 0.0360599329922958, 0.0106970974728955, 
    0.000609255139901839, 0.0231763298949901, 2.57010039837727e-05, 
    0.0301639128892515, 0.036985935195778, 0.0370945204494253, 
    0.000944934906536697, 0.0306037473509527, 0.000249506763463724, 
    0.202339930508713, 0.094294780937389, 0.000974055403472398, 
    0.0026671211819612, 0.0692512364870469, 0.0402692080443424, 
    8.49167907626829e-05, 0.000182968384479355, 0.250361623612087, 
    0.123376260799054, 0.00190559100079968, 0.00279970449631357, 
    0.133737715221484, 0.0696958456346859, 0.000579398967755914, 
    0.000380137562633506), t = c(0.18493994691117, 0.384568257329944, 
    0.135267189619219, 0.147237871818926, 0.503182554159447, 
    0.00423724406526188, 0.39691906346215, 0.000806640139765313, 
    0.280109044869265, 0.29270675924883, 0.860480577512819, 0.00862059201364454, 
    0.727870281811703, 0.00171070027475166, 0.612876234910337, 
    0.681919760791386, 0.00444039778270331, 0.00728443564729045, 
    0.276432747849752, 0.364756218846684, 0.000659160410279669, 
    0.00080048002884925, 0.909806670101286, 1.02859208503605, 
    0.00818085499797357, 0.0116847600977934, 0.580769347595643, 
    0.688514047595937, 0.00214912377399234, 0.00162968270609589
    ), dfcom = c(Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, 
    Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, 
    Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf), df = c(17875.8312273481, 
    5377.74893584068, 712.625859410522, 287.318926486729, 38132.409398344, 
    833.572847238697, 5054.63256049474, 16975.9654941346, 1486.1201142506, 
    1079.36242884061, 9273.37078501162, 1434.31784421067, 9748.40946606314, 
    810.13651553986, 158.109284183625, 901.293506980286, 358.138852078407, 
    128.552737041216, 274.598980261076, 1413.95059720754, 1038.41293270817, 
    329.855627850284, 227.581899074746, 1197.83817922747, 317.623777540153, 
    300.185847842698, 324.993426903818, 1681.84833695141, 237.105935251678, 
    316.737544169371), riv = c(0.033700665209754, 0.0631960437167034, 
    0.195149931147748, 0.346175539737723, 0.0228314637793895, 
    0.177821610450628, 0.0653145379461191, 0.0346128554726992, 
    0.127485505983443, 0.152971986932216, 0.0474105429704621, 
    0.130064011348216, 0.0461869425864503, 0.180837174889335, 
    0.530586120237343, 0.169853800899371, 0.299258637726326, 
    0.624554687476034, 0.356931932332146, 0.131119803140739, 
    0.156426276439402, 0.315792927255325, 0.406351441336649, 
    0.144091546076687, 0.323766276105696, 0.336153923812171, 
    0.318896959853922, 0.118928432562332, 0.394851207539387, 
    0.324365728405779), lambda = c(0.0326019575530752, 0.0594396904410815, 
    0.163284895109635, 0.25715482826643, 0.0223218238662962, 
    0.150974993897925, 0.0613100972714056, 0.0334548863273935, 
    0.113070638431174, 0.132676239029223, 0.0452645271604824, 
    0.115094375223085, 0.04414788667909, 0.153143192588159, 0.346655515310087, 
    0.145192331527914, 0.230330304557391, 0.384446699326795, 
    0.263043358201978, 0.115920349707134, 0.135266968267993, 
    0.24000199477744, 0.288940181943737, 0.125944070272003, 0.244579637621653, 
    0.251583232905602, 0.241790655040438, 0.106287792052962, 
    0.283077654021558, 0.244921566187188), fmi = c(0.0327101746988321, 
    0.0597892924465919, 0.165623310061771, 0.262272270527071, 
    0.0223730979022412, 0.153004763352921, 0.0616812946217472, 
    0.0335687383891115, 0.114261851084983, 0.134278888452115, 
    0.045470369616539, 0.116325704296666, 0.0443439305672371, 
    0.155226131408954, 0.354766090538579, 0.147082885118919, 
    0.234592763115993, 0.393804973651112, 0.26835286359789, 0.11716821205609, 
    0.1369276602079, 0.244568527181479, 0.295107706982863, 0.127399813343547, 
    0.249291829388427, 0.256520249408316, 0.24641397540461, 0.10734867354401, 
    0.289049371007454, 0.249644680176034)), class = "data.frame", row.names = c(NA, 
-30L)), glanced = NULL), class = c("mipo", "data.frame"))

the provided pooled model data cannot be reproduced as it requires specific package and the m_longit object.

As I understand it, it is the result of mice::pool().

I am not familiar with mice. I explored quickly the structure of a mipo object. And I'm not sure if we could do much more:

such object does not store the original model
the type of model cannot be identified (needed to know how to label coefficients, otherwise generic Beta or expo(Beta) are used
the original data cannot be identified (i.e. model.frame()) which is crucial to identify the different variables and the reference values

So, I'm truly sorry, but I do not see how to automatically improve the plot.

However, you can still customize it manually. See below an example.

library(mice)
#> 
#> Attachement du package : 'mice'
#> L'objet suivant est masqué depuis 'package:stats':
#> 
#>     filter
#> Les objets suivants sont masqués depuis 'package:base':
#> 
#>     cbind, rbind
library(ggstats)
library(tidyverse)

imp <- mice(nhanes2, maxit = 2, m = 2)
#> 
#>  iter imp variable
#>   1   1  bmi  hyp  chl
#>   1   2  bmi  hyp  chl
#>   2   1  bmi  hyp  chl
#>   2   2  bmi  hyp  chl
fit <- with(data = imp, exp = lm(bmi ~ hyp + chl + age))
mod <- pool(fit)

# getting the data used by ggcoef_table()
# add exponentiate = TRUE if you want exponentiated values
td <- ggcoef_model(
  mod,
  return_data = TRUE,
  signif_stars = FALSE,
  show_p_values = FALSE
)
#> ✖ Unable to identify the list of variables.
#> 
#> This is usually due to an error calling `stats::model.frame(x)`or `stats::model.matrix(x)`.
#> It could be the case if that type of model does not implement these methods.
#> Rarely, this error may occur if the model object was created within
#> a functional programming framework (e.g. using `lappy()`, `purrr::map()`, etc.).

# remove intercept
td <- td |> filter(term != "(Intercept)")

# manual recoding
td$variable
#> [1] hypyes   chl      age40-59 age60-99
#> Levels: (Intercept) hypyes chl age40-59 age60-99
td$variable <- td$variable %>%
  fct_recode(
    "hyp" = "hypyes",
    "age" = "age40-59",
    "age" = "age60-99"
  )

td$var_label
#> [1] hypyes   chl      age40-59 age60-99
#> Levels: (Intercept) hypyes chl age40-59 age60-99
td$var_label <- td$var_label %>%
  fct_recode(
    "Label for hyp" = "hypyes",
    "Label for chl" = "chl",
    "Label for age" = "age40-59",
    "Label for age" = "age60-99"
  )

td$label
#> [1] hypyes   chl      age40-59 age60-99
#> Levels: (Intercept) hypyes chl age40-59 age60-99
td$label <- td$label %>%
  fct_recode(
    "yes" = "hypyes",
    " " = "chl",
    "40-49" = "age40-59",
    "60-99" = "age60-99"
  )

# add exponentiate = TRUE if values are exponentiated
p <- ggcoef_table(
  data = td,
  y = "label", # indicating the correct col to use for y
  table_header = c("EST", "95% CI", "p")
)

# changing x axis label of the left plot
p[[1]] <- p[[1]] + xlab("EST")
p

^{Created on 2023-08-18 with reprex v2.0.2}

larmarange / ggstats

Is it possible to extend ggstats::ggcoef_table for MICE-imputed GEE-GLM (geeglm) model pooled coefficients? #42