ShanaScogin / BayesPostEst

An R package to generate and plot postestimation quantities after estimating Bayesian regression models using MCMC
https://shanascogin.github.io/BayesPostEst/
GNU General Public License v3.0
12 stars 2 forks source link

Add print, plot, and as.data.frame methods for mcmcRocPrc() #63

Closed andybega closed 4 years ago

andybega commented 4 years ago

This adds several methods for mcmcRocPrc() (print, plot, and as.data.frame) and closes #32.

Otherwise:

Overall, the new behavior will be like this:


library(BayesPostEst)
#> Registered S3 method overwritten by 'GGally':
#>   method from   
#>   +.gg   ggplot2

data("jags_logit")

# basic case without full sims
with_curves <- mcmcRocPrc(jags_logit,
                          yname = "Y",
                          xnames = c("X1", "X2"),
                          curves = TRUE,
                          fullsims = FALSE)

with_curves
#> mcmcRocPrc object
#> curves: TRUE; fullsims: FALSE
#> AUC-ROC: 0.627
#> AUC-PR:  0.621
plot(with_curves)

as.data.frame(with_curves)
#>    area_under_roc area_under_prc
#> V1      0.6270768      0.6210375
head(as.data.frame(with_curves, what = "prc"))
#>   sim           x         y
#> 1   1 0.000000000       NaN
#> 2   1 0.003816794 1.0000000
#> 3   1 0.007633588 1.0000000
#> 4   1 0.007633588 0.6666667
#> 5   1 0.011450382 0.7500000
#> 6   1 0.015267176 0.8000000
# to ensure consistent output, this includes a "sim" column with value 1
# (see below with fullsims = TRUE)

# with full sims, it'll sample a few curves (n = 40 by default) to plot,
# and add an averaged curve as well
full_with_curves <- mcmcRocPrc(jags_logit,
                               yname = "Y",
                               xnames = c("X1", "X2"),
                               curves = TRUE,
                               fullsims = TRUE)

full_with_curves
#> mcmcRocPrc object
#> curves: TRUE; fullsims: TRUE
#> AUC-ROC: 0.624 [80%: 0.619 - 0.627]
#> AUC-PR:  0.618 [80%: 0.613 - 0.620]
plot(full_with_curves, alpha = 0.2)


# With fullsims = TRUE, we have 2,000 ROC curves. These are turned into
# a long data frame, with a unique ID for each curve in the "sim" column
df <- as.data.frame(full_with_curves, what = "roc")
head(df)
#>   sim           x           y
#> 1   1 0.000000000 0.000000000
#> 2   1 0.000000000 0.003816794
#> 3   1 0.000000000 0.007633588
#> 4   1 0.000000000 0.011450382
#> 5   1 0.000000000 0.015267176
#> 6   1 0.004201681 0.015267176
length(unique(df$sim))
#> [1] 2000
range(df$sim)
#> [1]    1 2000

# no curve data, only AUC
no_curves <- mcmcRocPrc(jags_logit,
                        yname = "Y",
                        xnames = c("X1", "X2"),
                        curves = FALSE,
                        fullsims = FALSE)

# this causes an error if we try to plot
plot(no_curves)
#> Error in plot.mcmcRocPrc(no_curves): Use mcmcRocPrc(..., curves = TRUE) to generate data for plots
head(as.data.frame(full_with_curves))
#>    area_under_roc area_under_prc
#> V1      0.6243505      0.6171366
#> V2      0.6222176      0.6185580
#> V3      0.6261146      0.6189066
#> V4      0.6237411      0.6200423
#> V5      0.6266438      0.6200719
#> V6      0.6260504      0.6194895

as.data.frame(no_curves, what = "auc")
#>   area_under_roc area_under_prc
#> 1      0.6270768      0.6210375

# again, an error, since there is no ROC curve data
as.data.frame(no_curves, what = "roc")
#> Error in as.data.frame.mcmcRocPrc(no_curves, what = "roc"): No curve data; use mcmcRocPrc(..., curves = TRUE)

Created on 2020-06-15 by the reprex package (v0.3.0)