droodman / WildBootTests.jl

MIT License
17 stars 3 forks source link

A minor bug in confidence intervals for multi-variable hypotheses with nonull & when p-val = 0 #5

Closed s3alfisc closed 2 years ago

s3alfisc commented 2 years ago

I have now managed to set up continuous integration of fwildclusterboot vs wildboottestjlr & have discovered one bug in wildboottestjlr that I believe goes back to to WildBootTests.jl. (I have also found one bug in fwildclusterboot that I need to address).

You can find the test results of the CI tests here under run-lib-actions check-r-package. The Julia vs R tests are all in test_r_vs_julia.r. I will set up a similar workflow for wildboottestjlr, and I even think it might be possible to trigger the workflow every time you push to WildBootTests - I will check this if you think it is of interest to you. Beyond that, I believe that you should have all rights to trigger the workflow manually.

Bug:

For rademacher weights and nonull and multi-variable hypotheses, if the estimated p-value is 0, the confidence interval computed by WildBootTests is incorrect.

Example:

set.seed(1206855)
reltol <- 0.02

N <- 10000
seed <- 875784

lm_fit <- lm(proposition_vote ~ treatment  + log_income  ,
             data = wildboottestjlr:::create_data(N = 10000,
                                                  N_G1 = 20,
                                                  icc1 = 0.5,
                                                  N_G2 = 20,
                                                  icc2 = 0.2,
                                                  numb_fe1 = 10,
                                                  numb_fe2 = 10,
                                                  seed = 90864369,
                                                  #seed = 89761297,
                                                  weights = 1:N / N))

boot_r_rade <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "rademacher", impose_null = FALSE)
boot_jl_rade <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "rademacher", impose_null = FALSE)

boot_r_mammen <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "mammen", impose_null = FALSE)
boot_jl_mammen <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "mammen", impose_null = FALSE)

boot_r_webb <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "webb", impose_null = FALSE)
boot_jl_webb <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "webb", impose_null = FALSE)

boot_r_norm <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "norm", impose_null = FALSE)
boot_jl_norm <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = "norm", impose_null = FALSE)

# pvals
rbind(
boot_r_rade$p_val,
boot_jl_rade$p_val,
boot_r_mammen$p_val,
boot_jl_mammen$p_val,
boot_r_webb$p_val,
boot_jl_webb$p_val
)
     [,1]
[1,]    0
[2,]    0
[3,]    0
[4,]    0
[5,]    0
[6,]    0

# tstats
rbind(
  boot_r_rade$t_stat,
  boot_jl_rade$t_stat,
  boot_r_mammen$t_stat,
  boot_jl_mammen$t_stat,
  boot_r_webb$t_stat,
  boot_jl_webb$t_stat
)
          [,1]
[1,] -23.16794
[2,] -23.16564
[3,] -23.16794
[4,] -23.16564
[5,] -23.16794
[6,] -23.16564

# confidence intervals 
rbind(
boot_r_rade$conf_int,
boot_jl_rade$conf_int,
boot_r_mammen$conf_int,
boot_jl_mammen$conf_int,
boot_r_webb$con,
boot_jl_webb$conf_int
)
            [,1]         [,2]
[1,] -0.008812402  0.009013337
[2,] -0.099899627 -0.099371314 # larger by factor of 10
[3,] -0.008973389  0.009174324
[4,] -0.009277179  0.009477927
[5,] -0.009021215  0.009222149
[6,] -0.009241490  0.009442238

Changing beta0 changes the severity of the error:

# switching to beta0 = 1
boot_r_rade <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 1, type = "rademacher", impose_null = FALSE)
boot_jl_rade <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 1, type = "rademacher", impose_null = FALSE)
boot_r_webb <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 1, type = "webb", impose_null = FALSE)
boot_jl_webb <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 1, type = "webb", impose_null = FALSE)

rbind(boot_r_rade$p_val, boot_jl_rade$p_val, boot_r_webb$p_val, boot_jl_webb$p_val)
     [,1]
[1,]    0
[2,]    0
[3,]    0
[4,]    0
rbind(boot_r_rade$conf_int, boot_jl_rade$conf_int, boot_r_webb$conf_int, boot_jl_webb$conf_int)
             [,1]         [,2]
[1,] -0.008839514  0.009040449
[2,] -1.000427961 -0.999899626
[3,] -0.009065662  0.009266596
[4,] -0.008974318  0.009175067

# switching to beta0 = 2
boot_r_rade <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 2, type = "rademacher", impose_null = FALSE)
boot_jl_rade <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 2, type = "rademacher", impose_null = FALSE)

rbind(boot_r_rade$p_val, boot_jl_rade$p_val)
     [,1]
[1,]    0
[2,]    0
rbind(boot_r_rade$conf_int, boot_jl_rade$conf_int)
             [,1]         [,2]
[1,] -0.009006202  0.009207137
[2,] -1.999899626 -1.999371290

If pval > 0, the error does not occur:

# p-val > 0
boot_r_rade <- fwildclusterboot::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.01, type = "rademacher", impose_null = FALSE)
boot_jl_rade <- wildboottestjlr::boottest(lm_fit, clustid = "group_id1", B = 999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.01, type = "rademacher", impose_null = FALSE)

rbind(boot_r_rade$p_val, boot_jl_rade$p_val)
           [,1]
[1,] 0.03603604
[2,] 0.03703704
rbind(boot_r_rade$conf_int, boot_jl_rade$conf_int)
rbind(boot_r_rade$conf_int, boot_jl_rade$conf_int)
             [,1]        [,2]
[1,] -0.008901961 0.009102896
[2,] -0.009899627 0.009186449

Update

There is a related error (only for rademacher weights & impose_null = FALSE & multivariable hypotheses) for one sided tests if the p-val is either 0 or 1, which leads to incorrect t-stats (and likely also confidence intervals):

impose_null <- FALSE
p_val_type = ">"
type = "rademacher"

boot_r <- fwildclusterboot::boottest(object, clustid = "group_id1", B = 99999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = type, p_val_type = p_val_type, impose_null = impose_null, conf_int = FALSE)
boot_jl <- wildboottestjlr::boottest(object, clustid = "group_id1", B = 99999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = type, p_val_type = p_val_type, impose_null = impose_null, conf_int = FALSE)

boot_r$p_val; boot_jl$p_val # 1, 1
boot_r$t_stat; boot_jl$t_stat # [1] -23.16794, [1] -46.35455

type = "mammen"
boot_r <- fwildclusterboot::boottest(object, clustid = "group_id1", B = 99999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = type, p_val_type = p_val_type, impose_null = impose_null, conf_int = FALSE)
boot_jl <- wildboottestjlr::boottest(object, clustid = "group_id1", B = 99999, param = c("treatment", "log_income"), R = c(1, 0.1), beta0 = 0.1, type = type, p_val_type = p_val_type, impose_null = impose_null, conf_int = FALSE)

boot_r$p_val; boot_jl$p_val # 1,1
boot_r$t_stat; boot_jl$t_stat # [1] -23.16794 [1] -23.16564
droodman commented 2 years ago

Thanks for the excellent testing. I think these examples were all fixed by moving a ) in one line.

droodman commented 2 years ago

I am publishing version 0.6.5 now which I hope fixes the problems above and brings a modest but noticeable additional reduction in per-session latency, at least if you only doing one major kind of test, like WRE or WCR.

s3alfisc commented 2 years ago

Yes, all tests pass with v0.6.5 & latency on first use is reduced by around 5s!