ncn-foreigners / nonprobsvy

An R package for modern methods for non-probability surveys
https://ncn-foreigners.github.io/nonprobsvy/
Other
29 stars 4 forks source link

Bootstrap bug #34

Closed BERENZ closed 10 months ago

BERENZ commented 10 months ago

Generate data

set.seed(2023-10-19)
n_reps <- 500
N <- 100000
n <- 1000
x1 <- rnorm(N,1,1)
x2 <- rexp(N,1)
alp <- rnorm(N)
epsilon <- rnorm(N)
y11 <- 1 + x1 + x2 + alp + epsilon
y12 <- 0.5*(x1-1.5)^2 + x2^2 + alp + epsilon
y21 <- rbinom(N,1,plogis(1 + x1 + x2 + alp))
y22 <- rbinom(N,1,plogis(0.5*(x1-1.5)^2 + x2^2 + alp))
p1 <- plogis(x2)
p2 <- plogis(-3+(x1-1.5)^2+(x2-2)^2)
pop_data <- data.frame(x1,x2,y11,y12,y21,y22,p1,p2) |> setDT()
pop_data[, x1k := x1/N]
pop_data[, x2k := x2/N]
p_quantiles1 <- seq(0.25, 0.75, 0.25)
p_quantiles2 <- seq(0.1, 0.9, 0.1)
sample_prob <- pop_data[sample(1:N, n),]
sample_prob$w <- N/n
sample_prob_svy <- svydesign(ids=~1, weights = ~w, data = sample_prob)

## match population totals (should we calibrate x1_* and x2_* so they exactly match 0.25)
sample_prob_svy_cal <- calibrate(sample_prob_svy, ~ x1 + x2, 
                                 c(`(Intercept)` = N, x1=sum(x1), x2=sum(x2)))
sample_bd1 <- pop_data[rbinom(N,1,pop_data$p1)==1, ]

Problem: bootstrap is run as many times as variables present in the target argument.

bd1_ipw_0 <- nonprob(selection = ~ x1 + x2, 
                     target = ~ y11 + y12 + y21 + y22, 
                     svydesign = sample_prob_svy,
                     data = sample_bd1,  
                     se = TRUE,
                     control_inference = controlInf(var_method = "bootstrap", num_boot = 10),
                     verbose = T,
                     control_selection = controlSel(est_method_sel = "gee", h = 1, start_type = "naive"))

Results

[1] "iteration 1/10, estimated mean = 2.85205174317731"
[1] "iteration 2/10, estimated mean = 2.96401796345292"
[1] "iteration 3/10, estimated mean = 2.94871299846419"
[1] "iteration 4/10, estimated mean = 3.01392624049592"
[1] "iteration 5/10, estimated mean = 2.94170984330577"
[1] "iteration 6/10, estimated mean = 2.9224964117775"
[1] "iteration 7/10, estimated mean = 2.93534474976417"
[1] "iteration 8/10, estimated mean = 2.93058020856146"
[1] "iteration 9/10, estimated mean = 2.90403964012494"
[1] "iteration 10/10, estimated mean = 2.93133613697437"

[1] "iteration 1/10, estimated mean = 2.67422366432402"
[1] "iteration 2/10, estimated mean = 2.62639389897099"
[1] "iteration 3/10, estimated mean = 2.55918335738336"