pitakakariki / simr

Power Analysis of Generalised Linear Mixed Models by Simulation
68 stars 19 forks source link

With powerCurve, first nine out of ten groups have zero power #263

Closed raoulvanoosten closed 10 months ago

raoulvanoosten commented 10 months ago

I have data of the proportion of cylists wearing helmets, and simulated new data from these: one set before a campaign with the same settings, and one set after the campaign with .03 more cyclists wearing helmets. I wanted to use simr::powerSim() to calculate how many groups (blocks/locations) I need to sample for 90% power. I did that by running a generalized linear model (glmer). However, the simulations yield 0 power for 1 to 9 blocks, and 100% power for 10 blocks. Here is the output:

Power for predictor 'campagne', (95% confidence interval),
by number of levels in block:
      3:  0.00% ( 0.00, 30.85) - 12 rows
      5:  0.00% ( 0.00, 30.85) - 20 rows
      7:  0.00% ( 0.00, 30.85) - 28 rows
      9:  0.00% ( 0.00, 30.85) - 36 rows
     11: 100.0% (69.15, 100.0) - 44 rows
     12: 100.0% (69.15, 100.0) - 48 rows
     14: 100.0% (69.15, 100.0) - 56 rows
     16: 100.0% (69.15, 100.0) - 64 rows
     18: 100.0% (69.15, 100.0) - 72 rows
     20: 100.0% (69.15, 100.0) - 80 rows
Time elapsed: 0 h 0 m 3 s

This does not seem right to me. What is going on? I included the code below, which can be run with packages simr, lme4 and optionally emmeans.

The parameters in the data:

These parameters were relevant in the model of the actual data (high AIC)

# simulations of two groups: numbers with/without helmet before campaign vs after campaign
# before campaign what was proportion with helmet?
  # different between e-bikes and normal and bibukom, so take into account (simulate)

# simulate
# after campaign .05 higher

# for-loops
df_sim_binnen <- c()
df_sim_buiten <- c()

effect_campagne <- .05

for(i in 1:10){
  size_nbikes_binnen_voor <- abs(round(rnorm(1, 42.2, 24.3))) # determine length
  helm_nbikes_binnen_voor <- abs(rnorm(1, 0.0166, 0.0188)) # create binom
  size_nbikes_binnen_na <- abs(round(rnorm(1, 42.2, 24.3)))
  helm_nbikes_binnen_na <- abs(rnorm(1, 0.0166 + effect_campagne, 0.0188))
  size_ebikes_binnen_voor <- abs(round(rnorm(1, 56.9, 48.8)))
  helm_ebikes_binnen_voor <- abs(rnorm(1, 0.0512, 0.0265))
  size_ebikes_binnen_na <- abs(round(rnorm(1, 56.9, 48.8)))
  helm_ebikes_binnen_na <- abs(rnorm(1, 0.0512 + effect_campagne, 0.0265))
  group <- LETTERS[i]
  df_sim_binnen <- rbind(df_sim_binnen,
                         data.frame(block = group,
                                    campagne = "voor",
                                    fietstype = "normale fiets",
                                    bibukom = "binnen",
                                    aantal_fietsers = size_nbikes_binnen_voor,
                                    proportie_helm = helm_nbikes_binnen_voor))
  df_sim_binnen <- rbind(df_sim_binnen,
                         data.frame(block = group,
                                    campagne = "na",
                                    fietstype = "normale fiets",
                                    bibukom = "binnen",
                                    aantal_fietsers = size_nbikes_binnen_na,
                                    proportie_helm = helm_nbikes_binnen_na))
  df_sim_binnen <- rbind(df_sim_binnen,
                         data.frame(block = group,
                                    campagne = "voor",
                                    fietstype = "elektrische fiets",
                                    bibukom = "binnen",
                                    aantal_fietsers = size_ebikes_binnen_voor,
                                    proportie_helm = helm_ebikes_binnen_voor))
  df_sim_binnen <- rbind(df_sim_binnen,
                         data.frame(block = group,
                                    campagne = "na",
                                    fietstype = "elektrische fiets",
                                    bibukom = "binnen",
                                    aantal_fietsers = size_ebikes_binnen_na,
                                    proportie_helm = helm_ebikes_binnen_na))
}

for(j in 11:20){
  size_nbikes_buiten_voor <- abs(round(rnorm(1, 10.4, 6.72))) # determine length
  helm_nbikes_buiten_voor <- abs(rnorm(1, 0.0510, 0.0633)) # create binom
  size_nbikes_buiten_na <- abs(round(rnorm(1, 10.4, 6.72)))
  helm_nbikes_buiten_na <- abs(rnorm(1, 0.0510 + effect_campagne, 0.0633))
  size_ebikes_buiten_voor <- abs(round(rnorm(1, 19.8, 13.4)))
  helm_ebikes_buiten_voor <- abs(rnorm(1, 0.0968, 0.0569))
  size_ebikes_buiten_na <- abs(round(rnorm(1, 19.8, 13.4)))
  helm_ebikes_buiten_na <- abs(rnorm(1, 0.0968 + effect_campagne, 0.0569))
  group <- LETTERS[j]
  df_sim_buiten <- rbind(df_sim_buiten,
                         data.frame(block = group,
                                    campagne = "voor",
                                    fietstype = "normale fiets",
                                    bibukom = "buiten",
                                    aantal_fietsers = size_nbikes_buiten_voor,
                                    proportie_helm = helm_nbikes_buiten_voor))
  df_sim_buiten <- rbind(df_sim_buiten,
                         data.frame(block = group,
                                    campagne = "na",
                                    fietstype = "normale fiets",
                                    bibukom = "buiten",
                                    aantal_fietsers = size_nbikes_buiten_na,
                                    proportie_helm = helm_nbikes_buiten_na))
  df_sim_buiten <- rbind(df_sim_buiten,
                         data.frame(block = group,
                                    campagne = "voor",
                                    fietstype = "elektrische fiets",
                                    bibukom = "buiten",
                                    aantal_fietsers = size_ebikes_buiten_voor,
                                    proportie_helm = helm_ebikes_buiten_voor))
  df_sim_buiten <- rbind(df_sim_buiten,
                         data.frame(block = group,
                                    campagne = "na",
                                    fietstype = "elektrische fiets",
                                    bibukom = "buiten",
                                    aantal_fietsers = size_ebikes_buiten_na,
                                    proportie_helm = helm_ebikes_buiten_na))
}

df_sim <- rbind(df_sim_binnen,
                df_sim_buiten)

df_sim$block <- factor(df_sim$block)
df_sim$campagne <- factor(df_sim$campagne)
df_sim$fietstype <- factor(df_sim$fietstype)
df_sim$bibukom <- factor(df_sim$bibukom)

# modelleren
model_campagne <- lme4::glmer(proportie_helm ~ campagne + fietstype + bibukom + (1 | block), data = df_sim,
                              family = binomial, weight = aantal_fietsers)

# emmeans
## fietstype
EMM_campagne <- emmeans::emmeans(model_campagne,
                                  specs = ~ campagne,
                                  type = "response")

EMM_campagne

# campagne   prob      SE  df asymp.LCL asymp.UCL
# na       0.1052 0.00870 Inf    0.0893    0.1235
# voor     0.0482 0.00611 Inf    0.0376    0.0617
# 
# Results are averaged over the levels of: fietstype, bibukom 
# Confidence level used: 0.95 
# Intervals are back-transformed from the logit scale 

PRS_campagne <- pairs(EMM_campagne, type = "response")

PRS_campagne |> confint()

simr::powerCurve(model_campagne, test = simr::fixed("campagne", method = "chisq"), along = "block", nsim = 1000)

I tried the simr::powerSim() function with manual editing of the number of groups for 100 simulations each. This works. I think I should run/randomize the for-loop several times and then run many simulations.

Power for predictor 'campagne', (95% confidence interval),
by number of levels in block:
      4:       32.00% (23.02, 42.08) - 16 rows
      6:       56.00% (45.72, 65.92) - 24 rows
      8:       87.00% (78.80, 92.89) - 32 rows
     10:       82.00% (73.05, 88.97) - 40 rows
     12:       88.00% (79.98, 93.64) - 48 rows
     14:       92.00% (84.84, 96.48) - 56 rows
     16:       99.00% (94.55, 99.97) - 64 rows
     18:       98.00% (92.96, 99.76) - 72 rows
     20:       99.00% (94.55, 99.97) - 80 rows
pitakakariki commented 10 months ago

powerCurve works with the order of the data in the data frame (edit: not entirely true, see below). In your example, the binary variable bibukom is "binnen" for the first 40 observations and "buiten" for the next 40.

That means when powerCurve gets down to 40 or fewer rows, it can no longer fit the model. Should work if you alternate bibukom the way you've alternated campagne and fietstype though.

raoulvanoosten commented 10 months ago

Thanks for your reply. Good to know that the order is important. That can be changed so it will work. It does indeed work with the code below. Important is that the letters of block have to alternative (A, C, E, etc for "binnen", and B, D, F for "buiten").

# for-loop
df_sim <- c()

effect_campagne <- .05

for(i in 1:10){
  size_nbikes_binnen_voor <- abs(round(rnorm(1, 42.2, 24.3))) # determine length
  helm_nbikes_binnen_voor <- abs(rnorm(1, 0.0166, 0.0188)) # create binom
  size_nbikes_binnen_na <- abs(round(rnorm(1, 42.2, 24.3)))
  helm_nbikes_binnen_na <- abs(rnorm(1, 0.0166 + effect_campagne, 0.0188))
  size_ebikes_binnen_voor <- abs(round(rnorm(1, 56.9, 48.8)))
  helm_ebikes_binnen_voor <- abs(rnorm(1, 0.0512, 0.0265))
  size_ebikes_binnen_na <- abs(round(rnorm(1, 56.9, 48.8)))
  helm_ebikes_binnen_na <- abs(rnorm(1, 0.0512 + effect_campagne, 0.0265))
  group_binnen <- LETTERS[(i*2)-1]
  size_nbikes_buiten_voor <- abs(round(rnorm(1, 10.4, 6.72))) # determine length
  helm_nbikes_buiten_voor <- abs(rnorm(1, 0.0510, 0.0633)) # create binom
  size_nbikes_buiten_na <- abs(round(rnorm(1, 10.4, 6.72)))
  helm_nbikes_buiten_na <- abs(rnorm(1, 0.0510 + effect_campagne, 0.0633))
  size_ebikes_buiten_voor <- abs(round(rnorm(1, 19.8, 13.4)))
  helm_ebikes_buiten_voor <- abs(rnorm(1, 0.0968, 0.0569))
  size_ebikes_buiten_na <- abs(round(rnorm(1, 19.8, 13.4)))
  helm_ebikes_buiten_na <- abs(rnorm(1, 0.0968 + effect_campagne, 0.0569))
  group_buiten <- LETTERS[i * 2]
  df_sim <- rbind(df_sim,
                 data.frame(block = group_binnen,
                            campagne = "voor",
                            fietstype = "normale fiets",
                            bibukom = "binnen",
                            aantal_fietsers = size_nbikes_binnen_voor,
                            proportie_helm = helm_nbikes_binnen_voor))
  df_sim <- rbind(df_sim,
                 data.frame(block = group_binnen,
                            campagne = "na",
                            fietstype = "normale fiets",
                            bibukom = "binnen",
                            aantal_fietsers = size_nbikes_binnen_na,
                            proportie_helm = helm_nbikes_binnen_na))
  df_sim <- rbind(df_sim,
                 data.frame(block = group_binnen,
                            campagne = "voor",
                            fietstype = "elektrische fiets",
                            bibukom = "binnen",
                            aantal_fietsers = size_ebikes_binnen_voor,
                            proportie_helm = helm_ebikes_binnen_voor))
  df_sim <- rbind(df_sim,
                 data.frame(block = group_binnen,
                            campagne = "na",
                            fietstype = "elektrische fiets",
                            bibukom = "binnen",
                            aantal_fietsers = size_ebikes_binnen_na,
                            proportie_helm = helm_ebikes_binnen_na))
  df_sim <- rbind(df_sim,
                  data.frame(block = group_buiten,
                             campagne = "voor",
                             fietstype = "normale fiets",
                             bibukom = "buiten",
                             aantal_fietsers = size_nbikes_buiten_voor,
                             proportie_helm = helm_nbikes_buiten_voor))
  df_sim <- rbind(df_sim,
                  data.frame(block = group_buiten,
                             campagne = "na",
                             fietstype = "normale fiets",
                             bibukom = "buiten",
                             aantal_fietsers = size_nbikes_buiten_na,
                             proportie_helm = helm_nbikes_buiten_na))
  df_sim <- rbind(df_sim,
                  data.frame(block = group_buiten,
                             campagne = "voor",
                             fietstype = "elektrische fiets",
                             bibukom = "buiten",
                             aantal_fietsers = size_ebikes_buiten_voor,
                             proportie_helm = helm_ebikes_buiten_voor))
  df_sim <- rbind(df_sim,
                  data.frame(block = group_buiten,
                             campagne = "na",
                             fietstype = "elektrische fiets",
                             bibukom = "buiten",
                             aantal_fietsers = size_ebikes_buiten_na,
                             proportie_helm = helm_ebikes_buiten_na))
}

df_sim$block <- factor(df_sim$block)
df_sim$campagne <- factor(df_sim$campagne)
df_sim$fietstype <- factor(df_sim$fietstype)
df_sim$bibukom <- factor(df_sim$bibukom)

# modelleren
model_campagne <- lme4::glmer(proportie_helm ~ campagne + fietstype + bibukom + (1 | block), data = df_sim,
                              family = binomial, weight = aantal_fietsers)

simr::powerCurve(model_campagne, test = simr::fixed("campagne", method = "chisq"), along = "block", nsim = 10)