Closed sou412 closed 11 months ago
Test code before fix.
library(dplyr)
library(testthat)
data(anzia2012)
set.seed(1234)

# Build the test data ----------------------------------------------------------
# Districts observed with oncycle == 1 in 2007 make up the treated group.
treated_id <- anzia2012 %>%
  filter(year == 2007, oncycle == 1) %>%
  pull(district)

# Assemble the panel from two slices; `local()` keeps the intermediate pieces
# out of the global environment so only `test_data` is created.
test_data <- local({
  # 2005-2007: all three synthetic outcomes are copies of the observed outcome.
  observed_years <- anzia2012 %>%
    filter(year %in% c(2005, 2006, 2007)) %>%
    mutate(
      outcome_var0_control = lnavgsalary_cpi,
      outcome_var0_treatment = lnavgsalary_cpi,
      outcome_var0_all = lnavgsalary_cpi
    )

  # 2004: overwrite the outcome with a constant 0 for one group (or for
  # everyone) so that group has no outcome variation in that year.
  zeroed_year <- anzia2012 %>%
    filter(year == 2004) %>%
    mutate(
      # control districts (those not in treated_id) are set to 0
      outcome_var0_control = case_when(
        district %in% treated_id ~ lnavgsalary_cpi,
        TRUE ~ 0
      ),
      # treated districts are set to 0
      outcome_var0_treatment = case_when(
        district %in% treated_id ~ 0,
        TRUE ~ lnavgsalary_cpi
      ),
      # every district is set to 0
      outcome_var0_all = 0
    )

  bind_rows(observed_years, zeroed_year)
})
# Run did_check() on each synthetic outcome ------------------------------------

## Should error: with lag = 1:2 the control group's outcome has zero variance
## at t-2, i.e. var(Y | D = 0) = 0.
expect_error(
  did_check(
    formula = outcome_var0_control ~ oncycle,
    data = test_data,
    id_unit = "district",
    id_time = "year",
    option = list(n_boot = 20, parallel = TRUE, lag = 1:2)
  ),
  "NA/NaN/Inf in 'y'"
)

## Should error: var(Y | D = 0) = 0 at t-2 (here the outcome is zeroed for
## all districts), this time with covariates in the formula.
expect_error(
  did_check(
    formula = outcome_var0_all ~ oncycle |
      teachers_avg_yrs_exper + ami_pc + asian_pc + black_pc + hisp_pc,
    data = test_data,
    id_unit = "district",
    id_time = "year",
    option = list(n_boot = 20, parallel = TRUE, lag = 1:2)
  ),
  "NA/NaN/Inf in 'y'"
)

## Should run: restricted to lag = 1, var(Y | D = 0) != 0 at t-1.
expect_type(
  did_check(
    formula = outcome_var0_control ~ oncycle,
    data = test_data,
    id_unit = "district",
    id_time = "year",
    option = list(n_boot = 20, parallel = TRUE, lag = 1)
  ),
  "list"
)

## Should run: only the treated group's outcome is zeroed.
expect_type(
  did_check(
    formula = outcome_var0_treatment ~ oncycle |
      teachers_avg_yrs_exper + ami_pc + asian_pc + black_pc + hisp_pc,
    data = test_data,
    id_unit = "district",
    id_time = "year",
    option = list(n_boot = 20, parallel = TRUE, lag = 1:2)
  ),
  "list"
)

## Same call again, this time keeping the result so it can be summarised.
out_var0_treatment <- did_check(
  formula = outcome_var0_treatment ~ oncycle |
    teachers_avg_yrs_exper + ami_pc + asian_pc + black_pc + hisp_pc,
  data = test_data,
  id_unit = "district",
  id_time = "year",
  option = list(n_boot = 20, parallel = TRUE, lag = 1:2)
)
expect_type(out_var0_treatment, "list")

summary(out_var0_treatment)
When it runs, it returns estimates:
> summary(out_var0_treatment)
── Estimates for assessing parallel trends assumption ─────────────────────────────────────────────────────────────────────────────────────────
estimate lag std.error EqCI95_LB EqCI95_UB
1 -0.00361 1 0.00178 -0.0847 0.0847
2 10.63308 2 0.00611 -145.3014 145.3014
When the control group has no variation in the outcome during any of the pre-treatment periods (within the range of `lag` values), the `did_check()` function fails with the following error message: "NA/NaN/Inf in 'y'". This is because of the outcome normalization happening here: https://github.com/naoki-egami/DIDdesign/blob/8440250a4259fd3e8b5b616cf87f2333491c4ca2/R/did-check-std.R#L130
The immediate fix is to produce a more meaningful error message when this happens.