Open MatthieuStigler opened 1 year ago
If variables not stored as factor, doesn't work for anova
!!
library(multiDiff)
## sim dat
dat_did_df <- sim_dat_common(timing_treatment = 5, seed = 97)
head(dat_did_df)
#> # A tibble: 6 × 5
#> unit Time treat_group tr y
#> <int> <int> <chr> <dbl> <dbl>
#> 1 1 1 treated 0 -0.746
#> 2 1 2 treated 0 -1.30
#> 3 1 3 treated 0 -1.25
#> 4 1 4 treated 0 -0.991
#> 5 1 5 treated 1 -1.06
#> 6 1 6 treated 0 -1.27
## get total var
var(dat_did_df$y)
#> [1] 2.713643
v_full <- var(residuals(fixest::feols(y~1, data=dat_did_df)))
all.equal(v_full, var(dat_did_df$y))
#> [1] TRUE
## get conditioanl vars
var2 <- function(x) drop(crossprod(x)/length(x))
res_FE_T <- residuals(fixest::feols(y~1|Time, data=dat_did_df))
v_FE_T <- var(res_FE_T)
res_FE_N <- residuals(fixest::feols(y~1|unit, data=dat_did_df))
v_FE_N <- var(res_FE_N)
v_FE_NT <- var(residuals(fixest::feols(y~1|Time+unit, data=dat_did_df)))
v_FE_T_then_N <- var(residuals(fixest::feols(res~1|unit, data=dat_did_df |>
dplyr::mutate(res = res_FE_T))))
v_FE_N_then_T <- var(residuals(fixest::feols(res~1|Time, data=dat_did_df |>
dplyr::mutate(res = res_FE_N))))
ano_manu_lm <- c(v_full=v_full, v_FE_T=v_FE_T, v_FE_N=v_FE_N, v_FE_NT=v_FE_NT,
v_FE_T_then_N = v_FE_T_then_N, v_FE_N_then_T=v_FE_N_then_T)
ano_manu_lm
#> v_full v_FE_T v_FE_N v_FE_NT v_FE_T_then_N
#> 2.7136432 2.0314706 1.5900935 0.9079209 0.9079209
#> v_FE_N_then_T
#> 0.9079209
ano_manu_lm/ano_manu_lm[1]
#> v_full v_FE_T v_FE_N v_FE_NT v_FE_T_then_N
#> 1.0000000 0.7486137 0.5859626 0.3345764 0.3345764
#> v_FE_N_then_T
#> 0.3345764
all.equal(ano_manu_lm[["v_FE_T_then_N"]],
ano_manu_lm[["v_FE_NT"]])
#> [1] TRUE
## anova total SSR
SSR_tot_manu <- crossprod(dat_did_df$y-mean(dat_did_df$y)) |> drop()
SSR_tot_anova <- aov(y~Time, data =dat_did_df) |> broom::tidy() |>
dplyr::pull(sumsq) |> sum()
all.equal(SSR_tot_anova, SSR_tot_manu)
#> [1] TRUE
##
get_ssr_aov <- function(aov){
aov |>
broom::tidy() |>
dplyr::select(term, sumsq) |>
dplyr::mutate(perc = 100* sumsq/sum(sumsq))
}
## by Y
AOV_T <- aov(y~Time, data =dat_did_df) |> get_ssr_aov()
AOV_T
#> # A tibble: 2 × 3
#> term sumsq perc
#> <chr> <dbl> <dbl>
#> 1 Time 1579. 5.82
#> 2 Residuals 25554. 94.2
all.equal(AOV_T$perc[1],
100*(1-v_FE_T/v_full))
#> [1] "Mean relative difference: 3.318918"
AOV_N <- aov(y~unit, data =dat_did_df) |> get_ssr_aov()
AOV_N
#> # A tibble: 2 × 3
#> term sumsq perc
#> <chr> <dbl> <dbl>
#> 1 unit 4.49 0.0165
#> 2 Residuals 27129. 100.
all.equal(AOV_N$perc[1], 100*(1-v_FE_N/v_full))
#> [1] "Mean relative difference: 2500.989"
AOV_NT <- aov(y~unit+Time, data =dat_did_df) |> get_ssr_aov()
AOV_TN <- aov(y~Time +unit, data =dat_did_df) |> get_ssr_aov()
all.equal(AOV_NT$perc[1:2], AOV_TN$perc[2:1])
#> [1] TRUE
AOV_NT
#> # A tibble: 3 × 3
#> term sumsq perc
#> <chr> <dbl> <dbl>
#> 1 unit 4.49 0.0165
#> 2 Time 1579. 5.82
#> 3 Residuals 25550. 94.2
AOV_TN
#> # A tibble: 3 × 3
#> term sumsq perc
#> <chr> <dbl> <dbl>
#> 1 Time 1579. 5.82
#> 2 unit 4.49 0.0165
#> 3 Residuals 25550. 94.2
all.equal(sum(AOV_NT$perc[1:2]), 100*(1-v_FE_NT/v_full))
#> [1] "Mean relative difference: 10.39984"
Created on 2023-10-15 with reprex v2.0.2
feols() residuals is muuuch faster!
very important to make sure the vars are as factor!
Created on 2023-10-15 with reprex v2.0.2