Closed simon-lowe closed 2 years ago
Hi Simon,
Could you share your code in a reprex: https://reprex.tidyverse.org/. That way I can help you much better
Hi Kyle,
This is nothing super important, so don't spend your time on it if you don't have it!
library(data.table)
library(tidyverse)
library(lfe)
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
library(fastDummies)
library(ggthemes)
library(did)
library(did2s)
#> Loading required package: fixest
#> ℹ did2s (v0.6.0). For more information on the methodology, visit <https://www.kylebutts.com/did2s>
#> To cite did2s in publications use:
#>
#> Butts, Kyle (2021). did2s: Two-Stage Difference-in-Differences
#> Following Gardner (2021). R package version 0.6.0.
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Manual{,
#> title = {did2s: Two-Stage Difference-in-Differences Following Gardner (2021)},
#> author = {Kyle Butts},
#> year = {2021},
#> url = {https://github.com/kylebutts/did2s/},
#> }
library(reprex)
# Set parameters ----------------------------------------------------------
iseed = 20201221
nrep <- 10
true_mu <- 1
set.seed(iseed)
nobs <- 1000
nstates <- 40
make_data3 <- function(nobs = 1000,
nstates = 40) {
# unit fixed effects (unobservd heterogeneity)
unit <- tibble(
unit = 1:nobs,
# generate state
state = sample(1:nstates, nobs, replace = TRUE),
unit_fe = rnorm(nobs, state/5, 1),
# generate instantaneous treatment effect
#mu = rnorm(nobs, true_mu, 0.2)
mu = true_mu
)
# year fixed effects (first part)
year <- tibble(
year = 1980:2010,
year_fe = rnorm(length(year), 0, 1)
)
# Put the states into treatment groups
treat_taus <- tibble(
# sample the states randomly
state = sample(1:nstates, nstates, replace = FALSE),
# place the randomly sampled states into five treatment groups G_g
# cohort_year = sort(rep(c(1986, 1992, 1998, 2004), 10))
cohort_year = sample(c(1986, 1992, 1998, 2004), nstates, replace = TRUE)
)
# make main dataset
# full interaction of unit X year
expand_grid(unit = 1:nobs, year = 1980:2010) %>%
left_join(., unit) %>%
left_join(., year) %>%
left_join(., treat_taus) %>%
# make error term and get treatment indicators and treatment effects
# Also get cohort specific trends (modify time FE)
mutate(error = rnorm(n(), 0, 1),
treat = ifelse((year >= cohort_year)* (cohort_year != 2004), 1, 0),
mu = ifelse(cohort_year==1992, 2, ifelse(cohort_year==1998, 1, 3)),
tau = ifelse(treat == 1, mu, 0),
year_fe = year_fe + 0.1*(year - cohort_year)
) %>%
# calculate cumulative treatment effects
group_by(unit) %>%
mutate(tau_cum = cumsum(tau)) %>%
ungroup() %>%
# calculate the dep variable
mutate(dep_var = (2010 - cohort_year) + unit_fe + year_fe + tau_cum + error) %>%
# Relabel 2004 cohort as never-treated
mutate(cohort_year = ifelse(cohort_year == 2004, Inf, cohort_year))
}
# Rest --------------------------------------------------------------------
data <- make_data3()
#> Joining, by = "unit"
#> Joining, by = "year"
#> Joining, by = "state"
setDT(data)
data[cohort_year == Inf, cohort_year := 0]
mod <- did2s::event_study(yname = "dep_var",
tname = "year",
idname = "unit",
gname = "cohort_year",
data = data)
#> ℹ Note these estimators rely on different underlying assumptions. See Table 2 of <https://arxiv.org/abs/2109.05913> for an overview.
#> Estimating TWFE Model
#> Estimating using Gardner (2021)
#> Estimating using Callaway and Sant'Anna (2020)
#> Estimating using Sun and Abraham (2020)
#> Estimating using Borusyak, Jaravel, Spiess (2021)
#> Estimating using Roth and Sant'Anna (2021)
plot_event_study(mod)
Created on 2022-07-13 by the reprex package (v2.0.1)
My point is simply that the TWFE looks like neither of the two graphs in the blog post I mentioned.
Thanks for your time, Simon
I think the difference is the blog post removes the most negative relative year indicator from the estimate. See below:
library(tidyverse)
library(fixest)
# Set parameters ----------------------------------------------------------
iseed = 20201221
nrep <- 10
true_mu <- 1
set.seed(iseed)
nobs <- 1000
nstates <- 40
make_data3 <- function(nobs = 1000,
nstates = 40) {
# unit fixed effects (unobservd heterogeneity)
unit <- tibble(
unit = 1:nobs,
# generate state
state = sample(1:nstates, nobs, replace = TRUE),
unit_fe = rnorm(nobs, state/5, 1),
# generate instantaneous treatment effect
#mu = rnorm(nobs, true_mu, 0.2)
mu = true_mu
)
# year fixed effects (first part)
year <- tibble(
year = 1980:2010,
year_fe = rnorm(length(year), 0, 1)
)
# Put the states into treatment groups
treat_taus <- tibble(
# sample the states randomly
state = sample(1:nstates, nstates, replace = FALSE),
# place the randomly sampled states into five treatment groups G_g
# cohort_year = sort(rep(c(1986, 1992, 1998, 2004), 10))
cohort_year = sample(c(1986, 1992, 1998, 2004), nstates, replace = TRUE)
)
# make main dataset
# full interaction of unit X year
expand_grid(unit = 1:nobs, year = 1980:2010) %>%
left_join(., unit) %>%
left_join(., year) %>%
left_join(., treat_taus) %>%
# make error term and get treatment indicators and treatment effects
# Also get cohort specific trends (modify time FE)
mutate(error = rnorm(n(), 0, 1),
treat = ifelse((year >= cohort_year)* (cohort_year != 2004), 1, 0),
mu = ifelse(cohort_year==1992, 2, ifelse(cohort_year==1998, 1, 3)),
tau = ifelse(treat == 1, mu, 0),
year_fe = year_fe + 0.1*(year - cohort_year)
) %>%
# calculate cumulative treatment effects
group_by(unit) %>%
mutate(tau_cum = cumsum(tau)) %>%
ungroup() %>%
# calculate the dep variable
mutate(dep_var = (2010 - cohort_year) + unit_fe + year_fe + tau_cum + error) %>%
# Relabel 2004 cohort as never-treated
mutate(cohort_year = ifelse(cohort_year == 2004, Inf, cohort_year))
}
# Rest --------------------------------------------------------------------
data <- make_data3()
#> Joining, by = "unit"
#> Joining, by = "year"
#> Joining, by = "state"
data$rel_year <- data$year - data$cohort_year
data |>
fixest::feols(
dep_var ~ i(rel_year, ref = c(-1, -Inf)) | unit + year
) |>
coefplot(keep="rel_year::-?[1-5]{1}$")
data |>
fixest::feols(
dep_var ~ i(rel_year, ref = c(-1, -17, -Inf)) | unit + year
) |>
coefplot(keep="rel_year::-?[1-5]{1}$")
Created on 2022-07-14 by the reprex package (v2.0.1)
Hi,
First of all, thanks for all the amazing work on this package!
I was just playing around a bit with the data generated in this post: https://psantanna.com/posts/twfe The last version in this post is with treatment effect heterogeneity and none of the "standard" TWFE approaches work. But if I use event study from your package, TWFE also works (it doesn't in the post). So I was wondering whether the TWFE in event study is Wooldridge's version?
Thanks, Simon