google / CausalImpact

An R package for causal inference in time series
Apache License 2.0
1.71k stars 254 forks source link

Error with Relative effect #70

Open quynhchau4 opened 1 year ago

quynhchau4 commented 1 year ago

I'm running into problems with the relative effect when making the forecast. It always returns much bigger than expected, sometimes goes negative although should be positive. Please find below my code and advise how to fix it.

install.packages("dplyr")

install.packages("magrittr")

install.packages("ggplot2")

install.packages("scales")

install.packages("tscount")

install.packages("mgcv")

install.packages("CausalImpact")

install.packages("changepoint")

install.packages("bcp")

install.packages("timetk")

install.packages("xts")

install.packages("zoo")

install.packages("tidyverse")

library(dplyr) # For data manipulation library(magrittr) # For the '%>%' pipe operator library(ggplot2) # For producing plots library(scales) # For making plot axes pretty library(tscount) # For simulating time series count data library(mgcv) # For fitting generalised additive models library(CausalImpact) # For fitting Byesian structural time series models for causal inference library(changepoint) # For fitting changepoint models library(bcp) # For fitting Bayesian changepoint models library(timetk) library(tidyr) library(xts) library(zoo) library(tidyverse)

please input your absolute directory path of "treatment_vs_control.csv" by follow 20230730.csv

data <- read.csv("C:\Users\quynh\Downloads\Task1\DASH\20230730.csv") data$date <- as.Date(data$date, format="%Y/%m/%d")

format(data$date, "%d/%m/%Y")

data <- data %>% mutate(date = as.Date(date))

Visual plot to see data trend

ggplot(data, aes(x = date, y = dash))+ geom_point(alpha= 0.3) + labs(title = "dash price data from 2019 to 2020", x = "date", y = "dash Price")

Convert data to aggregation by week

data_dash_wk <- data %>% filter(date > as.Date("2019-01-01")) %>% group_by(week = cut(date, "week")) %>% summarise(dash = mean(dash, na.rm = TRUE)) %>% mutate(week = as.Date(as.character(week)))

Create row index

data_dash_wk <- data_dash_wk %>% mutate(index = row_number())

Visual plot to see data trend

ggplot(data_dash_wk, aes(x = week, y = dash))+ geom_vline(xintercept = as.Date("2020-03-11"), color = "blue", lty = "dashed") + geom_point(color = "red") + geom_line(alpha = 0.4) + labs(title = "dash data from Jan 2019 to end June 2020", subtitle = "dash data declared on 11th March 2020 (dotted blue line)", x = "date", y = "dash")

Post Intervention Period is filled with NA

data_dash_wk_causal <- data_dash_wk %>% mutate(dash = replace(dash, week >= as.Date("2020-03-11"), NA))

Create ts zoo data

ts_dash_wk <- zoo(data_dash_wk_causal$dash, data_dash_wk_causal$week)

plot(ts_dash_wk)

Model 3

ss3 <- list()

Semi Local trend, weekly-seasonal

ss3 <- AddSemilocalLinearTrend(ss3, ts_dash_wk)

Add weekly seasonal

ss3 <- AddSeasonal(ss3, ts_dash_wk, nseasons = 52) model3 <- bsts(ts_dash_wk, state.specification = ss3, niter = 1500, burn = 500) plot(model3, main = "Model 3") plot(model3, "components")

Model 3

plot(model3$state.specification[[2]], model3,ylim = c(-6000,6000), ylab = "Distribution", xlab = "Date") par(new=TRUE) plot(components3$Date, components3$Seasonality, col = "magenta", type = "l", ylim = c(-6000,6000), ylab = "Distribution", xlab = "Date") abline(h = 2000, col = "red") abline(h = -2000, col = "red")

components3 = cbind.data.frame( colMeans(model3$state.contributions[-(1:500),"trend",]), colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]), as.Date(time(ts_dash_wk))) names(components3) = c("Trend", "Seasonality", "Date")

components3 = cbind.data.frame( colMeans(model3$state.contributions[-(1:500),"trend",]), colMeans(model3$state.contributions[-(1:500),"seasonal.52.1",]), as.Date(time(ts_dash_wk))) names(components3) = c("Trend", "Seasonality", "date")

components3 = pivot_longer(components3, cols =c("Trend","Seasonality"))

names(components3) = c("date", "Component", "Value")

Causal impact of dash and Covid-19

pre.period <- as.Date(c("2019-01-01", "2020-03-11")) post.period <- as.Date(c("2020-03-11", "2020-06-30"))

Obtain post period data

dat_dash_causal_post <- data_dash_wk %>% filter(week >= as.Date("2020-03-11"))

Use model 3 for causal impact

impact <- CausalImpact(bsts.model = model3, post.period.response = dat_dash_causal_post$dash, alpha = 0.05) plot(impact)

summary(impact)

summary(impact, "report")

SeanRichterWalsh commented 1 year ago

I have also noticed some off calculations of relative effect. In the vignette, the dummy example output checks out. I have also run analyses that work out fine but in some instances I see relative effect greater than the value it should be.