Open siddharthhaveliwala opened 10 months ago
data <- read.csv("ICNSA_new.csv")
data$DATE <- as.Date(data$DATE) data <- na.omit(data)
data$lag1 <- c(NA, head(data$ICNSA, -1)) data$lag2 <- c(NA, NA, head(data$ICNSA, -2)) data$lag3 <- c(NA, NA, NA, head(data$ICNSA, -3))
data$covid_year <- ifelse(format(data$DATE, "%Y") %in% c("2020", "2021"), 1, 0)
ts_data <- ts(data$ICNSA, frequency = 52)
fit_covid <- auto.arima(ts_data, xreg = data$covid_year)
forecast_covid <- forecast(fit_covid, xreg = rep(1, 5))
lag_matrix <- as.matrix(data[, c("lag1", "lag2", "lag3")])
fit_lag <- auto.arima(ts_data, xreg = lag_matrix)
future_lag <- tail(ts_data, 3)
forecast_lag <- forecast(fit_lag, xreg = matrix(rep(future_lag, 5), ncol = 3, byrow = TRUE, dimnames = list(NULL, colnames(lag_matrix))), h = 5)
print('Forecast including covid data as xreg:') print(forecast_covid) print('Forecast including lagged data as xreg:') print(forecast_lag)
print('Forecast including covid data as xreg:') [1] "Forecast including covid data as xreg:" print(forecast_covid) Point Forecast Lo 80 Hi 80 Lo 95 Hi 95 58.26923 364976.2 258644.88 471307.4 202356.508 527595.8 58.28846 361668.6 181381.31 541955.9 85942.996 637394.2 58.30769 351002.6 118641.72 583363.5 -4362.695 706367.9 58.32692 341331.6 75422.16 607241.1 -65341.813 748005.1 58.34615 350291.1 63962.67 636619.6 -87610.468 788192.7 print('Forecast including lagged data as xreg:') [1] "Forecast including lagged data as xreg:" print(forecast_lag) Point Forecast Lo 80 Hi 80 Lo 95 Hi 95 58.26923 316328.9 211308.2 421349.6 155713.5 476944.2 58.28846 313163.5 208142.8 418184.2 152548.2 473778.9 58.30769 314792.9 209772.1 419813.6 154177.5 475408.2 58.32692 316079.5 211058.8 421100.2 155464.1 476694.9 58.34615 329931.3 224910.6 434952.0 169316.0 490546.7
library(ggplot2) library(plotly) library(dplyr) library(forecast) library(tseries) library(reprex) library(cowplot)
df_cc <- read.csv("/Users/siddharthhaveliwala/Documents/Spring 2024/Courses/CDA 500 - Time Series Analysis/HW/CACCLAIMS.csv") df_ic <- read.csv("/Users/siddharthhaveliwala/Documents/Spring 2024/Courses/CDA 500 - Time Series Analysis/HW/ICSA.csv")
head(df_cc) head(df_ic)
sum(is.na(df_cc)) sum(is.na(df_ic))
df_cc <- subset(df_cc, CACCLAIMS != '.') df_cc$DATE <- as.Date(df_cc$DATE) df_ic$DATE <- as.Date(df_ic$DATE) df_cc$CACCLAIMS <- as.numeric(df_cc$CACCLAIMS)
summary(df_cc) summary(df_ic) str(df_ic) str(df_cc)
plot(df_cc$DATE, df_cc$CACCLAIMS, type = "l", xlab = "Date", ylab = "Continued Claims", main = "Time Series Plot", col = "blue") lines(df_ic$DATE, df_ic$ICSA, type = "l", col = "red") legend("topright", legend = c("CACCLAIMS", "ICSA"), col = c("blue", "red"), lty = 1) title(main = "Time Series Plot", xlab = "Date", ylab = "Continued Claims")
adf.test(df_cc$CACCLAIMS) adf.test(df_ic$ICSA)
acf(df_cc$CACCLAIMS) pacf(df_cc$CACCLAIMS) acf(df_ic$ICSA) pacf(df_ic$ICSA)
cross_correlation <- ccf(df_ic$ICSA, df_cc$CACCLAIMS, lag.max = 10, plot = FALSE)
plot(cross_correlation, main = "Cross-Correlation between ICSA and CACCLAIMS")
merged_data <- merge(df_cc, df_ic, by = "DATE", all.x = TRUE)
sum(is.na(merged_data)) length(merged_data)
covid_start_date <- as.Date("2020-03-01") covid_end_date <- as.Date("2021-06-30")
merged_data_filtered <- merged_data[merged_data$DATE < covid_start_date | merged_data$DATE > covid_end_date, ] plot(merged_data_filtered$DATE, merged_data_filtered$CACCLAIMS, type = "l")
acf(merged_data_filtered$CACCLAIMS) pacf(merged_data_filtered$CACCLAIMS) acf(merged_data_filtered$ICSA) pacf(merged_data_filtered$ICSA)
cross_corr_filt <- ccf(merged_data_filtered$ICSA, merged_data_filtered$CACCLAIMS, lag.max = 10, plot = FALSE)
plot(cross_corr_filt, main = "Cross-Correlation between ICSA and CACCLAIMS (filtered data)")
merged_data_filtered <- merged_data_filtered %>% mutate(ICSA_lag1 = lag(ICSA, 1), CACCLAIMS_lag1 = lag(CACCLAIMS, 1))
merged_data_filtered <- na.omit(merged_data_filtered)
merged_data_filtered <- as_tibble(merged_data_filtered)
reg_arima_filtered <- auto.arima(merged_data_filtered$ICSA_lag1, xreg = merged_data_filtered$CACCLAIMS_lag1)
forecast_filtered <- forecast(reg_arima_filtered, xreg = merged_data_filtered$CACCLAIMS, h = 1)
print(forecast_filtered)
autoplot(forecast_filtered, main = "Forecast for Tomorrow's Date (Filtered Data)")
The provided script predicts Initial Claims (ICNSA) using Conditional Claims in California as an external regressor through an ARIMA(0,1,2) model. Loading and pre-processing the datasets included handling missing values and convering date columns. Exploratory Data Analysis involved checking summary statistics, conducting ADF tests for stationarity, and analyzing ACF and PACF plots to determine ARIMA terms. Model building encompasses cross-correlation analysis, merging datasets, filtering out the COVID-19 period, applying lag 1, and fitting a regARIMA model with filtered data. Finally, forecasts for February 22, 2024, are generated and visualized, providing forecasted values along with confidence intervals, with a value 212738.3.
Homework 2 -
# Load necessary libraries
library(fredr)
library(ggplot2)
library(plotly)
library(dplyr)
library(hrbrthemes)
library(tidyverse)
library(splines)
library(forecast)
# FRED API Key
fredr_set_key("d40a0fc0b1555df0c6568749cc1737e6")
# Retrieve the ICNSA data from FRED
D0 <- fredr(series_id = "ICNSA")
# Analyze the loaded dataset
summary(D0)
str(D0)
head(D0)
# Load the D0 data to D1 dataframe
D1 <- D0
D1$DATE <- as.Date(D1$date)
D1$ICNSA <- D1$value
# Drop unnecessary features
D1 <- subset(D1, select = -c(date, series_id, value, realtime_start, realtime_end))
# Analyze the D1 data frame
str(D1)
head(D1)
sum(is.na(D1))
## Empirical Analysis for Covid data
# Plot the ICNSA data
plot1 <- D1 %>%
ggplot(aes(DATE, ICNSA)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("ICNSA Count") +
geom_rect(data=D1, inherit.aes=FALSE, aes(xmin=DATE[DATE == "2019-08-17"], xmax=DATE[DATE == "2021-03-13"], ymin=min(ICNSA),
ymax=max(ICNSA)), color="transparent", fill="red", alpha=0.3) +
theme_ipsum()
plot1 <- ggplotly(plot1)
plot1
# Statistical Analysis
summary(D1$ICNSA)
# Plot the boxplot for ICNSA data
boxplot1 <- plot_ly(D1, y = D1$ICNSA, type = "box")
boxplot1
# Plot the histogram for ICNSA data
ggplot(D1) +
aes(x = ICNSA) +
geom_histogram(bins = as.integer(sqrt(nrow(D1))), fill = "navyblue") +
theme_minimal()
# Identify the outliers by quantiles (out of bound by interval from 2.5% and 97.5%)
lwr_bound <- quantile(D1$ICNSA, 0.025)
upr_bound <- quantile(D1$ICNSA, 0.975)
outliers <- which(D1$ICNSA < lwr_bound | D1$ICNSA > upr_bound)
outliers
# View the outlier's dates
D1[outliers, "DATE"]
# Define covid period
covid_period <- subset(D1, DATE >= as.Date("2019-08-17") & DATE <= as.Date("2021-03-13"))
## Implement a multiplicative and an additive Holt-Winters model
# Fit a cubic spline to the covid period data
spline_fit <- smooth.spline(x = as.numeric(covid_period$DATE), y = covid_period$ICNSA, lambda = 0.7)
# Predict the values using fitted spline
imputed_values <- predict(spline_fit, x = as.numeric(covid_period$DATE))
# Replace the values in the original dataset with the predicted values
D1$ICNSA[which(D1$DATE >= as.Date("2019-08-17") & D1$DATE <= as.Date("2021-03-13"))] <- imputed_values$y
# Visualize the original and imputed data
spline_plot <- plot_ly()
spline_plot <- add_lines(spline_plot, x = ~D1$DATE, y = ~D1$ICNSA, name = "Original", line = list(color = "blue"))
spline_plot <- add_lines(spline_plot, x = ~covid_period$DATE, y = ~imputed_values$y, name = "Imputed", line = list(color = "red"))
spline_plot <- layout(spline_plot, xaxis = list(title = "Date"), yaxis = list(title = "ICNSA"))
spline_plot
# Convert the data to time series object
ts_data <- ts(D1$ICNSA, frequency = 52)
# Fit the multiplicative Holt-Winters model
hw_multiplicative <- HoltWinters(ts_data, seasonal = "mult")
# Fit the additive Holt-Winters model
hw_additive <- HoltWinters(ts_data, seasonal = "additive")
# Forecast the next ICNSA value using hw_multiplicative and hw_additive
forecast_multiplicative <- forecast(hw_multiplicative, h = 1)
forecast_additive <- forecast(hw_additive, h = 1)
# Extract the forecasted values
next_ICNSA_multiplicative <- forecast_multiplicative$mean[length(forecast_multiplicative$mean)]
next_ICNSA_additive <- forecast_additive$mean[length(forecast_additive$mean)]
# Print the forecasted values
print(paste("Forecasted ICNSA value (Multiplicative Holt-Winters):", round(next_ICNSA_multiplicative, 2)))
print(paste("Forecasted ICNSA value (Additive Holt-Winters):", round(next_ICNSA_additive, 2)))
Forecasted ICNSA value (Multiplicative Holt-Winters): 193561.54 Forecasted ICNSA value (Additive Holt-Winters): 201703.88
Homework 3:
# Load necessary libraries
library(fredr)
library(ggplot2)
library(plotly)
library(dplyr)
library(hrbrthemes)
library(tidyverse)
library(splines)
library(forecast)
library(tseries)
library(imputeTS)
library(legion)
# FRED API Key
fredr_set_key("d40a0fc0b1555df0c6568749cc1737e6")
# Retrieve the ICNSA data from FRED
D0 <- fredr(series_id = "ICNSA", observation_start = as.Date("1971-01-02"), observation_end = as.Date("2024-02-17"))
# Analyze the loaded dataset
summary(D0)
str(D0)
head(D0)
# Load the D0 data to D1 dataframe
D1 <- D0
D1$DATE <- as.Date(D1$date)
D1$ICNSA <- D1$value
# Drop unnecessary features
D1 <- subset(D1, select = -c(date, series_id, value, realtime_start, realtime_end))
# Analyze the D1 data frame
str(D1)
head(D1)
sum(is.na(D1))
## Empirical Analysis for Covid data
# Plot the ICNSA data
plot1 <- D1 %>%
ggplot(aes(DATE, ICNSA)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("ICNSA Count") +
geom_rect(data=D1, inherit.aes=FALSE, aes(xmin=DATE[DATE == "2020-03-14"], xmax=DATE[DATE == "2020-11-28"], ymin=min(ICNSA),
ymax=max(ICNSA)), color="transparent", fill="red", alpha=0.3) +
theme_ipsum()
plot1 <- ggplotly(plot1)
plot1
# Identify the outliers by quantiles (out of bound by interval from 2.5% and 97.5%)
lwr_bound <- quantile(D1$ICNSA, 0.025)
upr_bound <- quantile(D1$ICNSA, 0.975)
outliers <- which(D1$ICNSA < lwr_bound | D1$ICNSA > upr_bound)
outliers
# Define covid period
covid_period <- subset(D1, DATE >= as.Date("2020-03-14") & DATE <= as.Date("2020-11-28"))
# Replace values with NA
covid_period$ICNSA <- NA
# Replace covid period data in D1 data frame
D1$ICNSA[which(D1$DATE >= as.Date("2020-03-14") & D1$DATE <= as.Date("2020-11-28"))] <- covid_period$ICNSA
# Convert the D1 data frame to time series object
ts_data <- ts(D1$ICNSA, frequency = 52)
sum(is.na(ts_data))
# Plot the ts_data NA distributions
ggplot_na_distribution(ts_data)
statsNA(ts_data)
# Apply Kalman filter on the time series
D1_imp <- na_kalman(ts_data, smooth = TRUE)
# Plot the imputation plot
ggplot_na_imputations(ts_data, D1_imp)
# Build structural time series models (level, trend, BSM)
str_model_level <- StructTS(D1_imp, type = "level")
str_model_trend <- StructTS(D1_imp, type = "trend")
str_model_bsm <- StructTS(D1_imp, type = "BSM")
# Forecast the values for built models
str_forecast_level <- forecast(str_model_level, h = 1)
str_forecast_trend <- forecast(str_model_trend, h = 1)
str_forecast_bsm <- forecast(str_model_bsm, h = 1)
# Print the values for forecasts of structural models
print(str_forecast_level)
print(str_forecast_trend)
print(str_forecast_bsm)
# Retrieve the IURNSA (Initial Unemployment Claims Non-Seasonally Adjusted) data from FRED
D2 <- fredr(series_id = "IURNSA")
# Statistical Analysis of IURNSA data
summary(D2)
str(D2)
D2$DATE <- as.Date(D2$date)
D2$IURNSA <- D2$value
D2 <- subset(D2, select = -c(date, series_id, value, realtime_start, realtime_end))
sum(is.na(D2))
# Plot the ICNSA data
plot2 <- D2 %>%
ggplot(aes(DATE, IURNSA)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("IURNSA Count") +
geom_rect(data=D2, inherit.aes=FALSE, aes(xmin=DATE[DATE == "2020-03-14"], xmax=DATE[DATE == "2020-11-28"], ymin=min(IURNSA),
ymax=max(IURNSA)), color="transparent", fill="red", alpha=0.3) +
theme_ipsum()
plot2 <- ggplotly(plot2)
plot2
# Define covid period for IURNSA data
covid_period_2 <- subset(D2, DATE >= as.Date("2020-03-14") & DATE <= as.Date("2020-11-28"))
# Replace values with NA in covid_period_2
covid_period_2$IURNSA <- NA
# Replace covid period data in D2 data frame
D2$IURNSA[which(D2$DATE >= as.Date("2020-03-14") & D2$DATE <= as.Date("2020-11-28"))] <- covid_period_2$IURNSA
# Convert the D2 data frame to time series object
ts_data_2 <- ts(D2$IURNSA, frequency = 52)
# Plot the ts_data_2 NA distributions
ggplot_na_distribution(ts_data_2)
ggplot_na_distribution2(ts_data_2)
ggplot_na_gapsize(ts_data_2)
statsNA(ts_data_2)
# Apply Kalman filter on the time series
D2_imp <- na_kalman(ts_data_2, smooth = TRUE)
# Plot the imputation plot
ggplot_na_imputations(ts_data_2, D2_imp)
# Convert the D2_imp data frame to time series object
ts_data_2 <- ts(D2_imp, frequency = 52)
ts_data_2
# Convert the D1_imp data frame to time series object
ts_data_1 <- ts(D1_imp, frequency = 52)
ts_data_1
# Union of two time series into a multivariate time series
multivariate_ts <- ts.union(ts_data_1, ts_data_2)
# Check for NA values
sum(is.na(multivariate_ts))
# Fit the model based on covariate series using ETS (Error, Trend and Seasonal) based multivariate forecasting model
covariate_model <- auto.vets(multivariate_ts)
# Summary of the model
summary(covariate_model)
# Forecast ICNSA using covariate series IURNSA
forecast <- forecast(covariate_model, h = 1)
# Print the forecasted value
print(forecast)
# > print(forecast)
# Time Series:
# Start = c(54, 18)
# End = c(54, 18)
# Frequency = 52
# ts_data_1 ts_data_2
# 54.32692 188821.4 1.476452
Forecasted ICNSA value : 188821.4 Forecasted IURNSA value : 1.476452
# Load necessary libraries
library(lubridate)
library(stringr)
library(ggplot2)
library(plotly)
library(dplyr)
library(tseries)
library(imputeTS)
library(vars)
library(forecast)
library(corrplot)
library(BigVAR)
library(reprex)
# Set working directory
setwd("/Users/siddharthhaveliwala/Documents/Spring 2024/Courses/CDA 500 - Time Series Analysis/HW4/Data/input/Tabacco")
# Get 12BVS time series data
tobacco_vs <- read.csv("12BVS.csv")
# Data preprocessing
tobacco_vs <- tobacco_vs[-(1:7),]
tobacco_vs <- subset(tobacco_vs, select = c('X','X.1'))
names(tobacco_vs)[names(tobacco_vs) == "X"] <- "Period"
names(tobacco_vs)[names(tobacco_vs) == "X.1"] <- "Value"
#View(tobacco_vs)
sum(is.na(tobacco_vs))
tobacco_vs <- na.omit(tobacco_vs)
tobacco_vs$Value <- gsub('[,]', '', tobacco_vs$Value)
tobacco_vs$Period <- mdy(tobacco_vs$Period)
tobacco_vs$Value <- as.numeric(tobacco_vs$Value)
# Analyze the dataset
summary(tobacco_vs)
str(tobacco_vs)
# Get 12BTI time series data
tobacco_ti <- read.csv("12BTI.csv")
# Data preprocessing
tobacco_ti <- tobacco_ti[-(1:7),]
tobacco_ti <- subset(tobacco_ti, select = c('X','X.1'))
names(tobacco_ti)[names(tobacco_ti) == "X"] <- "Period"
names(tobacco_ti)[names(tobacco_ti) == "X.1"] <- "Value"
tobacco_ti$Value <- gsub('[,]', '', tobacco_ti$Value)
#View(tobacco_ti)
sum(is.na(tobacco_ti))
tobacco_ti <- na.omit(tobacco_ti)
tobacco_ti$Period <- mdy(tobacco_ti$Period)
tobacco_ti$Value <- as.numeric(tobacco_ti$Value)
# Analyze the dataset
summary(tobacco_ti)
str(tobacco_ti)
# Get 12STI time series data
bev_tab_ti <- read.csv("12STI.csv")
# Data preprocessing
bev_tab_ti <- bev_tab_ti[-(1:7),]
bev_tab_ti <- subset(bev_tab_ti, select = c('X','X.1'))
names(bev_tab_ti)[names(bev_tab_ti) == "X"] <- "Period"
names(bev_tab_ti)[names(bev_tab_ti) == "X.1"] <- "Value"
bev_tab_ti$Value <- gsub('[,]', '', bev_tab_ti$Value)
#View(bev_tab_ti)
sum(is.na(bev_tab_ti))
bev_tab_ti <- na.omit(bev_tab_ti)
bev_tab_ti$Period <- mdy(bev_tab_ti$Period)
bev_tab_ti$Value <- as.numeric(bev_tab_ti$Value)
# Analyze the dataset
summary(bev_tab_ti)
str(bev_tab_ti)
# Get 12SVS time series data
bev_tab_vs <- read.csv("12SVS.csv")
# Data Preprocessing
bev_tab_vs <- bev_tab_vs[-(1:7),]
bev_tab_vs <- subset(bev_tab_vs, select = c('X','X.1'))
names(bev_tab_vs)[names(bev_tab_vs) == "X"] <- "Period"
names(bev_tab_vs)[names(bev_tab_vs) == "X.1"] <- "Value"
bev_tab_vs$Value <- gsub('[,]', '', bev_tab_vs$Value)
#View(bev_tab_vs)
sum(is.na(bev_tab_vs))
bev_tab_vs <- na.omit(bev_tab_vs)
bev_tab_vs$Period <- mdy(bev_tab_vs$Period)
bev_tab_vs$Value <- as.numeric(bev_tab_vs$Value)
# Analyze the dataset
summary(bev_tab_vs)
str(bev_tab_vs)
# Plot the tobacco_vs data
plot1 <- tobacco_vs %>%
ggplot(aes(Period, Value)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("Tobacco Manufacturing Value of Shipments Count") +
theme_minimal()
plot1 <- ggplotly(plot1)
plot1
# Plot the tobacco_ti data
plot2 <- tobacco_ti %>%
ggplot(aes(Period, Value)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("Tobacco Manufacturing Total Inventories Count") +
theme_minimal()
plot2 <- ggplotly(plot2)
plot2
# Plot the bev_tab_vs data
plot3 <- bev_tab_vs %>%
ggplot(aes(Period, Value)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("Beverages and Tobacco Products Value of Shipments Count") +
theme_minimal()
plot3 <- ggplotly(plot3)
plot3
# Plot the bev_tab_ti data
plot4 <- bev_tab_ti %>%
ggplot(aes(Period, Value)) +
geom_area(fill="blue", alpha=0.5) +
geom_line(color="blue") +
ylab("Beverages and Tobacco Products Total Inventories Count") +
theme_minimal()
plot4 <- ggplotly(plot4)
plot4
# Convert to Time Series
ts_tobacco_vs <- ts(tobacco_vs$Value, frequency = 12)
ts_tobacco_ti <- ts(tobacco_ti$Value, frequency = 12)
ts_bev_tab_ti <- ts(bev_tab_ti$Value, frequency = 12)
ts_bev_tab_vs <- ts(bev_tab_vs$Value, frequency = 12)
# Union the data into multivariate time series object
multivariate_ts <- ts.union(ts_bev_tab_vs, ts_tobacco_vs, ts_tobacco_ti, ts_bev_tab_ti)
#View(multivariate_ts)
# Plot the multivariate object
autoplot(multivariate_ts)
# Plot the correlations of the multivariate object
corr_multivariate_ts <- cor(multivariate_ts)
corrplot(corr_multivariate_ts, method = "number")
# Determine the persistence of the Model by ACF and PACF plots
par(mfrow = c(2,2))
acf(ts_bev_tab_vs, main = "ACF for Beverage & Tobacco - Value of Shipments")
acf(ts_tobacco_vs, main = "ACF for Tobacco Manufacturing - Value of Shipments")
acf(ts_tobacco_ti, main = "ACF for Tobacco Manufacturing - Total Inventories")
acf(ts_bev_tab_ti, main = "ACF for Beverage & Tobacco Products - Total Inventories")
pacf(ts_bev_tab_vs, main = "PACF for Beverage & Tobacco - Value of Shipments")
pacf(ts_tobacco_vs, main = "PACF for Tobacco Manufacturing - Value of Shipments")
pacf(ts_tobacco_ti, main = "PACF for Tobacco Manufacturing - Total Inventories")
pacf(ts_bev_tab_ti, main = "PACF for Beverage & Tobacco Products - Total Inventories")
# Finding the optimal lags
lagselect <- VARselect(multivariate_ts, lag.max = 10, type = "const")
lagselect$selection
#Lag = 3
# Build the VAR(1) model
var_model_1 <- VAR(multivariate_ts, p = 1, type = "const", season = NULL, exog = NULL)
summary(var_model_1)
# Diagnose VAR(1) model
serial_var_1 <- serial.test(var_model_1, lags.pt = 12, type = "PT.asymptotic")
serial_var_1
# Heteroskedasticy check (arch effects)
arch_var_1 <- arch.test(var_model_1, lags.multi = 12, multivariate.only = TRUE)
arch_var_1
# Normal Distribution of Residuals
norm_var_1 <- normality.test(var_model_1, multivariate.only = TRUE)
norm_var_1
# Testing for structural breaks in Residuals (model stability test)
stability_var_1 <- stability(var_model_1, type = "OLS-CUSUM")
par(mar=c(1,1,1,1))
plot(stability_var_1)
# Granger's causality test
granger_tobacco_ti <- causality(var_model_1, cause = 'ts_tobacco_ti')
granger_tobacco_ti
granger_bev_tab_vs <- causality(var_model_1, cause = 'ts_bev_tab_vs')
granger_bev_tab_vs
granger_bev_tab_ti <- causality(var_model_1, cause = 'ts_bev_tab_ti')
granger_bev_tab_ti
granger_tobacco_vs <- causality(var_model_1, cause = 'ts_tobacco_vs')
granger_tobacco_vs
# Impluse Response Functions
inf_var_model_1 <- irf(var_model_1, impulse = "ts_tobacco_ti", response = c("ts_tobacco_vs", "ts_bev_tab_vs", "ts_bev_tab_ti"), boot = TRUE, n.ahead = 10)
plot(inf_var_model_1, main = "Shock from Tobacco Products - Total Inventories")
# Variance Decomposition
fevd_var_1 <- fevd(var_model_1, n.ahead = 10)
plot(fevd_var_1)
# VAR(1) Forecasting
forecast_var_1 <- predict(var_model_1, n.ahead = 12, ci = 0.95)
fanchart(forecast_var_1, names = "ts_bev_tab_vs")
fanchart(forecast_var_1, names = "ts_tobacco_ti")
fanchart(forecast_var_1, names = "ts_tobacco_vs")
fanchart(forecast_var_1, names = "ts_bev_tab_ti")
# Print the forecast using VAR(1)
print(forecast_var_1)
# Forecast with VAR 1 model
# ts_bev_tab_vs : 18018.41
# ts_tobacco_vs : 6588.687
# ts_tobacco_ti : 3594.108
# ts_bev_tab_ti : 30838.32
# Build the VAR(p) model
var_model_p <- VAR(multivariate_ts, p = 3, type = "const", season = NULL, exog = NULL)
summary(var_model_p)
# Diagnose VAR(1) model
serial_var_p <- serial.test(var_model_p, lags.pt = 12, type = "PT.asymptotic")
serial_var_p
# Heteroskedasticy (arch effects)
arch_var_p <- arch.test(var_model_p, lags.multi = 12, multivariate.only = TRUE)
arch_var_p
# Normal Distribution of Residuals
norm_var_p <- normality.test(var_model_p, multivariate.only = TRUE)
norm_var_p
# Testing for structural breaks in Residuals (model stability test)
stability_var_p <- stability(var_model_p, type = "OLS-CUSUM")
par(mar=c(1,1,1,1))
plot(stability_var_p)
# Granger's causality test
granger_tobacco_ti_p <- causality(var_model_p, cause = 'ts_tobacco_ti')
granger_tobacco_ti_p
granger_bev_tab_vs_p <- causality(var_model_p, cause = 'ts_bev_tab_vs')
granger_bev_tab_vs_p
granger_bev_tab_ti_p <- causality(var_model_p, cause = 'ts_bev_tab_ti')
granger_bev_tab_ti_p
granger_tobacco_vs_p <- causality(var_model_p, cause = 'ts_tobacco_vs')
granger_tobacco_vs_p
# Impluse Response Functions
inf_var_model_p <- irf(var_model_p, impulse = "ts_tobacco_ti", response = c("ts_tobacco_vs", "ts_bev_tab_vs", "ts_bev_tab_ti"), boot = TRUE, n.ahead = 10)
plot(inf_var_model_p, main = "Shock from Tobacco Products - Total Inventories")
# Variance Decomposition
fevd_var_p <- fevd(var_model_p, n.ahead = 10)
plot(fevd_var_p)
# VAR Forecasting
forecast_var_p <- predict(var_model_p, n.ahead = 1, ci = 0.95)
fanchart(forecast_var_p, names = "ts_bev_tab_vs")
fanchart(forecast_var_p, names = "ts_tobacco_ti")
fanchart(forecast_var_p, names = "ts_tobacco_vs")
fanchart(forecast_var_p, names = "ts_bev_tab_ti")
# Print the forecasts for VAR(p) model
print(forecast_var_p)
# Forecast with VAR(p) model
# ts_bev_tab_vs : 18479.15
# ts_tobacco_vs : 7033.018
# ts_tobacco_ti : 3580.238
# ts_bev_tab_ti : 30819.38
# BigVAR
# T = 386
# Fit the BIGVAR(3) model on the multivariate time series
bigvar_model_p <- constructModel(as.matrix(multivariate_ts), p = 3, struct = "SparseLag", gran = c(25,10), verbose = FALSE, h = 5, IC = TRUE)
# Save the BIGVAR model results
results_bigvar <- cv.BigVAR(bigvar_model_p)
# Print the results
results_bigvar
# Plot the BIGVAR results
plot(results_bigvar)
SparsityPlot.BigVAR.results(results_bigvar)
# Predict the forecast using BIGVAR(3) model
forecast_bigvar <- predict(results_bigvar, n.ahead = 1)
forecast_bigvar
# Forecast with BigVAR SparseOO model
# ts_bev_tab_vs : 18199.354
# ts_tobacco_vs : 6645.381
# ts_tobacco_ti : 3371.634
# ts_bev_tab_ti : 30844.491
#####################################
# FORECASTS - VAR(1), VAR(p) & BIGVAR
#####################################
# Forecast with VAR 1 model
# ts_bev_tab_vs : 18018.41
# ts_tobacco_vs : 6588.687
# ts_tobacco_ti : 3594.108
# ts_bev_tab_ti : 30838.32
# Forecast with VAR(p) model
# ts_bev_tab_vs : 18479.15
# ts_tobacco_vs : 7033.018
# ts_tobacco_ti : 3580.238
# ts_bev_tab_ti : 30819.38
# Forecast with BigVAR SparseLag model
# ts_bev_tab_vs : 18199.354
# ts_tobacco_vs : 6645.381
# ts_tobacco_ti : 3371.634
# ts_bev_tab_ti : 30844.491
#####################################
> install.packages("tsbox")
> install.packages("xts")
> install.packages("tsibble")
> install.packages("fredr")
> install.packages("prophet")
> install.packages("signal")
library(tidyverse) library(fredr) library(dplyr) library(tidyr) library(tsbox) library(xts) library(tsibble) library(ggplot2) library(signal)
df_icnsa <- fredr(series_id = "ICNSA")
df <- read.csv("/Users/siddharthhaveliwala/Downloads/ICNSA.csv")
which(is.na(df)) sum(is.na(df))
head(df) str(df) summary(df)
df$date <- as.Date(df$DATE) df$value <- df$ICNSA
df <- select(df, -DATE, -ICNSA)
ts_plot(df)
df$month <- format(df$date, "%Y-%m") df_summary <- df %>% group_by(month) %>% summarise(mean_value = mean(value))
summary(df_summary)
df_summary$month <- as.Date(paste0(df_summary$month, "-01"), format = "%Y-%m-%d")
ts_plot(df_summary)
df$year <- format(df$date, "%Y") df_ann_summary <- df %>% group_by(year) %>% summarise(mean_value = mean(value))
df_ann_summary$year <- as.Date(paste0(df_ann_summary$year, "-01-01"), format = "%Y-%m-%d")
ts_plot(df_ann_summary)
ts_data <- ts(df$value, frequency = 52)
decomposed <- decompose(ts_data) detrended_data <- ts_data - decomposed$trend
detrended_df <- data.frame(date = time(ts_data), value = detrended_data)
View(detrended_df)
class(detrended_df$date)
sum(is.na(detrended_df))
detrended_df <- na.omit(detrended_df) detrended_df$date <- as.Date(detrended_df$date)
library(ggplot2)
plot(detrended_df, type = 'line')
have plotted the time series data but trying to implement different models to get the predictions