Open SH3458 opened 5 months ago
@SH3458 Can you throw more light on how you considered p,d,q values for ARIMA model without ACF and PACF plots?
This is truly insightful! I did not have decomposition in my code, and it really helps make much more sense of the data.
Yes, so here as I am using 'auto.arima' function, it automatically chooses the best potential values of p, q, and d values. Just an added information, if you do not want to use the 'auto.arima' function then apart from ACF & PACF plots you can also perform grid search over various orders and select the combination that gives us the minimum BIC value (Bayesian Information Criterion).
When you say the best potential p, q values — that makes sense. But d is the order of differencing needed to make the series stationary. So does auto.arima perform a stationarity check in the background?!
Yes, it actually does!
Some possible covid start and end dates
# Candidate COVID window: mark the rows between these two dates and blank
# out their ICNSA values in a working copy (icnsa_na).
covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
# Logical mask: TRUE for every row inside the window, FALSE elsewhere
covid_idx <- seq_len(nrow(icnsa)) %in% (covid_start_idx:covid_end_idx)
icnsa_na <- icnsa |>
  select(date, value)
icnsa_na$value[covid_idx] <- NA
Homework assignment - 1 from Brightspace
suppressMessages({
library(astsa)
library(dplyr)
library(dygraphs)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
})
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# Pull both FRED series: weekly initial claims (ICNSA) and continued
# claims (CCNSA), not seasonally adjusted.
icnsa <- fredr(series_id = "ICNSA")
ccnsa <- fredr(series_id = "CCNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
ccnsa$date <- as.Date(ccnsa$date)
# Check for missing values. The original assigned both checks to the same
# variable, so the ICNSA result was silently overwritten by the CCNSA one;
# keep a separate flag per series and a combined flag for compatibility.
missing_values_icnsa <- any(is.na(icnsa$value))
missing_values_ccnsa <- any(is.na(ccnsa$value))
missing_values <- missing_values_icnsa || missing_values_ccnsa
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "CCNSA Series Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Handling COVID data for ICNSA: blank out the pandemic spike so it does
# not dominate the model fit.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Impute NA values using linear interpolation. na.rm = FALSE keeps the
# vector the same length even if leading/trailing NAs ever appear (the
# default na.rm = TRUE silently drops un-interpolatable edge NAs, which
# would shorten the column and break the data frame).
icnsa$value <- zoo::na.approx(icnsa$value, na.rm = FALSE)
# Check for missing values after imputation
missing_after_imputation <- any(is.na(icnsa$value))
missing_after_imputation
#> [1] FALSE
# Handling COVID data for CCNSA: blank out the pandemic window before
# modelling, mirroring the ICNSA treatment above.
covid_s <- as.Date("2020-03-28")
covid_e <- as.Date("2020-10-17")
covid_s_idx <- which(ccnsa$date == covid_s)
covid_e_idx <- which(ccnsa$date == covid_e)
cov_idx <- rep(0, nrow(ccnsa))
cov_idx[covid_s_idx:covid_e_idx] <- 1
cov_idx <- as.logical(cov_idx)
ccnsa$value[cov_idx] <- NA
# Impute NA values using linear interpolation. na.rm = FALSE keeps the
# vector length fixed even if leading/trailing NAs ever appear (the default
# would silently drop them and shorten the column).
ccnsa$value <- zoo::na.approx(ccnsa$value, na.rm = FALSE)
# Check for missing values after imputation
miss_after_imputation <- any(is.na(ccnsa$value))
miss_after_imputation
#> [1] FALSE
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "CCNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Convert data into a time series object.
# frequency = 52.18 is the average number of weeks per year (365.25 / 7);
# a non-integer frequency is why auto.arima later warns that it rounded the
# frequency for seasonal differencing.
icnsa_ts <- ts(icnsa$value, frequency = 52.18)
ccnsa_ts <- ts(ccnsa$value, frequency = 52.18)
# Plot the time series
autoplot(icnsa_ts) +
labs(title = "ICNSA")
autoplot(ccnsa_ts) +
labs(title = "CCNSA")
# ACF and PACF plots (NOTE(review): lag.max is fractional here; it is
# presumably coerced to a whole number of lags internally — confirm)
ggtsdisplay(icnsa_ts, main = "ICNSA Plot", lag.max = 52.18)
ggtsdisplay(ccnsa_ts, main = "CCNSA Plot", lag.max = 52.18)
# Model identification for ICNSA using auto.arima, which searches over
# (p, d, q) and seasonal orders automatically (see the discussion at the top
# of this thread). The warning below comes from the non-integer frequency
# set when the ts objects were created.
icnsa_model <- auto.arima(icnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Model identification for CCNSA using auto.arima
ccnsa_model <- auto.arima(ccnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Display identified models (orders, coefficients, and information criteria)
icnsa_model
#> Series: icnsa_ts
#> ARIMA(4,0,2)(0,1,1)[52]
#>
#> Coefficients:
#> ar1 ar2 ar3 ar4 ma1 ma2 sma1
#> 1.8970 -0.9218 0.0371 -0.0159 -1.4718 0.5706 -0.2342
#> s.e. 0.0481 0.0763 0.0412 0.0320 0.0446 0.0474 0.0190
#>
#> sigma^2 = 1.063e+09: log likelihood = -34582.42
#> AIC=69180.84 AICc=69180.89 BIC=69228.7
ccnsa_model
#> Series: ccnsa_ts
#> ARIMA(0,0,5)(0,1,1)[52]
#>
#> Coefficients:
#> ma1 ma2 ma3 ma4 ma5 sma1
#> 1.3326 1.6475 1.4565 1.0185 0.4920 -0.2595
#> s.e. 0.0196 0.0254 0.0234 0.0179 0.0146 0.0221
#>
#> sigma^2 = 3.499e+10: log likelihood = -39685.21
#> AIC=79384.41 AICc=79384.45 BIC=79426.29
# Time series cross-correlation analysis
cross_correlation_icnsa_ccnsa <- ccf(icnsa_ts, ccnsa_ts, lag.max = 52.18, type = "correlation", main = "Cross-correlation between ICNSA and CCNSA", xlab = "Lag", ylab = "Correlation")
# Identify the lag with the maximum correlation. which.max() returns the
# POSITION inside the acf vector, not the lag itself: ccf lags run from
# -lag.max to +lag.max, so the position must be mapped through $lag
# (the original printed the raw index, which mislabelled the lag).
max_corr_idx <- which.max(cross_correlation_icnsa_ccnsa$acf)
max_corr_lag <- cross_correlation_icnsa_ccnsa$lag[max_corr_idx]
max_corr_value <- cross_correlation_icnsa_ccnsa$acf[max_corr_idx]
cat("Maximum correlation is at lag", max_corr_lag, "with a value of", max_corr_value, "\n")
# Make sure both time series have the same length before combining them
min_length <- min(length(icnsa_ts), length(ccnsa_ts))
icnsa_ts <- head(icnsa_ts, min_length)
ccnsa_ts <- head(ccnsa_ts, min_length)
# Combine the time series as a data frame (one column per series)
combined_data <- data.frame(icnsa = icnsa_ts, ccnsa = ccnsa_ts)
# Fit regARIMA model: ARIMA errors around a regression of ICNSA on CCNSA
reg_arima_model <- auto.arima(combined_data[, "icnsa"], xreg = combined_data[, "ccnsa"])
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Summary of the regARIMA model
summary(reg_arima_model)
#> Series: combined_data[, "icnsa"]
#> Regression with ARIMA(3,0,2)(0,1,1)[52] errors
#>
#> Coefficients:
#> ar1 ar2 ar3 ma1 ma2 sma1 drift xreg
#> 1.7560 -0.7199 -0.0437 -1.4236 0.5583 -0.2330 25.0600 0.0448
#> s.e. 0.0523 0.0739 0.0327 0.0384 0.0271 0.0191 157.8115 0.0090
#>
#> sigma^2 = 1.056e+09: log likelihood = -34559.38
#> AIC=69136.76 AICc=69136.82 BIC=69190.59
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -113.3733 32159.01 21031.19 -0.4065116 5.793527 0.3004479
#> ACF1
#> Training set 0.0002859649
# Regression Diagnostics. The Ljung-Box p-value below (< 2.2e-16) indicates
# significant residual autocorrelation remains in this fit.
checkresiduals(reg_arima_model)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(3,0,2)(0,1,1)[52] errors
#> Q* = 392.88, df = 98.36, p-value < 2.2e-16
#>
#> Model df: 6. Total lags used: 104.36
# Forecasting for the next 1 value using regARIMA model.
# NOTE(review): the last OBSERVED CCNSA value is reused as next week's
# covariate — confirm that proxy is acceptable for this assignment.
num_forecasts <- 1
reg_arima_forecast <- forecast(reg_arima_model, h = num_forecasts, xreg = tail(ccnsa_ts, 1))
# Extract the rounded forecasted value
rounded_forecast <- round(reg_arima_forecast$mean)
cat("The final forecast value is", rounded_forecast)
#> The final forecast value is 240750
The final forecast value is 240750. (Created on 2024-02-21 with reprex v2.1.0.)
The detailed thought process is provided in the pdf submitted on Brightspace.
Homework Assignment - 2
suppressMessages({
library(astsa)
library(dplyr)
library(dygraphs)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(tseries)
})
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# Re-fetch the ICNSA series for this assignment's session
icnsa = fredr(series_id = "ICNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Handling COVID data for ICNSA: mark the pandemic window rows and blank
# their values so they can be re-imputed below.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Imputing "NA" values using cubic spline imputation between COVID start and end dates
# Fill the NA entries of x: fit a smoothing spline over the observed points
# (indexed by their positions) and predict the missing positions from the
# fitted curve. `lambda` is passed straight through as smooth.spline's spar.
cubic_spline_imputation <- function(x, lambda) {
  observed <- which(!is.na(x))
  missing <- which(is.na(x))
  fit <- smooth.spline(observed, x[observed], spar = lambda)
  x[missing] <- predict(fit, x = missing)$y
  x
}
# Score a smoothing-spline lambda (spar) by the RMSE of the fitted curve on
# the observed (non-NA) points. NOTE(review): this is IN-SAMPLE fit error,
# not true cross-validation, so smaller spar values will tend to win —
# confirm that is the intended selection criterion. The original also
# computed predictions at the NA positions that were never used; that dead
# code is removed here (the returned value is unchanged).
cubic_spline_cv <- function(x, lambda) {
  obs_idx <- which(!is.na(x))
  spline_model <- smooth.spline(obs_idx, x[obs_idx], spar = lambda)
  fitted_vals <- predict(spline_model, x = obs_idx)$y
  sqrt(mean((x[obs_idx] - fitted_vals)^2))
}
# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)
# Performing cross-validation to find the best lambda.
# NOTE(review): cubic_spline_cv measures in-sample RMSE, so the smallest
# spar wins by construction (0.1 below) — confirm this is intended.
rmse_values <- sapply(lambda_values, function(lambda) cubic_spline_cv(icnsa$value, lambda))
# Display the RMSE values for each lambda
cat("Lambda values:", lambda_values, "\n")
#> Lambda values: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1
cat("RMSE values:", rmse_values, "\n")
#> RMSE values: 62880.31 63772.23 68648.33 76213.45 82421.38 89424.08 97054.13 103978.3 110141.9 113968.4
# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
cat("Best lambda:", best_lambda, "\n")
#> Best lambda: 0.1
# Imputing "NA" values using cubic spline imputation between Covid start and end dates
icnsa$value <- cubic_spline_imputation(icnsa$value, best_lambda)
# Converting data to a time series object after imputation.
# NOTE(review): frequency = 24 is unusual for weekly claims data (weekly
# data is usually ~52 periods/year) — confirm this choice.
ts_icnsa_imputed <- ts(icnsa$value, frequency = 24)
# Checking for missing values after imputation
missing_values_after_imputation <- any(is.na(ts_icnsa_imputed))
missing_values_after_imputation
#> [1] FALSE
# Plotting the time series with x-date and y-value
plot_data <- data.frame(date = icnsa$date, value = icnsa$value)
plot(plot_data$date, plot_data$value, type = "l",
xlab = "Date", ylab = "Value", main = "ICNSA after imputation")
# Augmented Dickey-Fuller (ADF) test for stationarity. The reported p-value
# of 0.01 is the printed floor (see the warning), i.e. the unit-root null is
# rejected.
adf_test_result <- adf.test(ts_icnsa_imputed)
#> Warning in adf.test(ts_icnsa_imputed): p-value smaller than printed p-value
cat("Augmented Dickey-Fuller Test p-value:", adf_test_result$p.value, "\n")
#> Augmented Dickey-Fuller Test p-value: 0.01
# Holt-Winters Multiplicative Model, one-step-ahead forecast. The seasonal
# period is the ts frequency set above (24).
hw_multiplicative <- hw(ts_icnsa_imputed, seasonal = "multiplicative", h = 1)
forecast_multiplicative <- round(hw_multiplicative$mean[1])
# Holt-Winters Additive Model
hw_additive <- hw(ts_icnsa_imputed, seasonal = "additive", h = 1)
forecast_additive <- round(hw_additive$mean[1])
# Print the forecasts
cat("Multiplicative Holt-Winters Forecast:", forecast_multiplicative, "\n")
#> Multiplicative Holt-Winters Forecast: 209359
cat("Additive Holt-Winters Forecast:", forecast_additive, "\n")
#> Additive Holt-Winters Forecast: 203425
Created on 2024-02-28 with reprex v2.1.0
The forecast values are: multiplicative Holt-Winters 209359, additive Holt-Winters 203425.
suppressMessages({
library(astsa)
library(bsts)
library(dlm)
library(dplyr)
library(dygraphs)
library(fable)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(MARSS)
library(splines)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(plotly)
library(tseries)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3
install.packages("vars")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'vars' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\shwet\AppData\Local\Temp\RtmpeyvtMx\downloaded_packages
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:plotly':
#>
#> select
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#>
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#>
#> boundary
#> Loading required package: urca
#> Loading required package: lmtest
#>
#> Attaching package: 'vars'
#> The following object is masked from 'package:fable':
#>
#> VAR
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# ICNSA data
icnsa <- fredr(series_id = "ICNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")
# Handling COVID data for ICNSA: a wider window than HW1/HW2 (through
# 2021-01-16); the marked values are blanked in icnsa itself.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2021-01-16")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Create a copy of the original data for Spline imputation.
# (The NA re-assignment below is redundant — icnsa$value is already NA in
# the window — but harmless.)
covid_data_spline <- icnsa
covid_data_spline$value[covid_idx] <- NA
# Create a copy of the original data for State Space imputation
covid_data_imputed <- icnsa
covid_data_imputed$value[covid_idx] <- NA
# Impute using state space method. Note icnsa$value itself is never
# re-imputed, so later plots of icnsa show a gap over the COVID window.
covid_data_imputed$value <- imputeTS::na_kalman(covid_data_imputed$value)
# Impute using cubic spline method
# Fit a smoothing spline to the observed (non-NA) points, indexed by
# position, then predict each NA position from the fitted curve.
# `lambda` is forwarded as smooth.spline's spar.
cubic_spline_imputation <- function(x, lambda) {
  obs <- which(!is.na(x))
  mis <- which(is.na(x))
  spline_fit <- smooth.spline(obs, x[obs], spar = lambda)
  x[mis] <- predict(spline_fit, x = mis)$y
  x
}
# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)
# Helper: in-sample RMSE of a smoothing spline for a given spar. This
# function was used below without being defined in this session, which
# produced the reprex error `could not find function "cubic_spline_cv"`
# and the cascade of "object not found" errors after it; defining it here
# fixes the whole chain. (Same caveat as HW2: this is fit error on the
# observed points, not true cross-validation.)
cubic_spline_cv <- function(x, lambda) {
  obs_idx <- which(!is.na(x))
  fit <- smooth.spline(obs_idx, x[obs_idx], spar = lambda)
  sqrt(mean((x[obs_idx] - predict(fit, x = obs_idx)$y)^2))
}
# Score every candidate lambda (vapply keeps the result a numeric vector)
rmse_values <- vapply(lambda_values, function(lambda) cubic_spline_cv(covid_data_spline$value, lambda), numeric(1))
# Display the RMSE values for each lambda
# cat("Lambda values:", lambda_values, "\n")
# cat("RMSE values:", rmse_values, "\n")
# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
cat("Best lambda:", best_lambda, "\n")
# Impute using cubic spline imputation
covid_data_spline$value <- cubic_spline_imputation(covid_data_spline$value, best_lambda)
# Plot the original values. Note icnsa$value still contains NAs over the
# COVID window, so the black trace has a gap there.
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
# Plot imputed data using cubic spline method
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)
# Set up a 1x2 plotting layout (1 row, 2 columns) to compare the two
# imputation methods side by side
par(mfrow = c(1, 2))
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Kalman)"), col = c("black", "red"), lty = c(1, 2), lwd = c(1, 2))
# Plot imputed data using cubic spline method
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Spline)"), col = c("black", "blue"), lty = c(1, 2), lwd = c(1, 2))
# Reset plotting layout to default
par(mfrow = c(1, 1))
# Convert the Kalman-imputed data into a time series object
# (52.18 = average weeks per year)
covid_data_imputed_ts <- ts(covid_data_imputed$value, frequency = 52.18)
# Fit a structural time series model ("BSM" = basic structural model with
# level, trend, and seasonal components) to the imputed series
structural_model <- StructTS(covid_data_imputed_ts, type = "BSM")
# Generate a one-step-ahead forecast from the fitted model
forecast_values <- predict(structural_model, n.ahead = 1)$pred
# Get the next rounded forecast value
rounded_forecast <- round(forecast_values)
# Display the rounded forecast value
cat("Rounded Forecast Value:", rounded_forecast, "\n")
#> Rounded Forecast Value: 241753
# Another covariate series - Insured Unemployment Rate (IURNSA), to be used
# as the xreg regressor for the ICNSA model below
iurnsa <- fredr(series_id = "IURNSA")
# Date Preparation
iurnsa$date <- as.Date(iurnsa$date)
# Check for missing values
missing_values <- any(is.na(iurnsa$value))
missing_values
#> [1] FALSE
# Convert it into time series (weekly; 52.18 = average weeks per year)
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)
# Plot the original values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Time Series")
# Make sure both time series have the same length. The reprex errored here
# ("object 'icnsa_ts' not found") because icnsa_ts was never created in this
# session; use the Kalman-imputed ICNSA series built above instead.
min_length <- min(length(covid_data_imputed_ts), length(iurnsa_ts))
icnsa_ts <- tail(covid_data_imputed_ts, min_length)
iurnsa_ts <- tail(iurnsa_ts, min_length)
# Handling COVID data for IURNSA: blank the pandemic window, then re-impute
cov_start <- as.Date("2020-04-04")
cov_end <- as.Date("2020-10-17")
cov_start_idx <- which(iurnsa$date == cov_start)
cov_end_idx <- which(iurnsa$date == cov_end)
cov_idx <- rep(0, nrow(iurnsa))
cov_idx[cov_start_idx:cov_end_idx] <- 1
cov_idx <- as.logical(cov_idx)
iurnsa$value[cov_idx] <- NA
# Impute using state space method
iurnsa$value <- imputeTS::na_kalman(iurnsa$value)
# Plot the imputed values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Imputed")
# Convert it into time series. NOTE(review): this rebuilds iurnsa_ts from
# the full series, discarding any earlier length alignment — verify the two
# series are still the same length before cbind-ing them below.
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)
# Combine the time series data for ICNSA and IURNSA
combined_ts <- cbind(ICNSA = covid_data_imputed_ts, IURNSA = iurnsa_ts)
# Fit an ARIMA model with covariate (IURNSA)
arima_model <- auto.arima(combined_ts[, "ICNSA"], xreg = combined_ts[, "IURNSA"])
# Forecast the next value. forecast() takes one future xreg row per step, so
# for h = 1 pass a single value; the last observed IURNSA value is used as
# next week's covariate, as in HW1. (This line also restores the comment
# markers that were stripped when this snippet was pasted onto one line.)
forecast_values <- forecast(arima_model, xreg = tail(iurnsa_ts, 1), h = 1)
# Get the next rounded forecast value
rounded_forecast <- round(forecast_values$mean)
# Display the rounded forecast value
cat("Rounded Forecast Value with Covariate (IURNSA):", rounded_forecast, "\n")
Created on 2024-03-07 with reprex v2.1.0
The final forecast value is - 213853
suppressMessages({
library(astsa)
library(bsts)
library(BigVAR)
library(dlm)
library(dplyr)
library(dygraphs)
library(fable)
library(forecast)
library(fpp3)
library(fredr)
library(ggplot2)
library(imputeTS)
library(lubridate)
library(MARSS)
library(readxl)
library(splines)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(plotly)
library(tseries)
library(vars)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Warning: package 'lattice' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'fable' was built under R version 4.3.3
#> Warning: package 'fabletools' was built under R version 4.3.3
#> Warning: package 'forecast' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
#> Warning: package 'feasts' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3
#> Warning: package 'vars' was built under R version 4.3.3
#> Warning: package 'strucchange' was built under R version 4.3.3
# File paths. NOTE(review): absolute Windows paths make this script
# machine-specific; relative paths or here::here() would be more portable.
file_paths <- c("C:/Study/Time Series/HW/HW4/Manufacturing and Supplies Inventory (MI).xlsx",
"C:/Study/Time Series/HW/HW4/Work in Process Inventories (WI).xlsx",
"C:/Study/Time Series/HW/HW4/Finished Goods Inventories (FI).xlsx",
"C:/Study/Time Series/HW/HW4/Value of Shipments (VS).xlsx")
# Load data. The "New names" messages mean the spreadsheets have blank
# header cells, which read_excel fills in as ...1, ...2.
data_MI <- read_excel(file_paths[1])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_WI <- read_excel(file_paths[2])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_FI <- read_excel(file_paths[3])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_VS <- read_excel(file_paths[4])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
# Inspect the structure of the data: every sheet is 403 rows x 3 columns of
# character data, with Census Bureau boilerplate in the leading rows
str(data_MI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Materials and Supplies Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_WI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Work in Process Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_FI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Finished Goods Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_VS)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Value of Shipments [Millions of Dollars]" "Period: 1992 to 2024" ...
# Drop the leading header rows, the trailing footer rows, and the last
# column, then standardise the column names to Date/Value. (The original
# comment said only the first 7 rows were deleted, but the code also trims
# the last 10 rows; the defaults below preserve that exact behaviour while
# making the row counts adjustable.)
clean_data <- function(data, header_rows = 7, footer_rows = 10) {
  n <- nrow(data)
  drop_rows <- c(seq_len(header_rows), (n - footer_rows + 1):n)
  data <- data[-drop_rows, -ncol(data)]
  colnames(data) <- c("Date", "Value")
  data
}
# Clean and rename each dataset
new_MI <- clean_data(data_MI)
new_WI <- clean_data(data_WI)
new_FI <- clean_data(data_FI)
new_VS <- clean_data(data_VS)
# Check the structure of the cleaned datasets: 386 rows of Date/Value pairs,
# still stored as character at this point
str(new_MI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "8923" "9164" "9200" "9252" ...
str(new_WI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "2100" "2203" "2073" "2071" ...
str(new_FI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "14576" "14396" "14469" "14238" ...
str(new_VS)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "28677" "28560" "29239" "29904" ...
# Convert dates to standard format. NOTE(review): "%b" month-name parsing is
# locale-dependent; this assumes an English locale — confirm if the script
# must run elsewhere.
new_MI$Date <- as.Date(paste0("01-", new_MI$Date), format = "%d-%b-%Y")
new_WI$Date <- as.Date(paste0("01-", new_WI$Date), format = "%d-%b-%Y")
new_FI$Date <- as.Date(paste0("01-", new_FI$Date), format = "%d-%b-%Y")
new_VS$Date <- as.Date(paste0("01-", new_VS$Date), format = "%d-%b-%Y")
# Convert "Value" column from character to numeric
new_MI$Value <- as.numeric(new_MI$Value)
new_WI$Value <- as.numeric(new_WI$Value)
new_FI$Value <- as.numeric(new_FI$Value)
new_VS$Value <- as.numeric(new_VS$Value)
# Check for missing values (also catches NAs introduced by coercion above)
sum(is.na(new_MI))
#> [1] 0
sum(is.na(new_WI))
#> [1] 0
sum(is.na(new_FI))
#> [1] 0
sum(is.na(new_VS))
#> [1] 0
# Plot Manufacturing & Supplies Inventory
plot(new_MI$Date, new_MI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Manufacturing & Supplies Inventory")
# Plot Work in Process Inventories
plot(new_WI$Date, new_WI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Work in Process Inventories")
# Plot Finished Goods Inventories
plot(new_FI$Date, new_FI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Finished Goods Inventories")
# Plot Value of Shipments
plot(new_VS$Date, new_VS$Value, type = "l", xlab = "Date", ylab = "Value", main = "Value of Shipments")
# Helper: run an Augmented Dickey-Fuller unit-root test on the Value column
# (trend specification, 12 lagged differences) and print the full summary.
check_stationarity <- function(data) {
  adf_fit <- ur.df(data$Value, type = "trend", lags = 12)
  cat("ADF Test Results:\n")
  print(summary(adf_fit))
}
# Check stationarity for each dataset (ADF with trend and 12 lags)
check_stationarity(new_MI)  # materials & supplies inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -982.66 -113.96 7.36 120.22 1041.74
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 116.319390 53.545891 2.172 0.03049 *
#> z.lag.1 -0.017928 0.007755 -2.312 0.02136 *
#> tt 0.915888 0.357012 2.565 0.01071 *
#> z.diff.lag1 -0.058130 0.052591 -1.105 0.26977
#> z.diff.lag2 -0.059405 0.052873 -1.124 0.26196
#> z.diff.lag3 0.173746 0.052941 3.282 0.00113 **
#> z.diff.lag4 0.008400 0.053669 0.157 0.87572
#> z.diff.lag5 0.033364 0.053945 0.618 0.53666
#> z.diff.lag6 0.055853 0.053832 1.038 0.30018
#> z.diff.lag7 0.100373 0.053875 1.863 0.06327 .
#> z.diff.lag8 -0.018400 0.054166 -0.340 0.73428
#> z.diff.lag9 0.090376 0.054249 1.666 0.09660 .
#> z.diff.lag10 0.118937 0.053949 2.205 0.02812 *
#> z.diff.lag11 0.035821 0.054324 0.659 0.51006
#> z.diff.lag12 -0.068162 0.054064 -1.261 0.20821
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 212.1 on 358 degrees of freedom
#> Multiple R-squared: 0.1025, Adjusted R-squared: 0.06737
#> F-statistic: 2.919 on 14 and 358 DF, p-value: 0.0003229
#>
#>
#> Value of test-statistic is: -2.3117 3.7258 3.3825
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_WI)  # work-in-process inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -885.42 -57.38 1.89 54.98 623.34
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 33.89776 17.61082 1.925 0.05504 .
#> z.lag.1 -0.02136 0.01010 -2.115 0.03513 *
#> tt 0.30695 0.15121 2.030 0.04310 *
#> z.diff.lag1 -0.03411 0.05172 -0.660 0.51000
#> z.diff.lag2 0.12738 0.05176 2.461 0.01432 *
#> z.diff.lag3 0.01045 0.05221 0.200 0.84148
#> z.diff.lag4 0.04943 0.05197 0.951 0.34214
#> z.diff.lag5 0.05509 0.05783 0.953 0.34146
#> z.diff.lag6 0.03521 0.05853 0.601 0.54789
#> z.diff.lag7 -0.05597 0.05864 -0.955 0.34045
#> z.diff.lag8 0.02748 0.05887 0.467 0.64097
#> z.diff.lag9 0.12951 0.05889 2.199 0.02851 *
#> z.diff.lag10 0.02351 0.05921 0.397 0.69162
#> z.diff.lag11 0.05356 0.05929 0.903 0.36695
#> z.diff.lag12 -0.19131 0.05907 -3.239 0.00131 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 115.4 on 358 degrees of freedom
#> Multiple R-squared: 0.08869, Adjusted R-squared: 0.05305
#> F-statistic: 2.489 on 14 and 358 DF, p-value: 0.002207
#>
#>
#> Value of test-statistic is: -2.1149 2.2656 2.2549
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_FI)  # finished goods inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -983.00 -132.92 -7.66 145.87 890.07
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 187.827721 96.032445 1.956 0.0513 .
#> z.lag.1 -0.015667 0.008088 -1.937 0.0535 .
#> tt 1.081874 0.484667 2.232 0.0262 *
#> z.diff.lag1 0.036568 0.052244 0.700 0.4844
#> z.diff.lag2 0.118457 0.052068 2.275 0.0235 *
#> z.diff.lag3 0.052388 0.052441 0.999 0.3185
#> z.diff.lag4 0.052407 0.052749 0.994 0.3211
#> z.diff.lag5 0.071997 0.052811 1.363 0.1736
#> z.diff.lag6 -0.005623 0.052859 -0.106 0.9153
#> z.diff.lag7 0.067721 0.052811 1.282 0.2006
#> z.diff.lag8 -0.033436 0.053058 -0.630 0.5290
#> z.diff.lag9 0.050405 0.053185 0.948 0.3439
#> z.diff.lag10 0.037787 0.053432 0.707 0.4799
#> z.diff.lag11 0.125206 0.053248 2.351 0.0192 *
#> z.diff.lag12 -0.128491 0.053588 -2.398 0.0170 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 229.4 on 358 degrees of freedom
#> Multiple R-squared: 0.08919, Adjusted R-squared: 0.05357
#> F-statistic: 2.504 on 14 and 358 DF, p-value: 0.002064
#>
#>
#> Value of test-statistic is: -1.937 4.3484 2.9516
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_VS)  # value of shipments
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1791.77 -323.58 6.22 340.94 1599.96
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 757.382452 258.320727 2.932 0.00359 **
#> z.lag.1 -0.028956 0.010464 -2.767 0.00595 **
#> tt 4.160546 1.455615 2.858 0.00451 **
#> z.diff.lag1 -0.105861 0.052152 -2.030 0.04311 *
#> z.diff.lag2 0.008682 0.052406 0.166 0.86852
#> z.diff.lag3 0.123475 0.052334 2.359 0.01884 *
#> z.diff.lag4 0.136105 0.052350 2.600 0.00971 **
#> z.diff.lag5 -0.109228 0.052670 -2.074 0.03881 *
#> z.diff.lag6 0.069633 0.052929 1.316 0.18915
#> z.diff.lag7 0.064114 0.052917 1.212 0.22647
#> z.diff.lag8 -0.014938 0.052259 -0.286 0.77516
#> z.diff.lag9 0.149177 0.051819 2.879 0.00423 **
#> z.diff.lag10 0.077924 0.052274 1.491 0.13693
#> z.diff.lag11 0.055271 0.052481 1.053 0.29298
#> z.diff.lag12 -0.058946 0.052051 -1.132 0.25819
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 528.3 on 358 degrees of freedom
#> Multiple R-squared: 0.1235, Adjusted R-squared: 0.08923
#> F-statistic: 3.603 on 14 and 358 DF, p-value: 1.328e-05
#>
#>
#> Value of test-statistic is: -2.7672 5.3268 4.1175
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
# Convert each cleaned series to a monthly ts object (frequency = 12).
# NOTE(review): ts() is given no start date, so the time index is a bare
# 1, 2, ... sequence — fine for model fitting, but decomposition plots
# will not show calendar dates on the x-axis.
ts_MI <- ts(new_MI$Value, frequency = 12)
ts_WI <- ts(new_WI$Value, frequency = 12)
ts_FI <- ts(new_FI$Value, frequency = 12)
ts_VS <- ts(new_VS$Value, frequency = 12)
# Classical decomposition (observed / trend / seasonal / random) of each
# series; ylab is blanked so the panel labels carry the information.
plot(decompose(ts_MI), ylab = "")
title(main = "MI")
plot(decompose(ts_WI), ylab = "")
title(main = "WI")
plot(decompose(ts_FI), ylab = "")
title(main = "FI")
plot(decompose(ts_VS), ylab = "")
title(main = "VS")
# Combine all time series into a single data frame (one column per series)
ts_data <- data.frame(ts_MI, ts_WI, ts_FI, ts_VS)
# Fit a VAR(1) model as a baseline before lag-order selection
var_model_1 <- VAR(ts_data, p = 1)
# Lag-order selection over 1..12 lags; type = "both" includes a constant
# and a trend term in the candidate models. Criteria are printed below.
var_select <- VARselect(ts_data, lag.max = 12, type = "both")
var_select$criteria
#> 1 2 3 4 5
#> AIC(n) 4.373607e+01 4.363734e+01 4.358105e+01 4.360257e+01 4.362457e+01
#> HQ(n) 4.383606e+01 4.380399e+01 4.381435e+01 4.390253e+01 4.399119e+01
#> SC(n) 4.398790e+01 4.405705e+01 4.416864e+01 4.435805e+01 4.454793e+01
#> FPE(n) 9.870510e+18 8.942936e+18 8.454137e+18 8.639464e+18 8.833817e+18
#> 6 7 8 9 10
#> AIC(n) 4.365953e+01 4.370112e+01 4.375264e+01 4.378099e+01 4.381128e+01
#> HQ(n) 4.409280e+01 4.420105e+01 4.431923e+01 4.441424e+01 4.451118e+01
#> SC(n) 4.475077e+01 4.496024e+01 4.517965e+01 4.537588e+01 4.557405e+01
#> FPE(n) 9.151299e+18 9.544525e+18 1.005556e+19 1.035301e+19 1.068197e+19
#> 11 12
#> AIC(n) 4.384152e+01 4.387503e+01
#> HQ(n) 4.460807e+01 4.470824e+01
#> SC(n) 4.577217e+01 4.597356e+01
#> FPE(n) 1.102309e+19 1.141507e+19
# Fit a VAR(p) model. Keeping p=2 based on above observations:
# in the printed criteria HQ(n) is minimized at lag 2 (43.804), while
# SC/BIC prefers lag 1 and AIC prefers lag 3 — p = 2 is the middle-ground
# choice.
var_model_2 <- VAR(ts_data, p = 2)
# Print summary of VAR models (equation-by-equation OLS estimates,
# residual covariance/correlation, and roots of the companion matrix)
summary(var_model_1)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS
#> Deterministic variables: const
#> Sample size: 385
#> Log Likelihood: -10573.343
#> Roots of the characteristic polynomial:
#> 1.001 0.9617 0.9617 0.8785
#> Call:
#> VAR(y = ts_data, p = 1)
#>
#>
#> Estimation results for equation ts_MI:
#> ======================================
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.931876 0.018964 49.138 < 2e-16 ***
#> ts_WI.l1 0.143906 0.038674 3.721 0.000228 ***
#> ts_FI.l1 -0.012623 0.013910 -0.907 0.364718
#> ts_VS.l1 0.014056 0.005301 2.651 0.008351 **
#> const 65.452317 55.502250 1.179 0.239027
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 212.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9983, Adjusted R-squared: 0.9983
#> F-statistic: 5.701e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_WI:
#> ======================================
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.018159 0.010399 1.746 0.0816 .
#> ts_WI.l1 0.954453 0.021207 45.007 <2e-16 ***
#> ts_FI.l1 -0.012908 0.007628 -1.692 0.0914 .
#> ts_VS.l1 0.004387 0.002907 1.509 0.1321
#> const -6.162485 30.434629 -0.202 0.8396
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 116.5 on 380 degrees of freedom
#> Multiple R-Squared: 0.9952, Adjusted R-squared: 0.9952
#> F-statistic: 1.973e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_FI:
#> ======================================
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.06018 0.02003 3.004 0.00284 **
#> ts_WI.l1 -0.02622 0.04086 -0.642 0.52146
#> ts_FI.l1 0.92467 0.01469 62.927 < 2e-16 ***
#> ts_VS.l1 0.01762 0.00560 3.146 0.00178 **
#> const 103.59469 58.63259 1.767 0.07806 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 224.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9989, Adjusted R-squared: 0.9989
#> F-statistic: 8.781e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_VS:
#> ======================================
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.05232 0.04962 1.054 0.292
#> ts_WI.l1 -0.03111 0.10119 -0.307 0.759
#> ts_FI.l1 -0.01026 0.03640 -0.282 0.778
#> ts_VS.l1 0.99145 0.01387 71.474 <2e-16 ***
#> const 145.36282 145.22818 1.001 0.317
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 555.9 on 380 degrees of freedom
#> Multiple R-Squared: 0.9987, Adjusted R-squared: 0.9987
#> F-statistic: 7.441e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 45132.9 -291.3 -5855 22577
#> ts_WI -291.3 13570.9 -2659 2168
#> ts_FI -5855.3 -2659.3 50367 8220
#> ts_VS 22577.1 2167.7 8220 309011
#>
#> Correlation matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 1.00000 -0.01177 -0.12281 0.19118
#> ts_WI -0.01177 1.00000 -0.10172 0.03347
#> ts_FI -0.12281 -0.10172 1.00000 0.06588
#> ts_VS 0.19118 0.03347 0.06588 1.00000
# VAR(2) summary. Note that in the ts_VS equation all eight lag
# coefficients are significant here, whereas in the VAR(1) fit above only
# VS's own lag was — evidence the second lag adds explanatory power.
summary(var_model_2)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS
#> Deterministic variables: const
#> Sample size: 384
#> Log Likelihood: -10511.22
#> Roots of the characteristic polynomial:
#> 0.9996 0.9783 0.9783 0.8925 0.2677 0.2488 0.1422 0.09355
#> Call:
#> VAR(y = ts_data, p = 2)
#>
#>
#> Estimation results for equation ts_MI:
#> ======================================
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.906090 0.052806 17.159 <2e-16 ***
#> ts_WI.l1 0.217293 0.094205 2.307 0.0216 *
#> ts_FI.l1 0.065662 0.049570 1.325 0.1861
#> ts_VS.l1 0.018152 0.020075 0.904 0.3665
#> ts_MI.l2 0.030727 0.053441 0.575 0.5657
#> ts_WI.l2 -0.084649 0.098394 -0.860 0.3902
#> ts_FI.l2 -0.078922 0.047622 -1.657 0.0983 .
#> ts_VS.l2 -0.004325 0.020385 -0.212 0.8321
#> const 57.127943 57.013253 1.002 0.3170
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 212.3 on 375 degrees of freedom
#> Multiple R-Squared: 0.9984, Adjusted R-squared: 0.9983
#> F-statistic: 2.845e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_WI:
#> ======================================
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.025083 0.028794 0.871 0.3843
#> ts_WI.l1 0.918495 0.051368 17.881 <2e-16 ***
#> ts_FI.l1 0.055116 0.027029 2.039 0.0421 *
#> ts_VS.l1 0.013843 0.010946 1.265 0.2068
#> ts_MI.l2 -0.011200 0.029140 -0.384 0.7009
#> ts_WI.l2 0.044854 0.053652 0.836 0.4037
#> ts_FI.l2 -0.067988 0.025967 -2.618 0.0092 **
#> ts_VS.l2 -0.009128 0.011116 -0.821 0.4121
#> const -0.615680 31.088082 -0.020 0.9842
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 115.8 on 375 degrees of freedom
#> Multiple R-Squared: 0.9953, Adjusted R-squared: 0.9952
#> F-statistic: 9965 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_FI:
#> ======================================
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.19209 0.05476 3.508 0.000506 ***
#> ts_WI.l1 0.19447 0.09769 1.991 0.047236 *
#> ts_FI.l1 0.99211 0.05140 19.300 < 2e-16 ***
#> ts_VS.l1 0.06076 0.02082 2.919 0.003727 **
#> ts_MI.l2 -0.13351 0.05542 -2.409 0.016470 *
#> ts_WI.l2 -0.23876 0.10203 -2.340 0.019807 *
#> ts_FI.l2 -0.04725 0.04938 -0.957 0.339238
#> ts_VS.l2 -0.04995 0.02114 -2.363 0.018646 *
#> const 63.38438 59.12213 1.072 0.284368
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 220.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.999, Adjusted R-squared: 0.9989
#> F-statistic: 4.544e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_VS:
#> ======================================
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.53011 0.13261 3.998 7.71e-05 ***
#> ts_WI.l1 0.60782 0.23657 2.569 0.010576 *
#> ts_FI.l1 0.46942 0.12448 3.771 0.000189 ***
#> ts_VS.l1 0.81489 0.05041 16.165 < 2e-16 ***
#> ts_MI.l2 -0.48564 0.13420 -3.619 0.000337 ***
#> ts_WI.l2 -0.66548 0.24709 -2.693 0.007393 **
#> ts_FI.l2 -0.46779 0.11959 -3.912 0.000109 ***
#> ts_VS.l2 0.17545 0.05119 3.427 0.000677 ***
#> const 118.48950 143.17289 0.828 0.408425
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 533.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.9988, Adjusted R-squared: 0.9988
#> F-statistic: 4.023e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 45062.2 -620.7 -6091 21514
#> ts_WI -620.7 13398.3 -2882 1440
#> ts_FI -6090.9 -2881.5 48458 5384
#> ts_VS 21514.1 1440.4 5384 284173
#>
#> Correlation matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 1.00000 -0.02526 -0.13035 0.19012
#> ts_WI -0.02526 1.00000 -0.11309 0.02334
#> ts_FI -0.13035 -0.11309 1.00000 0.04588
#> ts_VS 0.19012 0.02334 0.04588 1.00000
# Produce a one-month-ahead (n.ahead = 1) forecast from each fitted VAR
forecast_var1 <- predict(var_model_1, n.ahead = 1)
forecast_var2 <- predict(var_model_2, n.ahead = 1)
# Print forecast for the next month: point forecast plus lower/upper
# interval bounds and the half-width (CI) for each series
print(forecast_var1)
#> $ts_MI
#> fcst lower upper CI
#> ts_MI.fcst 25546.62 25130.24 25963 416.3845
#>
#> $ts_WI
#> fcst lower upper CI
#> ts_WI.fcst 6173.774 5945.45 6402.098 228.3242
#>
#> $ts_FI
#> fcst lower upper CI
#> ts_FI.fcst 38148.07 37708.2 38587.93 439.8687
#>
#> $ts_VS
#> fcst lower upper CI
#> ts_VS.fcst 79916.19 78826.67 81005.71 1089.519
# Same one-month-ahead forecast, from the VAR(2) model
print(forecast_var2)
#> $ts_MI
#> fcst lower upper CI
#> ts_MI.fcst 25597.19 25181.13 26013.25 416.0585
#>
#> $ts_WI
#> fcst lower upper CI
#> ts_WI.fcst 6181.545 5954.677 6408.413 226.8676
#>
#> $ts_FI
#> fcst lower upper CI
#> ts_FI.fcst 38125.98 37694.54 38557.43 431.4482
#>
#> $ts_VS
#> fcst lower upper CI
#> ts_VS.fcst 79887.68 78842.87 80932.5 1044.815
# Granger causality tests on the VAR(1) model: each call tests H0 that
# the named series does not Granger-cause the remaining three (plus an
# instantaneous-causality chi-squared test).
causality(var_model_1, cause = "ts_WI")
#> $Granger
#>
#> Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 4.9772, df1 = 3, df2 = 1520, p-value = 0.001937
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 4.9675, df = 3, p-value = 0.1742
# H0: MI does not Granger-cause WI, FI, VS (VAR(1))
causality(var_model_1, cause = "ts_MI")
#> $Granger
#>
#> Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 4.6258, df1 = 3, df2 = 1520, p-value = 0.003162
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 20.423, df = 3, p-value = 0.0001387
# H0: FI does not Granger-cause MI, WI, VS (VAR(1)) — the only test in
# this group that is not rejected (p = 0.293 in the output below)
causality(var_model_1, cause = "ts_FI")
#> $Granger
#>
#> Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 1.2421, df1 = 3, df2 = 1520, p-value = 0.293
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 12.92, df = 3, p-value = 0.004813
# H0: VS does not Granger-cause MI, WI, FI (VAR(1))
causality(var_model_1, cause = "ts_VS")
#> $Granger
#>
#> Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_1
#> F-Test = 7.6444, df1 = 3, df2 = 1520, p-value = 4.511e-05
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_1
#> Chi-squared = 17.181, df = 3, p-value = 0.0006485
# Repeat the same four Granger/instantaneous causality tests on VAR(2)
causality(var_model_2, cause = "ts_WI")
#> $Granger
#>
#> Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 4.3941, df1 = 6, df2 = 1500, p-value = 0.0002069
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 5.9754, df = 3, p-value = 0.1128
# H0: MI does not Granger-cause WI, FI, VS (VAR(2))
causality(var_model_2, cause = "ts_MI")
#> $Granger
#>
#> Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 5.9554, df1 = 6, df2 = 1500, p-value = 3.669e-06
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 20.926, df = 3, p-value = 0.0001091
# H0: FI does not Granger-cause MI, WI, VS — rejected under VAR(2)
# (p = 0.00023 below), unlike the VAR(1) result above
causality(var_model_2, cause = "ts_FI")
#> $Granger
#>
#> Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 4.353, df1 = 6, df2 = 1500, p-value = 0.0002296
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 13.422, df = 3, p-value = 0.003807
# H0: VS does not Granger-cause MI, WI, FI (VAR(2))
causality(var_model_2, cause = "ts_VS")
#> $Granger
#>
#> Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_2
#> F-Test = 4.306, df1 = 6, df2 = 1500, p-value = 0.0002587
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_2
#> Chi-squared = 15.682, df = 3, p-value = 0.001318
# BigVAR: penalized (sparse) VAR estimation for comparison.
# BigVAR.fit wants a matrix input, so bind the four ts columns together.
BV_mtx <- cbind(ts_MI, ts_WI, ts_FI, ts_VS)
# struct = 'Basic' is an elementwise lasso penalty; lambda = 1 is a fixed
# penalty value (no cross-validation over lambda here — NOTE(review):
# consider cv.BigVAR to tune it). [,,1] drops the third array dimension
# to leave the 4 x (1 + 4p) coefficient matrix; judging by the magnitudes
# in the printed output, column 1 appears to hold the intercepts — verify
# against the BigVAR documentation.
sparse_var1 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 1, lambda = 1)[,,1]
sparse_var2 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 2, lambda = 1)[,,1]
print(sparse_var1)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 174.01547 0.886890892 0.30504143 0.01663456 -0.0006677293
#> [2,] -238.99471 0.197292936 0.52896736 -0.02799815 -0.0036364839
#> [3,] 227.60538 0.077398839 0.03136181 0.90695011 0.0136623442
#> [4,] -29.26629 -0.004295884 -0.03908114 0.05908871 0.9807352964
# Coefficient matrix of the sparse VAR(2) fit (intercept + 8 lag columns)
print(sparse_var2)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 154.25487 0.46214070 0.16137837 0.02877342 0.009604066 0.42370831
#> [2,] -655.62155 0.13445978 0.10050360 0.03903518 -0.016213934 0.13277418
#> [3,] 407.01738 0.06397564 0.02506380 0.43447155 0.014553460 0.06898732
#> [4,] 20.27397 0.01090493 -0.01664742 0.02716274 0.541490492 -0.01249533
#> [,7] [,8] [,9]
#> [1,] 0.15912244 0.017064895 -0.023648172
#> [2,] 0.09489652 0.036436615 -0.020478404
#> [3,] 0.02970738 0.406646961 0.007813708
#> [4,] -0.02515570 0.006335993 0.450445911
# summary() on a plain numeric matrix gives per-column five-number
# summaries (hence the generic V1..V5 labels below) — of limited use for
# coefficients; kept for a quick scan of magnitudes.
summary(sparse_var1)
#> V1 V2 V3 V4
#> Min. :-238.99 Min. :-0.004296 Min. :-0.03908 Min. :-0.027998
#> 1st Qu.: -81.70 1st Qu.: 0.056975 1st Qu.: 0.01375 1st Qu.: 0.005476
#> Median : 72.37 Median : 0.137346 Median : 0.16820 Median : 0.037862
#> Mean : 33.34 Mean : 0.289322 Mean : 0.20657 Mean : 0.238669
#> 3rd Qu.: 187.41 3rd Qu.: 0.369692 3rd Qu.: 0.36102 3rd Qu.: 0.271054
#> Max. : 227.61 Max. : 0.886891 Max. : 0.52897 Max. : 0.906950
#> V5
#> Min. :-0.003636
#> 1st Qu.:-0.001410
#> Median : 0.006497
#> Mean : 0.247523
#> 3rd Qu.: 0.255431
#> Max. : 0.980735
# Per-column summary of the sparse VAR(2) coefficient matrix (V1..V9)
summary(sparse_var2)
#> V1 V2 V3 V4
#> Min. :-655.62 Min. :0.01090 Min. :-0.01665 Min. :0.02716
#> 1st Qu.:-148.70 1st Qu.:0.05071 1st Qu.: 0.01464 1st Qu.:0.02837
#> Median : 87.26 Median :0.09922 Median : 0.06278 Median :0.03390
#> Mean : -18.52 Mean :0.16787 Mean : 0.06757 Mean :0.13236
#> 3rd Qu.: 217.45 3rd Qu.:0.21638 3rd Qu.: 0.11572 3rd Qu.:0.13789
#> Max. : 407.02 Max. :0.46214 Max. : 0.16138 Max. :0.43447
#> V5 V6 V7 V8
#> Min. :-0.01621 Min. :-0.01250 Min. :-0.02516 Min. :0.006336
#> 1st Qu.: 0.00315 1st Qu.: 0.04862 1st Qu.: 0.01599 1st Qu.:0.014383
#> Median : 0.01208 Median : 0.10088 Median : 0.06230 Median :0.026751
#> Mean : 0.13736 Mean : 0.15324 Mean : 0.06464 Mean :0.116621
#> 3rd Qu.: 0.14629 3rd Qu.: 0.20551 3rd Qu.: 0.11095 3rd Qu.:0.128989
#> Max. : 0.54149 Max. : 0.42371 Max. : 0.15912 Max. :0.406647
#> V9
#> Min. :-0.023648
#> 1st Qu.:-0.021271
#> Median :-0.006332
#> Mean : 0.103533
#> 3rd Qu.: 0.118472
#> Max. : 0.450446
Created on 2024-04-10 with reprex v2.1.0
The final one-month-ahead forecast values:
VAR(1) - MI: 25546.62, WI: 6173.774, FI: 38148.07, VS: 79916.19
VAR(2) - MI: 25597.19, WI: 6181.545, FI: 38125.98, VS: 79887.68
Created on 2024-01-31 with reprex v2.1.0