jlivsey / UB-sping24-time-series

3 stars 1 forks source link

Shweta Hatote #2

Open SH3458 opened 5 months ago

SH3458 commented 5 months ago
install.packages("astsa")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'astsa' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("dygraphs")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'dygraphs' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("forecast")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'forecast' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'forecast'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\00LOCK\forecast\libs\x64\forecast.dll
#> to
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\forecast\libs\x64\forecast.dll:
#> Permission denied
#> Warning: restored 'forecast'
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("fpp3")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'fpp3' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("fredr")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'fredr' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("lubridate")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'lubridate' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'lubridate'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\00LOCK\lubridate\libs\x64\lubridate.dll
#> to
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\lubridate\libs\x64\lubridate.dll:
#> Permission denied
#> Warning: restored 'lubridate'
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("seasonal")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'seasonal' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("tidyverse")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'tidyverse' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("tsbox")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'tsbox' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("zoo")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'zoo' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'zoo'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\00LOCK\zoo\libs\x64\zoo.dll to
#> C:\Users\shwet\AppData\Local\R\win-library\4.3\zoo\libs\x64\zoo.dll: Permission
#> denied
#> Warning: restored 'zoo'
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages
install.packages("reprex")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'reprex' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpGwjs3O\downloaded_packages

library(astsa)
library(dygraphs)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
#> 
#> Attaching package: 'forecast'
#> The following object is masked from 'package:astsa':
#> 
#>     gas
library(fpp3)
#> ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
#> ✔ tibble      3.2.1     ✔ tsibble     1.1.4
#> ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
#> ✔ tidyr       1.3.0     ✔ feasts      0.3.1
#> ✔ lubridate   1.9.3     ✔ fable       0.3.3
#> ✔ ggplot2     3.4.4     ✔ fabletools  0.3.4
#> ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
#> ✖ lubridate::date()    masks base::date()
#> ✖ dplyr::filter()      masks stats::filter()
#> ✖ tsibble::intersect() masks base::intersect()
#> ✖ tsibble::interval()  masks lubridate::interval()
#> ✖ dplyr::lag()         masks stats::lag()
#> ✖ tsibble::setdiff()   masks base::setdiff()
#> ✖ tsibble::union()     masks base::union()
library(fredr)
library(lubridate)
library(seasonal)
#> 
#> Attaching package: 'seasonal'
#> The following object is masked from 'package:tibble':
#> 
#>     view
#> The following objects are masked from 'package:astsa':
#> 
#>     trend, unemp
library(tidyverse)
library(tsbox)
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following object is masked from 'package:tsibble':
#> 
#>     index
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(reprex)

# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
icnsa = fredr(series_id = "ICNSA")

# Date Preparation

# Convert the 'date' column in the icnsa dataframe to Date format
icnsa$date <- as.Date(icnsa$date)

# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE

# Handling extreme values

# Check for extreme values during COVID years 
# Here I have decided to incorporate Winsorize method for handling extreme values as
# it helps retaining more information compared to removing the values.
# It is a method where the values exceeding the set threshold are replaced with
# the threshold value itself.

# Now adjusting the threshold for extreme values based on 99th percentile.
covid_threshold <- quantile(icnsa$value, 0.99)
icnsa$value <- pmin(icnsa$value, covid_threshold)

# Create a time series object
# As it is weekly data, our sampling frequency will be 52.18
icnsa_ts <- ts(icnsa$value, frequency = 52.18)  

# Plot the time series
autoplot(icnsa_ts) +
  labs(title = "ICNSA")


# Fit ARIMA model
arima_model <- auto.arima(icnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
forecast_arima <- forecast::forecast(arima_model, h = 1)
accuracy(arima_model)
#>                    ME     RMSE     MAE        MPE    MAPE      MASE        ACF1
#> Training set 184.0878 35454.34 21076.6 -0.4552686 5.74542 0.2935432 0.001084788

# Round the forecasted value to the nearest integer
rounded_forecast <- round(forecast_arima$mean)

# Print the rounded forecasted value
cat("Forecast value:", rounded_forecast, "\n")
#> Forecast value: 240499

# Time series decomposition
icnsa_decomp <- decompose(icnsa_ts)
# Plot the decomposition
autoplot(icnsa_decomp) + labs(title = "ICNSA decomposed")


# Check for seasonality pattern
seasonal_pattern <- frequency(icnsa_ts)
if (seasonal_pattern > 1) {
  # Apply seasonal adjustment
  icnsa_stl <- stl(icnsa_ts, s.window = "periodic")
  # Extract the seasonally adjusted component
  icnsa_ts_adj <- icnsa_stl$time.series[, "seasonal"]
} else {
  icnsa_ts_adj <- icnsa_ts
}

# Visualize the original and seasonally adjusted time series
autoplot(icnsa_ts, series = "Original") + 
  autolayer(icnsa_ts_adj, series = "Seasonally Adjusted") +
  ggtitle("Original vs. Seasonally Adjusted Time Series")


# Fit ARIMA model with seasonality adjustment
arima_model_adj <- auto.arima(icnsa_ts_adj)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
forecast_arima_adj <- forecast::forecast(arima_model_adj, h = 1)
accuracy(arima_model_adj)
#>                     ME     RMSE     MAE       MPE     MAPE      MASE
#> Training set -16.07901 12602.58 5058.47 -1.495665 7.459476 0.6949608
#>                     ACF1
#> Training set 0.002777531

# Round the forecasted value to the nearest integer
rounded_forecast_adj <- round(forecast_arima_adj$mean)

# Print the rounded forecasted value
cat("Forecast value with seasonality adjustment:", rounded_forecast_adj, "\n")
#> Forecast value with seasonality adjustment: 53266

Created on 2024-01-31 with reprex v2.1.0

vgunturi commented 5 months ago

@SH3458 Can you throw more light on how you considered p,d,q values for ARIMA model without ACF and PACF plots?

devanshpratapsingh commented 5 months ago

This is truly insightful! I did not have decomposition in my code, and it really helps make much more sense of the data.

SH3458 commented 5 months ago

Yes, so here as I am using 'auto.arima' function, it automatically chooses the best potential values of p, q, and d values. Just an added information, if you do not want to use the 'auto.arima' function then apart from ACF & PACF plots you can also perform grid search over various orders and select the combination that gives us the minimum BIC value (Bayesian Information Criterion).

vgunturi commented 5 months ago

When you say best potential of P,q values - that makes sense. But d is order of differencing that needs to be done to make the series stationary. So does auto.arima performs stationary check in the background!?

SH3458 commented 5 months ago

Yes, it actually does!

jlivsey commented 5 months ago

Some possible covid start and end dates

covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")

covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)

icnsa_na <- icnsa |>
  select(date, value) 

icnsa_na$value[covid_idx] <- NA
SH3458 commented 4 months ago

Homework assignment - 1 from Brightspace

suppressMessages({
library(astsa)
library(dplyr)
library(dygraphs)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
})

# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
icnsa = fredr(series_id = "ICNSA")
ccnsa = fredr(series_id = "CCNSA")

# Date Preparation
icnsa$date <- as.Date(icnsa$date)
ccnsa$date <- as.Date(ccnsa$date)

# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values <- any(is.na(ccnsa$value))

# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
#   layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
#   layout(title = "CCNSA Series Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))

# Handling COVID data for ICNSA
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Impute NA values using linear interpolation
icnsa$value <- zoo::na.approx(icnsa$value)
# Check for missing values after imputation
missing_after_imputation <- any(is.na(icnsa$value))
missing_after_imputation
#> [1] FALSE

# Handling COVID data for CCNSA
covid_s <- as.Date("2020-03-28")
covid_e <- as.Date("2020-10-17")
covid_s_idx <- which(ccnsa$date == covid_s)
covid_e_idx <- which(ccnsa$date == covid_e)
cov_idx <- rep(0, nrow(ccnsa))
cov_idx[covid_s_idx:covid_e_idx] <- 1
cov_idx <- as.logical(cov_idx)
ccnsa$value[cov_idx] <- NA
# Impute NA values using linear interpolation
ccnsa$value <- zoo::na.approx(ccnsa$value)
# Check for missing values after imputation
miss_after_imputation <- any(is.na(ccnsa$value))
miss_after_imputation
#> [1] FALSE

# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
#    layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
#    layout(title = "CCNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))

# Convert data into a time series object
icnsa_ts <- ts(icnsa$value, frequency = 52.18)
ccnsa_ts <- ts(ccnsa$value, frequency = 52.18)

# Plot the time series
autoplot(icnsa_ts) +
  labs(title = "ICNSA")

autoplot(ccnsa_ts) +
  labs(title = "CCNSA")


# ACF and PACF plots
ggtsdisplay(icnsa_ts, main = "ICNSA Plot", lag.max = 52.18)

ggtsdisplay(ccnsa_ts, main = "CCNSA Plot", lag.max = 52.18)


# Model identification for ICNSA using auto.arima
icnsa_model <- auto.arima(icnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Model identification for CCNSA using auto.arima
ccnsa_model <- auto.arima(ccnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.

# Display identified models
icnsa_model
#> Series: icnsa_ts 
#> ARIMA(4,0,2)(0,1,1)[52] 
#> 
#> Coefficients:
#>          ar1      ar2     ar3      ar4      ma1     ma2     sma1
#>       1.8970  -0.9218  0.0371  -0.0159  -1.4718  0.5706  -0.2342
#> s.e.  0.0481   0.0763  0.0412   0.0320   0.0446  0.0474   0.0190
#> 
#> sigma^2 = 1.063e+09:  log likelihood = -34582.42
#> AIC=69180.84   AICc=69180.89   BIC=69228.7
ccnsa_model
#> Series: ccnsa_ts 
#> ARIMA(0,0,5)(0,1,1)[52] 
#> 
#> Coefficients:
#>          ma1     ma2     ma3     ma4     ma5     sma1
#>       1.3326  1.6475  1.4565  1.0185  0.4920  -0.2595
#> s.e.  0.0196  0.0254  0.0234  0.0179  0.0146   0.0221
#> 
#> sigma^2 = 3.499e+10:  log likelihood = -39685.21
#> AIC=79384.41   AICc=79384.45   BIC=79426.29

# Time series cross-correlation analysis
cross_correlation_icnsa_ccnsa <- ccf(icnsa_ts, ccnsa_ts, lag.max = 52.18, type = "correlation", main = "Cross-correlation between ICNSA and CCNSA", xlab = "Lag", ylab = "Correlation")


# Identify the lag with the maximum correlation
max_corr_lag <- which.max(cross_correlation_icnsa_ccnsa$acf)
max_corr_value <- cross_correlation_icnsa_ccnsa$acf[max_corr_lag]
cat("Maximum correlation is at lag", max_corr_lag, "with a value of", max_corr_value, "\n")
#> Maximum correlation is at lag 49 with a value of 0.8356323

# Make sure both time series have the same length
min_length <- min(length(icnsa_ts), length(ccnsa_ts))
icnsa_ts <- head(icnsa_ts, min_length)
ccnsa_ts <- head(ccnsa_ts, min_length)

# Combine the time series as a data frame
combined_data <- data.frame(icnsa = icnsa_ts, ccnsa = ccnsa_ts)

# Fit regARIMA model
reg_arima_model <- auto.arima(combined_data[, "icnsa"], xreg = combined_data[, "ccnsa"])
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.

# Summary of the regARIMA model
summary(reg_arima_model)
#> Series: combined_data[, "icnsa"] 
#> Regression with ARIMA(3,0,2)(0,1,1)[52] errors 
#> 
#> Coefficients:
#>          ar1      ar2      ar3      ma1     ma2     sma1     drift    xreg
#>       1.7560  -0.7199  -0.0437  -1.4236  0.5583  -0.2330   25.0600  0.0448
#> s.e.  0.0523   0.0739   0.0327   0.0384  0.0271   0.0191  157.8115  0.0090
#> 
#> sigma^2 = 1.056e+09:  log likelihood = -34559.38
#> AIC=69136.76   AICc=69136.82   BIC=69190.59
#> 
#> Training set error measures:
#>                     ME     RMSE      MAE        MPE     MAPE      MASE
#> Training set -113.3733 32159.01 21031.19 -0.4065116 5.793527 0.3004479
#>                      ACF1
#> Training set 0.0002859649

# Regression Diagnostics
checkresiduals(reg_arima_model)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(3,0,2)(0,1,1)[52] errors
#> Q* = 392.88, df = 98.36, p-value < 2.2e-16
#> 
#> Model df: 6.   Total lags used: 104.36

# Forecasting for the next 1 value using regARIMA model
num_forecasts <- 1
reg_arima_forecast <- forecast(reg_arima_model, h = num_forecasts, xreg = tail(ccnsa_ts, 1))

# Extract the rounded forecasted value
rounded_forecast <- round(reg_arima_forecast$mean)
cat("The final forecast value is", rounded_forecast)
#> The final forecast value is 240750

The final forecast value is 240750 Created on 2024-02-21 with reprex v2.1.0

The detailed thought process is provided in the pdf submitted on Brightspace.

SH3458 commented 4 months ago

Homework Assignment - 2

suppressMessages({
  library(astsa)
  library(dplyr)
  library(dygraphs)
  library(forecast)
  library(fpp3)
  library(fredr)
  library(imputeTS)
  library(lubridate)
  library(seasonal)
  library(tidyverse)
  library(tsbox)
  library(zoo)
  library(reprex)
  library(tseries)
})

# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
icnsa = fredr(series_id = "ICNSA")

# Date Preparation
icnsa$date <- as.Date(icnsa$date)

# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE

# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
#   layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))

# Handling COVID data for ICNSA
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA

# Imputing "NA" values using cubic spline imputation between COVID start and end dates
# Define a function for cubic spline imputation
cubic_spline_imputation <- function(x, lambda) {
  non_na_indices <- which(!is.na(x))
  na_indices <- which(is.na(x))
  spline_model <- smooth.spline(non_na_indices, x[non_na_indices], spar = lambda)
  imputed_values <- predict(spline_model, x = na_indices)$y
  x[na_indices] <- imputed_values
  return(x)
}
# Defining another function that checks calculates the RMSE value for getting the best lambda
cubic_spline_cv <- function(x, lambda) {
  non_na_indices <- which(!is.na(x))
  na_indices <- which(is.na(x))
  spline_model <- smooth.spline(non_na_indices, x[non_na_indices], spar = lambda)
  imputed_values <- predict(spline_model, x = na_indices)$y
  rmse <- sqrt(mean((x[non_na_indices] - predict(spline_model, x = non_na_indices)$y)^2))
  return(rmse)
}

# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)

# Performing cross-validation to find the best lambda
rmse_values <- sapply(lambda_values, function(lambda) cubic_spline_cv(icnsa$value, lambda))

# Display the RMSE values for each lambda
cat("Lambda values:", lambda_values, "\n")
#> Lambda values: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1
cat("RMSE values:", rmse_values, "\n")
#> RMSE values: 62880.31 63772.23 68648.33 76213.45 82421.38 89424.08 97054.13 103978.3 110141.9 113968.4

# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
cat("Best lambda:", best_lambda, "\n")
#> Best lambda: 0.1

# Imputing "NA" values using cubic spline imputation between Covid start and end dates
icnsa$value <- cubic_spline_imputation(icnsa$value, best_lambda)

# Converting data to a time series object after imputation
ts_icnsa_imputed <- ts(icnsa$value, frequency = 24)

# Checking for missing values after imputation
missing_values_after_imputation <- any(is.na(ts_icnsa_imputed))
missing_values_after_imputation
#> [1] FALSE

# Plotting the time series with x-date and y-value
plot_data <- data.frame(date = icnsa$date, value = icnsa$value)
plot(plot_data$date, plot_data$value, type = "l",
     xlab = "Date", ylab = "Value", main = "ICNSA after imputation")


# Augmented Dickey-Fuller (ADF) test for stationarity
adf_test_result <- adf.test(ts_icnsa_imputed)
#> Warning in adf.test(ts_icnsa_imputed): p-value smaller than printed p-value
cat("Augmented Dickey-Fuller Test p-value:", adf_test_result$p.value, "\n")
#> Augmented Dickey-Fuller Test p-value: 0.01

# Holt-Winters Multiplicative Model
hw_multiplicative <- hw(ts_icnsa_imputed, seasonal = "multiplicative", h = 1)
forecast_multiplicative <- round(hw_multiplicative$mean[1])

# Holt-Winters Additive Model
hw_additive <- hw(ts_icnsa_imputed, seasonal = "additive", h = 1)
forecast_additive <- round(hw_additive$mean[1])

# Print the forecasts
cat("Multiplicative Holt-Winters Forecast:", forecast_multiplicative, "\n")
#> Multiplicative Holt-Winters Forecast: 209359
cat("Additive Holt-Winters Forecast:", forecast_additive, "\n")
#> Additive Holt-Winters Forecast: 203425

Created on 2024-02-28 with reprex v2.1.0

The Forecast values are -

SH3458 commented 4 months ago
suppressMessages({
  library(astsa)
  library(bsts)
  library(dlm)
  library(dplyr)
  library(dygraphs)
  library(fable)
  library(forecast)
  library(fpp3)
  library(fredr)
  library(imputeTS)
  library(lubridate)
  library(MARSS)
  library(splines)
  library(seasonal)
  library(tidyverse)
  library(tsbox)
  library(zoo)
  library(reprex)
  library(plotly)
  library(tseries)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3

install.packages("vars")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'vars' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\shwet\AppData\Local\Temp\RtmpeyvtMx\downloaded_packages
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:plotly':
#> 
#>     select
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#> 
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#> 
#>     boundary
#> Loading required package: urca
#> Loading required package: lmtest
#> 
#> Attaching package: 'vars'
#> The following object is masked from 'package:fable':
#> 
#>     VAR

# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")

# ICNSA data
icnsa <- fredr(series_id = "ICNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")


# Handling COVID data for ICNSA
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2021-01-16")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA

# Create a copy of the original data for Spline imputation
covid_data_spline <- icnsa
covid_data_spline$value[covid_idx] <- NA

# Create a copy of the original data for State Space imputation
covid_data_imputed <- icnsa
covid_data_imputed$value[covid_idx] <- NA

# Impute using state space method
covid_data_imputed$value <- imputeTS::na_kalman(covid_data_imputed$value)

# Impute using cubic spline method
# Define a function for cubic spline imputation
cubic_spline_imputation <- function(x, lambda) {
  non_na_indices <- which(!is.na(x))
  na_indices <- which(is.na(x))
  spline_model <- smooth.spline(non_na_indices, x[non_na_indices], spar = lambda)
  imputed_values <- predict(spline_model, x = na_indices)$y
  x[na_indices] <- imputed_values
  return(x)
}
# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)
# Performing cross-validation to find the best lambda
rmse_values <- sapply(lambda_values, function(lambda) cubic_spline_cv(covid_data_spline$value, lambda))
#> Error in cubic_spline_cv(covid_data_spline$value, lambda): could not find function "cubic_spline_cv"
# Display the RMSE values for each lambda
# cat("Lambda values:", lambda_values, "\n")
# cat("RMSE values:", rmse_values, "\n")
# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
#> Error in eval(expr, envir, enclos): object 'rmse_values' not found
cat("Best lambda:", best_lambda, "\n")
#> Error in eval(expr, envir, enclos): object 'best_lambda' not found
# Impute using cubic spline imputation
covid_data_spline$value <- cubic_spline_imputation(covid_data_spline$value, best_lambda)
#> Error in eval(expr, envir, enclos): object 'best_lambda' not found

# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
# Plot imputed data using cubic spline method
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)


# Set up a 1x2 plotting layout (1 row, 2 columns)
par(mfrow = c(1, 2))
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Kalman)"), col = c("black", "red"), lty = c(1, 2), lwd = c(1, 2))
# Plot imputed data using cubic spline method
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")  
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Spline)"), col = c("black", "blue"), lty = c(1, 2), lwd = c(1, 2))

# Reset plotting layout to default
par(mfrow = c(1, 1))
# Convert the data into time series object
covid_data_imputed_ts <- ts(covid_data_imputed$value, frequency = 52.18)

# Fit a structural time series model to ts_data
structural_model <- StructTS(covid_data_imputed_ts, type = "BSM")

# Generate forecasts from the fitted model
forecast_values <- predict(structural_model, n.ahead = 1)$pred
# Get the next rounded forecast value
rounded_forecast <- round(forecast_values)
# Display the rounded forecast value
cat("Rounded Forecast Value:", rounded_forecast, "\n")
#> Rounded Forecast Value: 241753

# Another covariate series - Insured Unemployment Rate (IURNSA)
iurnsa <- fredr(series_id = "IURNSA")
# Date Preparation
iurnsa$date <- as.Date(iurnsa$date)
# Check for missing values
missing_values <- any(is.na(iurnsa$value))
missing_values
#> [1] FALSE
# Convert it into time series
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)
# Plot the original values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Time Series")


# Make sure both time series have the same length
min_length <- min(length(icnsa_ts), length(iurnsa_ts))
#> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found
icnsa_ts <- tail(icnsa_ts, min_length)
#> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found
iurnsa_ts <- tail(iurnsa_ts, min_length)
#> Error in eval(expr, envir, enclos): object 'min_length' not found

# Handling COVID data for IURNSA
cov_start <- as.Date("2020-04-04")
cov_end <- as.Date("2020-10-17")
cov_start_idx <- which(iurnsa$date == cov_start)
cov_end_idx <- which(iurnsa$date == cov_end)
cov_idx <- rep(0, nrow(iurnsa))
cov_idx[cov_start_idx:cov_end_idx] <- 1
cov_idx <- as.logical(cov_idx)
iurnsa$value[cov_idx] <- NA
# Impute using state space method
iurnsa$value <- imputeTS::na_kalman(iurnsa$value)
# Plot the original values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Imputed")

# Convert it into time series
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)

Combine the time series data for ICNSA and IURNSA combined_ts <- cbind(ICNSA = covid_data_imputed_ts, IURNSA = iurnsa_ts) Fit an ARIMA model with covariate (IURNSA) arima_model <- auto.arima(combined_ts[, "ICNSA"], xreg = combined_ts[, "IURNSA"]) Forecast the next value forecast_values <- forecast(arima_model, xreg = iurnsa_ts, h = 1) Get the next rounded forecast value rounded_forecast <- round(forecast_values$mean) Display the rounded forecast value cat("Rounded Forecast Value with Covariate (IURNSA):", rounded_forecast, "\n")

Created on 2024-03-07 with reprex v2.1.0

The final forecast value is - 213853

SH3458 commented 2 months ago
suppressMessages({
  library(astsa)
  library(bsts)
  library(BigVAR)
  library(dlm)
  library(dplyr)
  library(dygraphs)
  library(fable)
  library(forecast)
  library(fpp3)
  library(fredr)
  library(ggplot2)
  library(imputeTS)
  library(lubridate)
  library(MARSS)
  library(readxl)
  library(splines)
  library(seasonal)
  library(tidyverse)
  library(tsbox)
  library(zoo)
  library(reprex)
  library(plotly)
  library(tseries)
  library(vars)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Warning: package 'lattice' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'fable' was built under R version 4.3.3
#> Warning: package 'fabletools' was built under R version 4.3.3
#> Warning: package 'forecast' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
#> Warning: package 'feasts' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3
#> Warning: package 'vars' was built under R version 4.3.3
#> Warning: package 'strucchange' was built under R version 4.3.3

# File paths
file_paths <- c("C:/Study/Time Series/HW/HW4/Manufacturing and Supplies Inventory (MI).xlsx",
                "C:/Study/Time Series/HW/HW4/Work in Process Inventories (WI).xlsx",
                "C:/Study/Time Series/HW/HW4/Finished Goods Inventories (FI).xlsx",
                "C:/Study/Time Series/HW/HW4/Value of Shipments (VS).xlsx")

# Load data
data_MI <- read_excel(file_paths[1])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_WI <- read_excel(file_paths[2])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_FI <- read_excel(file_paths[3])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_VS <- read_excel(file_paths[4])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`

# Inspect the structure of the data
str(data_MI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ ...1              : chr [1:403] NA NA NA NA ...
#>  $ ...2              : chr [1:403] NA NA NA NA ...
#>  $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Materials and Supplies Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_WI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ ...1              : chr [1:403] NA NA NA NA ...
#>  $ ...2              : chr [1:403] NA NA NA NA ...
#>  $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Work in Process Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_FI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ ...1              : chr [1:403] NA NA NA NA ...
#>  $ ...2              : chr [1:403] NA NA NA NA ...
#>  $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Finished Goods Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_VS)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#>  $ ...1              : chr [1:403] NA NA NA NA ...
#>  $ ...2              : chr [1:403] NA NA NA NA ...
#>  $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Value of Shipments [Millions of Dollars]" "Period: 1992 to 2024" ...

# Delete last column and first 7 rows, and rename columns
clean_data <- function(data) {
  data <- data[-c(1:7, (nrow(data)-9):nrow(data)), -ncol(data)]  # Delete first 7 rows and last column
  colnames(data) <- c("Date", "Value")  # Rename columns
  return(data)
}

# Clean and rename each dataset
new_MI <- clean_data(data_MI)
new_WI <- clean_data(data_WI)
new_FI <- clean_data(data_FI)
new_VS <- clean_data(data_VS)

# Check the structure of the cleaned datasets
str(new_MI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#>  $ Value: chr [1:386] "8923" "9164" "9200" "9252" ...
str(new_WI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#>  $ Value: chr [1:386] "2100" "2203" "2073" "2071" ...
str(new_FI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#>  $ Value: chr [1:386] "14576" "14396" "14469" "14238" ...
str(new_VS)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#>  $ Value: chr [1:386] "28677" "28560" "29239" "29904" ...

# Convert dates to standard format
new_MI$Date <- as.Date(paste0("01-", new_MI$Date), format = "%d-%b-%Y")
new_WI$Date <- as.Date(paste0("01-", new_WI$Date), format = "%d-%b-%Y")
new_FI$Date <- as.Date(paste0("01-", new_FI$Date), format = "%d-%b-%Y")
new_VS$Date <- as.Date(paste0("01-", new_VS$Date), format = "%d-%b-%Y")

# Convert "Value" column from character to numeric
new_MI$Value <- as.numeric(new_MI$Value)
new_WI$Value <- as.numeric(new_WI$Value)
new_FI$Value <- as.numeric(new_FI$Value)
new_VS$Value <- as.numeric(new_VS$Value)

# Check for missing values
sum(is.na(new_MI))
#> [1] 0
sum(is.na(new_WI))
#> [1] 0
sum(is.na(new_FI))
#> [1] 0
sum(is.na(new_VS))
#> [1] 0

# Plot Manufacturing & Supplies Inventory
plot(new_MI$Date, new_MI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Manufacturing & Supplies Inventory")


# Plot Work in Process Inventories
plot(new_WI$Date, new_WI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Work in Process Inventories")


# Plot Finished Goods Inventories
plot(new_FI$Date, new_FI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Finished Goods Inventories")


# Plot Value of Shipments
plot(new_VS$Date, new_VS$Value, type = "l", xlab = "Date", ylab = "Value", main = "Value of Shipments")


# Function to perform ADF test and print results
check_stationarity <- function(data) {
  adf_test <- ur.df(data$Value, type = "trend", lags = 12)
  cat("ADF Test Results:\n")
  print(summary(adf_test))
}

# Check stationarity for each dataset
check_stationarity(new_MI)
#> ADF Test Results:
#> 
#> ############################################### 
#> # Augmented Dickey-Fuller Test Unit Root Test # 
#> ############################################### 
#> 
#> Test regression trend 
#> 
#> 
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -982.66 -113.96    7.36  120.22 1041.74 
#> 
#> Coefficients:
#>                Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)  116.319390  53.545891   2.172  0.03049 * 
#> z.lag.1       -0.017928   0.007755  -2.312  0.02136 * 
#> tt             0.915888   0.357012   2.565  0.01071 * 
#> z.diff.lag1   -0.058130   0.052591  -1.105  0.26977   
#> z.diff.lag2   -0.059405   0.052873  -1.124  0.26196   
#> z.diff.lag3    0.173746   0.052941   3.282  0.00113 **
#> z.diff.lag4    0.008400   0.053669   0.157  0.87572   
#> z.diff.lag5    0.033364   0.053945   0.618  0.53666   
#> z.diff.lag6    0.055853   0.053832   1.038  0.30018   
#> z.diff.lag7    0.100373   0.053875   1.863  0.06327 . 
#> z.diff.lag8   -0.018400   0.054166  -0.340  0.73428   
#> z.diff.lag9    0.090376   0.054249   1.666  0.09660 . 
#> z.diff.lag10   0.118937   0.053949   2.205  0.02812 * 
#> z.diff.lag11   0.035821   0.054324   0.659  0.51006   
#> z.diff.lag12  -0.068162   0.054064  -1.261  0.20821   
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 212.1 on 358 degrees of freedom
#> Multiple R-squared:  0.1025, Adjusted R-squared:  0.06737 
#> F-statistic: 2.919 on 14 and 358 DF,  p-value: 0.0003229
#> 
#> 
#> Value of test-statistic is: -2.3117 3.7258 3.3825 
#> 
#> Critical values for test statistics: 
#>       1pct  5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2  6.15  4.71  4.05
#> phi3  8.34  6.30  5.36
check_stationarity(new_WI)
#> ADF Test Results:
#> 
#> ############################################### 
#> # Augmented Dickey-Fuller Test Unit Root Test # 
#> ############################################### 
#> 
#> Test regression trend 
#> 
#> 
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -885.42  -57.38    1.89   54.98  623.34 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)  33.89776   17.61082   1.925  0.05504 . 
#> z.lag.1      -0.02136    0.01010  -2.115  0.03513 * 
#> tt            0.30695    0.15121   2.030  0.04310 * 
#> z.diff.lag1  -0.03411    0.05172  -0.660  0.51000   
#> z.diff.lag2   0.12738    0.05176   2.461  0.01432 * 
#> z.diff.lag3   0.01045    0.05221   0.200  0.84148   
#> z.diff.lag4   0.04943    0.05197   0.951  0.34214   
#> z.diff.lag5   0.05509    0.05783   0.953  0.34146   
#> z.diff.lag6   0.03521    0.05853   0.601  0.54789   
#> z.diff.lag7  -0.05597    0.05864  -0.955  0.34045   
#> z.diff.lag8   0.02748    0.05887   0.467  0.64097   
#> z.diff.lag9   0.12951    0.05889   2.199  0.02851 * 
#> z.diff.lag10  0.02351    0.05921   0.397  0.69162   
#> z.diff.lag11  0.05356    0.05929   0.903  0.36695   
#> z.diff.lag12 -0.19131    0.05907  -3.239  0.00131 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 115.4 on 358 degrees of freedom
#> Multiple R-squared:  0.08869,    Adjusted R-squared:  0.05305 
#> F-statistic: 2.489 on 14 and 358 DF,  p-value: 0.002207
#> 
#> 
#> Value of test-statistic is: -2.1149 2.2656 2.2549 
#> 
#> Critical values for test statistics: 
#>       1pct  5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2  6.15  4.71  4.05
#> phi3  8.34  6.30  5.36
check_stationarity(new_FI)
#> ADF Test Results:
#> 
#> ############################################### 
#> # Augmented Dickey-Fuller Test Unit Root Test # 
#> ############################################### 
#> 
#> Test regression trend 
#> 
#> 
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -983.00 -132.92   -7.66  145.87  890.07 
#> 
#> Coefficients:
#>                Estimate Std. Error t value Pr(>|t|)  
#> (Intercept)  187.827721  96.032445   1.956   0.0513 .
#> z.lag.1       -0.015667   0.008088  -1.937   0.0535 .
#> tt             1.081874   0.484667   2.232   0.0262 *
#> z.diff.lag1    0.036568   0.052244   0.700   0.4844  
#> z.diff.lag2    0.118457   0.052068   2.275   0.0235 *
#> z.diff.lag3    0.052388   0.052441   0.999   0.3185  
#> z.diff.lag4    0.052407   0.052749   0.994   0.3211  
#> z.diff.lag5    0.071997   0.052811   1.363   0.1736  
#> z.diff.lag6   -0.005623   0.052859  -0.106   0.9153  
#> z.diff.lag7    0.067721   0.052811   1.282   0.2006  
#> z.diff.lag8   -0.033436   0.053058  -0.630   0.5290  
#> z.diff.lag9    0.050405   0.053185   0.948   0.3439  
#> z.diff.lag10   0.037787   0.053432   0.707   0.4799  
#> z.diff.lag11   0.125206   0.053248   2.351   0.0192 *
#> z.diff.lag12  -0.128491   0.053588  -2.398   0.0170 *
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 229.4 on 358 degrees of freedom
#> Multiple R-squared:  0.08919,    Adjusted R-squared:  0.05357 
#> F-statistic: 2.504 on 14 and 358 DF,  p-value: 0.002064
#> 
#> 
#> Value of test-statistic is: -1.937 4.3484 2.9516 
#> 
#> Critical values for test statistics: 
#>       1pct  5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2  6.15  4.71  4.05
#> phi3  8.34  6.30  5.36
check_stationarity(new_VS)
#> ADF Test Results:
#> 
#> ############################################### 
#> # Augmented Dickey-Fuller Test Unit Root Test # 
#> ############################################### 
#> 
#> Test regression trend 
#> 
#> 
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#> 
#> Residuals:
#>      Min       1Q   Median       3Q      Max 
#> -1791.77  -323.58     6.22   340.94  1599.96 
#> 
#> Coefficients:
#>                Estimate Std. Error t value Pr(>|t|)   
#> (Intercept)  757.382452 258.320727   2.932  0.00359 **
#> z.lag.1       -0.028956   0.010464  -2.767  0.00595 **
#> tt             4.160546   1.455615   2.858  0.00451 **
#> z.diff.lag1   -0.105861   0.052152  -2.030  0.04311 * 
#> z.diff.lag2    0.008682   0.052406   0.166  0.86852   
#> z.diff.lag3    0.123475   0.052334   2.359  0.01884 * 
#> z.diff.lag4    0.136105   0.052350   2.600  0.00971 **
#> z.diff.lag5   -0.109228   0.052670  -2.074  0.03881 * 
#> z.diff.lag6    0.069633   0.052929   1.316  0.18915   
#> z.diff.lag7    0.064114   0.052917   1.212  0.22647   
#> z.diff.lag8   -0.014938   0.052259  -0.286  0.77516   
#> z.diff.lag9    0.149177   0.051819   2.879  0.00423 **
#> z.diff.lag10   0.077924   0.052274   1.491  0.13693   
#> z.diff.lag11   0.055271   0.052481   1.053  0.29298   
#> z.diff.lag12  -0.058946   0.052051  -1.132  0.25819   
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 528.3 on 358 degrees of freedom
#> Multiple R-squared:  0.1235, Adjusted R-squared:  0.08923 
#> F-statistic: 3.603 on 14 and 358 DF,  p-value: 1.328e-05
#> 
#> 
#> Value of test-statistic is: -2.7672 5.3268 4.1175 
#> 
#> Critical values for test statistics: 
#>       1pct  5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2  6.15  4.71  4.05
#> phi3  8.34  6.30  5.36

# Convert to time series objects 
ts_MI <- ts(new_MI$Value, frequency = 12)
ts_WI <- ts(new_WI$Value, frequency = 12)
ts_FI <- ts(new_FI$Value, frequency = 12)
ts_VS <- ts(new_VS$Value, frequency = 12)

# Decomposition
plot(decompose(ts_MI), ylab = "")
title(main = "MI")

plot(decompose(ts_WI), ylab = "")
title(main = "WI")

plot(decompose(ts_FI), ylab = "")
title(main = "FI")

plot(decompose(ts_VS), ylab = "")
title(main = "VS")


# Combine all time series into a single data frame
ts_data <- data.frame(ts_MI, ts_WI, ts_FI, ts_VS)

# Fit a VAR(1) model
var_model_1 <- VAR(ts_data, p = 1)

var_select <- VARselect(ts_data, lag.max = 12, type = "both")
var_select$criteria
#>                   1            2            3            4            5
#> AIC(n) 4.373607e+01 4.363734e+01 4.358105e+01 4.360257e+01 4.362457e+01
#> HQ(n)  4.383606e+01 4.380399e+01 4.381435e+01 4.390253e+01 4.399119e+01
#> SC(n)  4.398790e+01 4.405705e+01 4.416864e+01 4.435805e+01 4.454793e+01
#> FPE(n) 9.870510e+18 8.942936e+18 8.454137e+18 8.639464e+18 8.833817e+18
#>                   6            7            8            9           10
#> AIC(n) 4.365953e+01 4.370112e+01 4.375264e+01 4.378099e+01 4.381128e+01
#> HQ(n)  4.409280e+01 4.420105e+01 4.431923e+01 4.441424e+01 4.451118e+01
#> SC(n)  4.475077e+01 4.496024e+01 4.517965e+01 4.537588e+01 4.557405e+01
#> FPE(n) 9.151299e+18 9.544525e+18 1.005556e+19 1.035301e+19 1.068197e+19
#>                  11           12
#> AIC(n) 4.384152e+01 4.387503e+01
#> HQ(n)  4.460807e+01 4.470824e+01
#> SC(n)  4.577217e+01 4.597356e+01
#> FPE(n) 1.102309e+19 1.141507e+19

# Fit a VAR(p) model. Keeping p=2 based on above observations
var_model_2 <- VAR(ts_data, p = 2)

# Print summary of VAR models
summary(var_model_1)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS 
#> Deterministic variables: const 
#> Sample size: 385 
#> Log Likelihood: -10573.343 
#> Roots of the characteristic polynomial:
#> 1.001 0.9617 0.9617 0.8785
#> Call:
#> VAR(y = ts_data, p = 1)
#> 
#> 
#> Estimation results for equation ts_MI: 
#> ====================================== 
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1  0.931876   0.018964  49.138  < 2e-16 ***
#> ts_WI.l1  0.143906   0.038674   3.721 0.000228 ***
#> ts_FI.l1 -0.012623   0.013910  -0.907 0.364718    
#> ts_VS.l1  0.014056   0.005301   2.651 0.008351 ** 
#> const    65.452317  55.502250   1.179 0.239027    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 212.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9983,  Adjusted R-squared: 0.9983 
#> F-statistic: 5.701e+04 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_WI: 
#> ====================================== 
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1  0.018159   0.010399   1.746   0.0816 .  
#> ts_WI.l1  0.954453   0.021207  45.007   <2e-16 ***
#> ts_FI.l1 -0.012908   0.007628  -1.692   0.0914 .  
#> ts_VS.l1  0.004387   0.002907   1.509   0.1321    
#> const    -6.162485  30.434629  -0.202   0.8396    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 116.5 on 380 degrees of freedom
#> Multiple R-Squared: 0.9952,  Adjusted R-squared: 0.9952 
#> F-statistic: 1.973e+04 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_FI: 
#> ====================================== 
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1   0.06018    0.02003   3.004  0.00284 ** 
#> ts_WI.l1  -0.02622    0.04086  -0.642  0.52146    
#> ts_FI.l1   0.92467    0.01469  62.927  < 2e-16 ***
#> ts_VS.l1   0.01762    0.00560   3.146  0.00178 ** 
#> const    103.59469   58.63259   1.767  0.07806 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 224.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9989,  Adjusted R-squared: 0.9989 
#> F-statistic: 8.781e+04 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_VS: 
#> ====================================== 
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1   0.05232    0.04962   1.054    0.292    
#> ts_WI.l1  -0.03111    0.10119  -0.307    0.759    
#> ts_FI.l1  -0.01026    0.03640  -0.282    0.778    
#> ts_VS.l1   0.99145    0.01387  71.474   <2e-16 ***
#> const    145.36282  145.22818   1.001    0.317    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 555.9 on 380 degrees of freedom
#> Multiple R-Squared: 0.9987,  Adjusted R-squared: 0.9987 
#> F-statistic: 7.441e+04 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         ts_MI   ts_WI ts_FI  ts_VS
#> ts_MI 45132.9  -291.3 -5855  22577
#> ts_WI  -291.3 13570.9 -2659   2168
#> ts_FI -5855.3 -2659.3 50367   8220
#> ts_VS 22577.1  2167.7  8220 309011
#> 
#> Correlation matrix of residuals:
#>          ts_MI    ts_WI    ts_FI   ts_VS
#> ts_MI  1.00000 -0.01177 -0.12281 0.19118
#> ts_WI -0.01177  1.00000 -0.10172 0.03347
#> ts_FI -0.12281 -0.10172  1.00000 0.06588
#> ts_VS  0.19118  0.03347  0.06588 1.00000
summary(var_model_2)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS 
#> Deterministic variables: const 
#> Sample size: 384 
#> Log Likelihood: -10511.22 
#> Roots of the characteristic polynomial:
#> 0.9996 0.9783 0.9783 0.8925 0.2677 0.2488 0.1422 0.09355
#> Call:
#> VAR(y = ts_data, p = 2)
#> 
#> 
#> Estimation results for equation ts_MI: 
#> ====================================== 
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1  0.906090   0.052806  17.159   <2e-16 ***
#> ts_WI.l1  0.217293   0.094205   2.307   0.0216 *  
#> ts_FI.l1  0.065662   0.049570   1.325   0.1861    
#> ts_VS.l1  0.018152   0.020075   0.904   0.3665    
#> ts_MI.l2  0.030727   0.053441   0.575   0.5657    
#> ts_WI.l2 -0.084649   0.098394  -0.860   0.3902    
#> ts_FI.l2 -0.078922   0.047622  -1.657   0.0983 .  
#> ts_VS.l2 -0.004325   0.020385  -0.212   0.8321    
#> const    57.127943  57.013253   1.002   0.3170    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 212.3 on 375 degrees of freedom
#> Multiple R-Squared: 0.9984,  Adjusted R-squared: 0.9983 
#> F-statistic: 2.845e+04 on 8 and 375 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_WI: 
#> ====================================== 
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1  0.025083   0.028794   0.871   0.3843    
#> ts_WI.l1  0.918495   0.051368  17.881   <2e-16 ***
#> ts_FI.l1  0.055116   0.027029   2.039   0.0421 *  
#> ts_VS.l1  0.013843   0.010946   1.265   0.2068    
#> ts_MI.l2 -0.011200   0.029140  -0.384   0.7009    
#> ts_WI.l2  0.044854   0.053652   0.836   0.4037    
#> ts_FI.l2 -0.067988   0.025967  -2.618   0.0092 ** 
#> ts_VS.l2 -0.009128   0.011116  -0.821   0.4121    
#> const    -0.615680  31.088082  -0.020   0.9842    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 115.8 on 375 degrees of freedom
#> Multiple R-Squared: 0.9953,  Adjusted R-squared: 0.9952 
#> F-statistic:  9965 on 8 and 375 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_FI: 
#> ====================================== 
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const 
#> 
#>          Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1  0.19209    0.05476   3.508 0.000506 ***
#> ts_WI.l1  0.19447    0.09769   1.991 0.047236 *  
#> ts_FI.l1  0.99211    0.05140  19.300  < 2e-16 ***
#> ts_VS.l1  0.06076    0.02082   2.919 0.003727 ** 
#> ts_MI.l2 -0.13351    0.05542  -2.409 0.016470 *  
#> ts_WI.l2 -0.23876    0.10203  -2.340 0.019807 *  
#> ts_FI.l2 -0.04725    0.04938  -0.957 0.339238    
#> ts_VS.l2 -0.04995    0.02114  -2.363 0.018646 *  
#> const    63.38438   59.12213   1.072 0.284368    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 220.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.999,   Adjusted R-squared: 0.9989 
#> F-statistic: 4.544e+04 on 8 and 375 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_VS: 
#> ====================================== 
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const 
#> 
#>           Estimate Std. Error t value Pr(>|t|)    
#> ts_MI.l1   0.53011    0.13261   3.998 7.71e-05 ***
#> ts_WI.l1   0.60782    0.23657   2.569 0.010576 *  
#> ts_FI.l1   0.46942    0.12448   3.771 0.000189 ***
#> ts_VS.l1   0.81489    0.05041  16.165  < 2e-16 ***
#> ts_MI.l2  -0.48564    0.13420  -3.619 0.000337 ***
#> ts_WI.l2  -0.66548    0.24709  -2.693 0.007393 ** 
#> ts_FI.l2  -0.46779    0.11959  -3.912 0.000109 ***
#> ts_VS.l2   0.17545    0.05119   3.427 0.000677 ***
#> const    118.48950  143.17289   0.828 0.408425    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 533.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.9988,  Adjusted R-squared: 0.9988 
#> F-statistic: 4.023e+04 on 8 and 375 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         ts_MI   ts_WI ts_FI  ts_VS
#> ts_MI 45062.2  -620.7 -6091  21514
#> ts_WI  -620.7 13398.3 -2882   1440
#> ts_FI -6090.9 -2881.5 48458   5384
#> ts_VS 21514.1  1440.4  5384 284173
#> 
#> Correlation matrix of residuals:
#>          ts_MI    ts_WI    ts_FI   ts_VS
#> ts_MI  1.00000 -0.02526 -0.13035 0.19012
#> ts_WI -0.02526  1.00000 -0.11309 0.02334
#> ts_FI -0.13035 -0.11309  1.00000 0.04588
#> ts_VS  0.19012  0.02334  0.04588 1.00000

# Produce a one-month ahead forecast for VAR(1) model
forecast_var1 <- predict(var_model_1, n.ahead = 1)
forecast_var2 <- predict(var_model_2, n.ahead = 1)
# Print forecast for the next month
print(forecast_var1)
#> $ts_MI
#>                fcst    lower upper       CI
#> ts_MI.fcst 25546.62 25130.24 25963 416.3845
#> 
#> $ts_WI
#>                fcst   lower    upper       CI
#> ts_WI.fcst 6173.774 5945.45 6402.098 228.3242
#> 
#> $ts_FI
#>                fcst   lower    upper       CI
#> ts_FI.fcst 38148.07 37708.2 38587.93 439.8687
#> 
#> $ts_VS
#>                fcst    lower    upper       CI
#> ts_VS.fcst 79916.19 78826.67 81005.71 1089.519
print(forecast_var2)
#> $ts_MI
#>                fcst    lower    upper       CI
#> ts_MI.fcst 25597.19 25181.13 26013.25 416.0585
#> 
#> $ts_WI
#>                fcst    lower    upper       CI
#> ts_WI.fcst 6181.545 5954.677 6408.413 226.8676
#> 
#> $ts_FI
#>                fcst    lower    upper       CI
#> ts_FI.fcst 38125.98 37694.54 38557.43 431.4482
#> 
#> $ts_VS
#>                fcst    lower   upper       CI
#> ts_VS.fcst 79887.68 78842.87 80932.5 1044.815

# Granger causality test
causality(var_model_1, cause = "ts_WI")
#> $Granger
#> 
#>  Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#> 
#> data:  VAR object var_model_1
#> F-Test = 4.9772, df1 = 3, df2 = 1520, p-value = 0.001937
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 4.9675, df = 3, p-value = 0.1742
causality(var_model_1, cause = "ts_MI")
#> $Granger
#> 
#>  Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#> 
#> data:  VAR object var_model_1
#> F-Test = 4.6258, df1 = 3, df2 = 1520, p-value = 0.003162
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 20.423, df = 3, p-value = 0.0001387
causality(var_model_1, cause = "ts_FI")
#> $Granger
#> 
#>  Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#> 
#> data:  VAR object var_model_1
#> F-Test = 1.2421, df1 = 3, df2 = 1520, p-value = 0.293
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 12.92, df = 3, p-value = 0.004813
causality(var_model_1, cause = "ts_VS")
#> $Granger
#> 
#>  Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#> 
#> data:  VAR object var_model_1
#> F-Test = 7.6444, df1 = 3, df2 = 1520, p-value = 4.511e-05
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 17.181, df = 3, p-value = 0.0006485
causality(var_model_2, cause = "ts_WI")
#> $Granger
#> 
#>  Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#> 
#> data:  VAR object var_model_2
#> F-Test = 4.3941, df1 = 6, df2 = 1500, p-value = 0.0002069
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#> 
#> data:  VAR object var_model_2
#> Chi-squared = 5.9754, df = 3, p-value = 0.1128
causality(var_model_2, cause = "ts_MI")
#> $Granger
#> 
#>  Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#> 
#> data:  VAR object var_model_2
#> F-Test = 5.9554, df1 = 6, df2 = 1500, p-value = 3.669e-06
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#> 
#> data:  VAR object var_model_2
#> Chi-squared = 20.926, df = 3, p-value = 0.0001091
causality(var_model_2, cause = "ts_FI")
#> $Granger
#> 
#>  Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#> 
#> data:  VAR object var_model_2
#> F-Test = 4.353, df1 = 6, df2 = 1500, p-value = 0.0002296
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#> 
#> data:  VAR object var_model_2
#> Chi-squared = 13.422, df = 3, p-value = 0.003807
causality(var_model_2, cause = "ts_VS")
#> $Granger
#> 
#>  Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#> 
#> data:  VAR object var_model_2
#> F-Test = 4.306, df1 = 6, df2 = 1500, p-value = 0.0002587
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#> 
#> data:  VAR object var_model_2
#> Chi-squared = 15.682, df = 3, p-value = 0.001318

# BigVAR
BV_mtx <- cbind(ts_MI, ts_WI, ts_FI, ts_VS)
sparse_var1 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 1, lambda = 1)[,,1]
sparse_var2 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 2, lambda = 1)[,,1]
print(sparse_var1)
#>            [,1]         [,2]        [,3]        [,4]          [,5]
#> [1,]  174.01547  0.886890892  0.30504143  0.01663456 -0.0006677293
#> [2,] -238.99471  0.197292936  0.52896736 -0.02799815 -0.0036364839
#> [3,]  227.60538  0.077398839  0.03136181  0.90695011  0.0136623442
#> [4,]  -29.26629 -0.004295884 -0.03908114  0.05908871  0.9807352964
print(sparse_var2)
#>            [,1]       [,2]        [,3]       [,4]         [,5]        [,6]
#> [1,]  154.25487 0.46214070  0.16137837 0.02877342  0.009604066  0.42370831
#> [2,] -655.62155 0.13445978  0.10050360 0.03903518 -0.016213934  0.13277418
#> [3,]  407.01738 0.06397564  0.02506380 0.43447155  0.014553460  0.06898732
#> [4,]   20.27397 0.01090493 -0.01664742 0.02716274  0.541490492 -0.01249533
#>             [,7]        [,8]         [,9]
#> [1,]  0.15912244 0.017064895 -0.023648172
#> [2,]  0.09489652 0.036436615 -0.020478404
#> [3,]  0.02970738 0.406646961  0.007813708
#> [4,] -0.02515570 0.006335993  0.450445911
summary(sparse_var1)
#>        V1                V2                  V3                 V4           
#>  Min.   :-238.99   Min.   :-0.004296   Min.   :-0.03908   Min.   :-0.027998  
#>  1st Qu.: -81.70   1st Qu.: 0.056975   1st Qu.: 0.01375   1st Qu.: 0.005476  
#>  Median :  72.37   Median : 0.137346   Median : 0.16820   Median : 0.037862  
#>  Mean   :  33.34   Mean   : 0.289322   Mean   : 0.20657   Mean   : 0.238669  
#>  3rd Qu.: 187.41   3rd Qu.: 0.369692   3rd Qu.: 0.36102   3rd Qu.: 0.271054  
#>  Max.   : 227.61   Max.   : 0.886891   Max.   : 0.52897   Max.   : 0.906950  
#>        V5           
#>  Min.   :-0.003636  
#>  1st Qu.:-0.001410  
#>  Median : 0.006497  
#>  Mean   : 0.247523  
#>  3rd Qu.: 0.255431  
#>  Max.   : 0.980735
summary(sparse_var2)
#>        V1                V2                V3                 V4         
#>  Min.   :-655.62   Min.   :0.01090   Min.   :-0.01665   Min.   :0.02716  
#>  1st Qu.:-148.70   1st Qu.:0.05071   1st Qu.: 0.01464   1st Qu.:0.02837  
#>  Median :  87.26   Median :0.09922   Median : 0.06278   Median :0.03390  
#>  Mean   : -18.52   Mean   :0.16787   Mean   : 0.06757   Mean   :0.13236  
#>  3rd Qu.: 217.45   3rd Qu.:0.21638   3rd Qu.: 0.11572   3rd Qu.:0.13789  
#>  Max.   : 407.02   Max.   :0.46214   Max.   : 0.16138   Max.   :0.43447  
#>        V5                 V6                 V7                 V8          
#>  Min.   :-0.01621   Min.   :-0.01250   Min.   :-0.02516   Min.   :0.006336  
#>  1st Qu.: 0.00315   1st Qu.: 0.04862   1st Qu.: 0.01599   1st Qu.:0.014383  
#>  Median : 0.01208   Median : 0.10088   Median : 0.06230   Median :0.026751  
#>  Mean   : 0.13736   Mean   : 0.15324   Mean   : 0.06464   Mean   :0.116621  
#>  3rd Qu.: 0.14629   3rd Qu.: 0.20551   3rd Qu.: 0.11095   3rd Qu.:0.128989  
#>  Max.   : 0.54149   Max.   : 0.42371   Max.   : 0.15912   Max.   :0.406647  
#>        V9           
#>  Min.   :-0.023648  
#>  1st Qu.:-0.021271  
#>  Median :-0.006332  
#>  Mean   : 0.103533  
#>  3rd Qu.: 0.118472  
#>  Max.   : 0.450446

Created on 2024-04-10 with reprex v2.1.0

The final forecast values :

VAR(1) - MI - 25546.62 WI - 6173.774 FI - 38148.07 VS - 79916.19

VAR(2) - MI - 25597.19 WI - 6181.545 FI - 38125.98 VS - 79887.68