Open SH3458 opened 5 months ago
@SH3458 Can you throw more light on how you considered p,d,q values for ARIMA model without ACF and PACF plots?
This is truly insightful! I did not have decomposition in my code, and it really helps make much more sense of the data.
Yes, so here as I am using 'auto.arima' function, it automatically chooses the best potential values of p, q, and d values. Just an added information, if you do not want to use the 'auto.arima' function then apart from ACF & PACF plots you can also perform grid search over various orders and select the combination that gives us the minimum BIC value (Bayesian Information Criterion).
When you say the best potential p, q values — that makes sense. But d is the order of differencing needed to make the series stationary. So does auto.arima perform a stationarity check in the background?!
Yes, it actually does!
Some possible covid start and end dates
# Candidate COVID window: mark the rows between these two dates and blank
# out their ICNSA values in a working copy (icnsa_na).
covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
# Logical mask: TRUE for every row inside the window, FALSE elsewhere
covid_idx <- seq_len(nrow(icnsa)) %in% (covid_start_idx:covid_end_idx)
icnsa_na <- icnsa |>
  select(date, value)
icnsa_na$value[covid_idx] <- NA
Homework assignment - 1 from Brightspace
suppressMessages({
library(astsa)
library(dplyr)
library(dygraphs)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
})
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# Pull both FRED series: weekly initial claims (ICNSA) and continued
# claims (CCNSA), not seasonally adjusted.
icnsa <- fredr(series_id = "ICNSA")
ccnsa <- fredr(series_id = "CCNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
ccnsa$date <- as.Date(ccnsa$date)
# Check for missing values. The original assigned both checks to the same
# variable, so the ICNSA result was silently overwritten by the CCNSA one;
# keep a separate flag per series and a combined flag for compatibility.
missing_values_icnsa <- any(is.na(icnsa$value))
missing_values_ccnsa <- any(is.na(ccnsa$value))
missing_values <- missing_values_icnsa || missing_values_ccnsa
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "CCNSA Series Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Handling COVID data for ICNSA: blank out the pandemic spike so it does
# not dominate the model fit.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Impute NA values using linear interpolation. na.rm = FALSE keeps the
# vector the same length even if leading/trailing NAs ever appear (the
# default na.rm = TRUE silently drops un-interpolatable edge NAs, which
# would shorten the column and break the data frame).
icnsa$value <- zoo::na.approx(icnsa$value, na.rm = FALSE)
# Check for missing values after imputation
missing_after_imputation <- any(is.na(icnsa$value))
missing_after_imputation
#> [1] FALSE
# Handling COVID data for CCNSA: blank out the pandemic window before
# modelling, mirroring the ICNSA treatment above.
covid_s <- as.Date("2020-03-28")
covid_e <- as.Date("2020-10-17")
covid_s_idx <- which(ccnsa$date == covid_s)
covid_e_idx <- which(ccnsa$date == covid_e)
cov_idx <- rep(0, nrow(ccnsa))
cov_idx[covid_s_idx:covid_e_idx] <- 1
cov_idx <- as.logical(cov_idx)
ccnsa$value[cov_idx] <- NA
# Impute NA values using linear interpolation. na.rm = FALSE keeps the
# vector length fixed even if leading/trailing NAs ever appear (the default
# would silently drop them and shorten the column).
ccnsa$value <- zoo::na.approx(ccnsa$value, na.rm = FALSE)
# Check for missing values after imputation
miss_after_imputation <- any(is.na(ccnsa$value))
miss_after_imputation
#> [1] FALSE
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# # CCNSA Series Plotting using "plotly"
# plot_ly(data = ccnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "CCNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Convert data into a time series object.
# frequency = 52.18 is the average number of weeks per year (365.25 / 7);
# a non-integer frequency is why auto.arima later warns that it rounded the
# frequency for seasonal differencing.
icnsa_ts <- ts(icnsa$value, frequency = 52.18)
ccnsa_ts <- ts(ccnsa$value, frequency = 52.18)
# Plot the time series
autoplot(icnsa_ts) +
labs(title = "ICNSA")
autoplot(ccnsa_ts) +
labs(title = "CCNSA")
# ACF and PACF plots (NOTE(review): lag.max is fractional here; it is
# presumably coerced to a whole number of lags internally — confirm)
ggtsdisplay(icnsa_ts, main = "ICNSA Plot", lag.max = 52.18)
ggtsdisplay(ccnsa_ts, main = "CCNSA Plot", lag.max = 52.18)
# Model identification for ICNSA using auto.arima, which searches over
# (p, d, q) and seasonal orders automatically (see the discussion at the top
# of this thread). The warning below comes from the non-integer frequency
# set when the ts objects were created.
icnsa_model <- auto.arima(icnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Model identification for CCNSA using auto.arima
ccnsa_model <- auto.arima(ccnsa_ts)
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Display identified models (orders, coefficients, and information criteria)
icnsa_model
#> Series: icnsa_ts
#> ARIMA(4,0,2)(0,1,1)[52]
#>
#> Coefficients:
#> ar1 ar2 ar3 ar4 ma1 ma2 sma1
#> 1.8970 -0.9218 0.0371 -0.0159 -1.4718 0.5706 -0.2342
#> s.e. 0.0481 0.0763 0.0412 0.0320 0.0446 0.0474 0.0190
#>
#> sigma^2 = 1.063e+09: log likelihood = -34582.42
#> AIC=69180.84 AICc=69180.89 BIC=69228.7
ccnsa_model
#> Series: ccnsa_ts
#> ARIMA(0,0,5)(0,1,1)[52]
#>
#> Coefficients:
#> ma1 ma2 ma3 ma4 ma5 sma1
#> 1.3326 1.6475 1.4565 1.0185 0.4920 -0.2595
#> s.e. 0.0196 0.0254 0.0234 0.0179 0.0146 0.0221
#>
#> sigma^2 = 3.499e+10: log likelihood = -39685.21
#> AIC=79384.41 AICc=79384.45 BIC=79426.29
# Time series cross-correlation analysis
cross_correlation_icnsa_ccnsa <- ccf(icnsa_ts, ccnsa_ts, lag.max = 52.18, type = "correlation", main = "Cross-correlation between ICNSA and CCNSA", xlab = "Lag", ylab = "Correlation")
# Identify the lag with the maximum correlation. which.max() returns the
# POSITION inside the acf vector, not the lag itself: ccf lags run from
# -lag.max to +lag.max, so the position must be mapped through $lag
# (the original printed the raw index, which mislabelled the lag).
max_corr_idx <- which.max(cross_correlation_icnsa_ccnsa$acf)
max_corr_lag <- cross_correlation_icnsa_ccnsa$lag[max_corr_idx]
max_corr_value <- cross_correlation_icnsa_ccnsa$acf[max_corr_idx]
cat("Maximum correlation is at lag", max_corr_lag, "with a value of", max_corr_value, "\n")
# Make sure both time series have the same length before combining them
min_length <- min(length(icnsa_ts), length(ccnsa_ts))
icnsa_ts <- head(icnsa_ts, min_length)
ccnsa_ts <- head(ccnsa_ts, min_length)
# Combine the time series as a data frame (one column per series)
combined_data <- data.frame(icnsa = icnsa_ts, ccnsa = ccnsa_ts)
# Fit regARIMA model: ARIMA errors around a regression of ICNSA on CCNSA
reg_arima_model <- auto.arima(combined_data[, "icnsa"], xreg = combined_data[, "ccnsa"])
#> Warning: The time series frequency has been rounded to support seasonal
#> differencing.
# Summary of the regARIMA model
summary(reg_arima_model)
#> Series: combined_data[, "icnsa"]
#> Regression with ARIMA(3,0,2)(0,1,1)[52] errors
#>
#> Coefficients:
#> ar1 ar2 ar3 ma1 ma2 sma1 drift xreg
#> 1.7560 -0.7199 -0.0437 -1.4236 0.5583 -0.2330 25.0600 0.0448
#> s.e. 0.0523 0.0739 0.0327 0.0384 0.0271 0.0191 157.8115 0.0090
#>
#> sigma^2 = 1.056e+09: log likelihood = -34559.38
#> AIC=69136.76 AICc=69136.82 BIC=69190.59
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -113.3733 32159.01 21031.19 -0.4065116 5.793527 0.3004479
#> ACF1
#> Training set 0.0002859649
# Regression Diagnostics. The Ljung-Box p-value below (< 2.2e-16) indicates
# significant residual autocorrelation remains in this fit.
checkresiduals(reg_arima_model)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(3,0,2)(0,1,1)[52] errors
#> Q* = 392.88, df = 98.36, p-value < 2.2e-16
#>
#> Model df: 6. Total lags used: 104.36
# Forecasting for the next 1 value using regARIMA model.
# NOTE(review): the last OBSERVED CCNSA value is reused as next week's
# covariate — confirm that proxy is acceptable for this assignment.
num_forecasts <- 1
reg_arima_forecast <- forecast(reg_arima_model, h = num_forecasts, xreg = tail(ccnsa_ts, 1))
# Extract the rounded forecasted value
rounded_forecast <- round(reg_arima_forecast$mean)
cat("The final forecast value is", rounded_forecast)
#> The final forecast value is 240750
The final forecast value is 240750. (Created on 2024-02-21 with reprex v2.1.0.)
The detailed thought process is provided in the pdf submitted on Brightspace.
Homework Assignment - 2
suppressMessages({
library(astsa)
library(dplyr)
library(dygraphs)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(tseries)
})
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# Re-fetch the ICNSA series for this assignment's session
icnsa = fredr(series_id = "ICNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE
# # ICNSA Series Plotting using "plotly"
# plot_ly(data = icnsa, x = ~date, y = ~value, type = "scatter", mode = "lines") +
# layout(title = "ICNSA Plot", xaxis = list(title = "Date"), yaxis = list(title = "Value"))
# Handling COVID data for ICNSA: mark the pandemic window rows and blank
# their values so they can be re-imputed below.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2020-07-25")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Imputing "NA" values using cubic spline imputation between COVID start and end dates
# Fill the NA entries of x: fit a smoothing spline over the observed points
# (indexed by their positions) and predict the missing positions from the
# fitted curve. `lambda` is passed straight through as smooth.spline's spar.
cubic_spline_imputation <- function(x, lambda) {
  observed <- which(!is.na(x))
  missing <- which(is.na(x))
  fit <- smooth.spline(observed, x[observed], spar = lambda)
  x[missing] <- predict(fit, x = missing)$y
  x
}
# Score a smoothing-spline lambda (spar) by the RMSE of the fitted curve on
# the observed (non-NA) points. NOTE(review): this is IN-SAMPLE fit error,
# not true cross-validation, so smaller spar values will tend to win —
# confirm that is the intended selection criterion. The original also
# computed predictions at the NA positions that were never used; that dead
# code is removed here (the returned value is unchanged).
cubic_spline_cv <- function(x, lambda) {
  obs_idx <- which(!is.na(x))
  spline_model <- smooth.spline(obs_idx, x[obs_idx], spar = lambda)
  fitted_vals <- predict(spline_model, x = obs_idx)$y
  sqrt(mean((x[obs_idx] - fitted_vals)^2))
}
# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)
# Performing cross-validation to find the best lambda.
# NOTE(review): cubic_spline_cv measures in-sample RMSE, so the smallest
# spar wins by construction (0.1 below) — confirm this is intended.
rmse_values <- sapply(lambda_values, function(lambda) cubic_spline_cv(icnsa$value, lambda))
# Display the RMSE values for each lambda
cat("Lambda values:", lambda_values, "\n")
#> Lambda values: 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1
cat("RMSE values:", rmse_values, "\n")
#> RMSE values: 62880.31 63772.23 68648.33 76213.45 82421.38 89424.08 97054.13 103978.3 110141.9 113968.4
# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
cat("Best lambda:", best_lambda, "\n")
#> Best lambda: 0.1
# Imputing "NA" values using cubic spline imputation between Covid start and end dates
icnsa$value <- cubic_spline_imputation(icnsa$value, best_lambda)
# Converting data to a time series object after imputation.
# NOTE(review): frequency = 24 is unusual for weekly claims data (weekly
# data is usually ~52 periods/year) — confirm this choice.
ts_icnsa_imputed <- ts(icnsa$value, frequency = 24)
# Checking for missing values after imputation
missing_values_after_imputation <- any(is.na(ts_icnsa_imputed))
missing_values_after_imputation
#> [1] FALSE
# Plotting the time series with x-date and y-value
plot_data <- data.frame(date = icnsa$date, value = icnsa$value)
plot(plot_data$date, plot_data$value, type = "l",
xlab = "Date", ylab = "Value", main = "ICNSA after imputation")
# Augmented Dickey-Fuller (ADF) test for stationarity. The reported p-value
# of 0.01 is the printed floor (see the warning), i.e. the unit-root null is
# rejected.
adf_test_result <- adf.test(ts_icnsa_imputed)
#> Warning in adf.test(ts_icnsa_imputed): p-value smaller than printed p-value
cat("Augmented Dickey-Fuller Test p-value:", adf_test_result$p.value, "\n")
#> Augmented Dickey-Fuller Test p-value: 0.01
# Holt-Winters Multiplicative Model, one-step-ahead forecast. The seasonal
# period is the ts frequency set above (24).
hw_multiplicative <- hw(ts_icnsa_imputed, seasonal = "multiplicative", h = 1)
forecast_multiplicative <- round(hw_multiplicative$mean[1])
# Holt-Winters Additive Model
hw_additive <- hw(ts_icnsa_imputed, seasonal = "additive", h = 1)
forecast_additive <- round(hw_additive$mean[1])
# Print the forecasts
cat("Multiplicative Holt-Winters Forecast:", forecast_multiplicative, "\n")
#> Multiplicative Holt-Winters Forecast: 209359
cat("Additive Holt-Winters Forecast:", forecast_additive, "\n")
#> Additive Holt-Winters Forecast: 203425
Created on 2024-02-28 with reprex v2.1.0
The forecast values are: multiplicative Holt-Winters 209359, additive Holt-Winters 203425.
suppressMessages({
library(astsa)
library(bsts)
library(dlm)
library(dplyr)
library(dygraphs)
library(fable)
library(forecast)
library(fpp3)
library(fredr)
library(imputeTS)
library(lubridate)
library(MARSS)
library(splines)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(plotly)
library(tseries)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3
install.packages("vars")
#> Installing package into 'C:/Users/shwet/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'vars' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\shwet\AppData\Local\Temp\RtmpeyvtMx\downloaded_packages
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:plotly':
#>
#> select
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#>
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#>
#> boundary
#> Loading required package: urca
#> Loading required package: lmtest
#>
#> Attaching package: 'vars'
#> The following object is masked from 'package:fable':
#>
#> VAR
# My API Key
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# ICNSA data
icnsa <- fredr(series_id = "ICNSA")
# Date Preparation
icnsa$date <- as.Date(icnsa$date)
# Check for missing values
missing_values <- any(is.na(icnsa$value))
missing_values
#> [1] FALSE
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")
# Handling COVID data for ICNSA: a wider window than HW1/HW2 (through
# 2021-01-16); the marked values are blanked in icnsa itself.
covid_start <- as.Date("2020-03-21")
covid_end <- as.Date("2021-01-16")
covid_start_idx <- which(icnsa$date == covid_start)
covid_end_idx <- which(icnsa$date == covid_end)
covid_idx <- rep(0, nrow(icnsa))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
icnsa$value[covid_idx] <- NA
# Create a copy of the original data for Spline imputation.
# (The NA re-assignment below is redundant — icnsa$value is already NA in
# the window — but harmless.)
covid_data_spline <- icnsa
covid_data_spline$value[covid_idx] <- NA
# Create a copy of the original data for State Space imputation
covid_data_imputed <- icnsa
covid_data_imputed$value[covid_idx] <- NA
# Impute using state space method. Note icnsa$value itself is never
# re-imputed, so later plots of icnsa show a gap over the COVID window.
covid_data_imputed$value <- imputeTS::na_kalman(covid_data_imputed$value)
# Impute using cubic spline method
# Fit a smoothing spline to the observed (non-NA) points, indexed by
# position, then predict each NA position from the fitted curve.
# `lambda` is forwarded as smooth.spline's spar.
cubic_spline_imputation <- function(x, lambda) {
  obs <- which(!is.na(x))
  mis <- which(is.na(x))
  spline_fit <- smooth.spline(obs, x[obs], spar = lambda)
  x[mis] <- predict(spline_fit, x = mis)$y
  x
}
# Lambda values to be tested
lambda_values <- seq(0.1, 1, by = 0.1)
# Helper: in-sample RMSE of a smoothing spline for a given spar. This
# function was used below without being defined in this session, which
# produced the reprex error `could not find function "cubic_spline_cv"`
# and the cascade of "object not found" errors after it; defining it here
# fixes the whole chain. (Same caveat as HW2: this is fit error on the
# observed points, not true cross-validation.)
cubic_spline_cv <- function(x, lambda) {
  obs_idx <- which(!is.na(x))
  fit <- smooth.spline(obs_idx, x[obs_idx], spar = lambda)
  sqrt(mean((x[obs_idx] - predict(fit, x = obs_idx)$y)^2))
}
# Score every candidate lambda (vapply keeps the result a numeric vector)
rmse_values <- vapply(lambda_values, function(lambda) cubic_spline_cv(covid_data_spline$value, lambda), numeric(1))
# Display the RMSE values for each lambda
# cat("Lambda values:", lambda_values, "\n")
# cat("RMSE values:", rmse_values, "\n")
# Find the lambda with the minimum RMSE
best_lambda <- lambda_values[which.min(rmse_values)]
cat("Best lambda:", best_lambda, "\n")
# Impute using cubic spline imputation
covid_data_spline$value <- cubic_spline_imputation(covid_data_spline$value, best_lambda)
# Plot the original values. Note icnsa$value still contains NAs over the
# COVID window, so the black trace has a gap there.
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
# Plot imputed data using cubic spline method
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)
# Set up a 1x2 plotting layout (1 row, 2 columns) to compare the two
# imputation methods side by side
par(mfrow = c(1, 2))
# Plot the original values
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")
# Plot imputed data using state space method (Kalman filter)
lines(covid_data_imputed$date, covid_data_imputed$value, col = "red", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Kalman)"), col = c("black", "red"), lty = c(1, 2), lwd = c(1, 2))
# Plot imputed data using cubic spline method
plot(icnsa$date, icnsa$value, type = "l", main = "ICNSA Time Series", col = "black")
lines(covid_data_spline$date, covid_data_spline$value, col = "blue", lty = 2, lwd = 2)
legend("topright", legend = c("Original", "Imputed (Spline)"), col = c("black", "blue"), lty = c(1, 2), lwd = c(1, 2))
# Reset plotting layout to default
par(mfrow = c(1, 1))
# Convert the Kalman-imputed data into a time series object
# (52.18 = average weeks per year)
covid_data_imputed_ts <- ts(covid_data_imputed$value, frequency = 52.18)
# Fit a structural time series model ("BSM" = basic structural model with
# level, trend, and seasonal components) to the imputed series
structural_model <- StructTS(covid_data_imputed_ts, type = "BSM")
# Generate a one-step-ahead forecast from the fitted model
forecast_values <- predict(structural_model, n.ahead = 1)$pred
# Get the next rounded forecast value
rounded_forecast <- round(forecast_values)
# Display the rounded forecast value
cat("Rounded Forecast Value:", rounded_forecast, "\n")
#> Rounded Forecast Value: 241753
# Another covariate series - Insured Unemployment Rate (IURNSA), to be used
# as the xreg regressor for the ICNSA model below
iurnsa <- fredr(series_id = "IURNSA")
# Date Preparation
iurnsa$date <- as.Date(iurnsa$date)
# Check for missing values
missing_values <- any(is.na(iurnsa$value))
missing_values
#> [1] FALSE
# Convert it into time series (weekly; 52.18 = average weeks per year)
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)
# Plot the original values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Time Series")
# Make sure both time series have the same length. The reprex errored here
# ("object 'icnsa_ts' not found") because icnsa_ts was never created in this
# session; use the Kalman-imputed ICNSA series built above instead.
min_length <- min(length(covid_data_imputed_ts), length(iurnsa_ts))
icnsa_ts <- tail(covid_data_imputed_ts, min_length)
iurnsa_ts <- tail(iurnsa_ts, min_length)
# Handling COVID data for IURNSA: blank the pandemic window, then re-impute
cov_start <- as.Date("2020-04-04")
cov_end <- as.Date("2020-10-17")
cov_start_idx <- which(iurnsa$date == cov_start)
cov_end_idx <- which(iurnsa$date == cov_end)
cov_idx <- rep(0, nrow(iurnsa))
cov_idx[cov_start_idx:cov_end_idx] <- 1
cov_idx <- as.logical(cov_idx)
iurnsa$value[cov_idx] <- NA
# Impute using state space method
iurnsa$value <- imputeTS::na_kalman(iurnsa$value)
# Plot the imputed values
plot(iurnsa$date, iurnsa$value, type = "l", main = "IURNSA Imputed")
# Convert it into time series. NOTE(review): this rebuilds iurnsa_ts from
# the full series, discarding any earlier length alignment — verify the two
# series are still the same length before cbind-ing them below.
iurnsa_ts <- ts(iurnsa$value, frequency = 52.18)
# Combine the time series data for ICNSA and IURNSA
combined_ts <- cbind(ICNSA = covid_data_imputed_ts, IURNSA = iurnsa_ts)
# Fit an ARIMA model with covariate (IURNSA)
arima_model <- auto.arima(combined_ts[, "ICNSA"], xreg = combined_ts[, "IURNSA"])
# Forecast the next value. forecast() takes one future xreg row per step, so
# for h = 1 pass a single value; the last observed IURNSA value is used as
# next week's covariate, as in HW1. (This line also restores the comment
# markers that were stripped when this snippet was pasted onto one line.)
forecast_values <- forecast(arima_model, xreg = tail(iurnsa_ts, 1), h = 1)
# Get the next rounded forecast value
rounded_forecast <- round(forecast_values$mean)
# Display the rounded forecast value
cat("Rounded Forecast Value with Covariate (IURNSA):", rounded_forecast, "\n")
Created on 2024-03-07 with reprex v2.1.0
The final forecast value is - 213853
suppressMessages({
library(astsa)
library(bsts)
library(BigVAR)
library(dlm)
library(dplyr)
library(dygraphs)
library(fable)
library(forecast)
library(fpp3)
library(fredr)
library(ggplot2)
library(imputeTS)
library(lubridate)
library(MARSS)
library(readxl)
library(splines)
library(seasonal)
library(tidyverse)
library(tsbox)
library(zoo)
library(reprex)
library(plotly)
library(tseries)
library(vars)
})
#> Warning: package 'bsts' was built under R version 4.3.3
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Warning: package 'Boom' was built under R version 4.3.3
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Warning: package 'lattice' was built under R version 4.3.3
#> Warning: package 'dlm' was built under R version 4.3.3
#> Warning: package 'fable' was built under R version 4.3.3
#> Warning: package 'fabletools' was built under R version 4.3.3
#> Warning: package 'forecast' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
#> Warning: package 'feasts' was built under R version 4.3.3
#> Warning: package 'MARSS' was built under R version 4.3.3
#> Warning: package 'vars' was built under R version 4.3.3
#> Warning: package 'strucchange' was built under R version 4.3.3
# File paths. NOTE(review): absolute Windows paths make this script
# machine-specific; relative paths or here::here() would be more portable.
file_paths <- c("C:/Study/Time Series/HW/HW4/Manufacturing and Supplies Inventory (MI).xlsx",
"C:/Study/Time Series/HW/HW4/Work in Process Inventories (WI).xlsx",
"C:/Study/Time Series/HW/HW4/Finished Goods Inventories (FI).xlsx",
"C:/Study/Time Series/HW/HW4/Value of Shipments (VS).xlsx")
# Load data. The "New names" messages mean the spreadsheets have blank
# header cells, which read_excel fills in as ...1, ...2.
data_MI <- read_excel(file_paths[1])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_WI <- read_excel(file_paths[2])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_FI <- read_excel(file_paths[3])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
data_VS <- read_excel(file_paths[4])
#> New names:
#> • `` -> `...1`
#> • `` -> `...2`
# Inspect the structure of the data: every sheet is 403 rows x 3 columns of
# character data, with Census Bureau boilerplate in the leading rows
str(data_MI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Materials and Supplies Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_WI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Work in Process Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_FI)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Finished Goods Inventories [Millions of Dollars]" "Period: 1992 to 2024" ...
str(data_VS)
#> tibble [403 × 3] (S3: tbl_df/tbl/data.frame)
#> $ ...1 : chr [1:403] NA NA NA NA ...
#> $ ...2 : chr [1:403] NA NA NA NA ...
#> $ U.S. Census Bureau: chr [1:403] "Source: Manufacturers' Shipments, Inventories, and Orders" "Food Products: U.S. Total" "Seasonally Adjusted Value of Shipments [Millions of Dollars]" "Period: 1992 to 2024" ...
# Drop the leading header rows, the trailing footer rows, and the last
# column, then standardise the column names to Date/Value. (The original
# comment said only the first 7 rows were deleted, but the code also trims
# the last 10 rows; the defaults below preserve that exact behaviour while
# making the row counts adjustable.)
clean_data <- function(data, header_rows = 7, footer_rows = 10) {
  n <- nrow(data)
  drop_rows <- c(seq_len(header_rows), (n - footer_rows + 1):n)
  data <- data[-drop_rows, -ncol(data)]
  colnames(data) <- c("Date", "Value")
  data
}
# Clean and rename each dataset
new_MI <- clean_data(data_MI)
new_WI <- clean_data(data_WI)
new_FI <- clean_data(data_FI)
new_VS <- clean_data(data_VS)
# Check the structure of the cleaned datasets: 386 rows of Date/Value pairs,
# still stored as character at this point
str(new_MI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "8923" "9164" "9200" "9252" ...
str(new_WI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "2100" "2203" "2073" "2071" ...
str(new_FI)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "14576" "14396" "14469" "14238" ...
str(new_VS)
#> tibble [386 × 2] (S3: tbl_df/tbl/data.frame)
#> $ Date : chr [1:386] "Jan-1992" "Feb-1992" "Mar-1992" "Apr-1992" ...
#> $ Value: chr [1:386] "28677" "28560" "29239" "29904" ...
# Convert dates to standard format. NOTE(review): "%b" month-name parsing is
# locale-dependent; this assumes an English locale — confirm if the script
# must run elsewhere.
new_MI$Date <- as.Date(paste0("01-", new_MI$Date), format = "%d-%b-%Y")
new_WI$Date <- as.Date(paste0("01-", new_WI$Date), format = "%d-%b-%Y")
new_FI$Date <- as.Date(paste0("01-", new_FI$Date), format = "%d-%b-%Y")
new_VS$Date <- as.Date(paste0("01-", new_VS$Date), format = "%d-%b-%Y")
# Convert "Value" column from character to numeric
new_MI$Value <- as.numeric(new_MI$Value)
new_WI$Value <- as.numeric(new_WI$Value)
new_FI$Value <- as.numeric(new_FI$Value)
new_VS$Value <- as.numeric(new_VS$Value)
# Check for missing values (also catches NAs introduced by coercion above)
sum(is.na(new_MI))
#> [1] 0
sum(is.na(new_WI))
#> [1] 0
sum(is.na(new_FI))
#> [1] 0
sum(is.na(new_VS))
#> [1] 0
# Plot Manufacturing & Supplies Inventory
plot(new_MI$Date, new_MI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Manufacturing & Supplies Inventory")
# Plot Work in Process Inventories
plot(new_WI$Date, new_WI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Work in Process Inventories")
# Plot Finished Goods Inventories
plot(new_FI$Date, new_FI$Value, type = "l", xlab = "Date", ylab = "Value", main = "Finished Goods Inventories")
# Plot Value of Shipments
plot(new_VS$Date, new_VS$Value, type = "l", xlab = "Date", ylab = "Value", main = "Value of Shipments")
# Helper: run an Augmented Dickey-Fuller unit-root test on the Value column
# (trend specification, 12 lagged differences) and print the full summary.
check_stationarity <- function(data) {
  adf_fit <- ur.df(data$Value, type = "trend", lags = 12)
  cat("ADF Test Results:\n")
  print(summary(adf_fit))
}
# Check stationarity for each dataset (ADF with trend and 12 lags)
check_stationarity(new_MI)  # materials & supplies inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -982.66 -113.96 7.36 120.22 1041.74
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 116.319390 53.545891 2.172 0.03049 *
#> z.lag.1 -0.017928 0.007755 -2.312 0.02136 *
#> tt 0.915888 0.357012 2.565 0.01071 *
#> z.diff.lag1 -0.058130 0.052591 -1.105 0.26977
#> z.diff.lag2 -0.059405 0.052873 -1.124 0.26196
#> z.diff.lag3 0.173746 0.052941 3.282 0.00113 **
#> z.diff.lag4 0.008400 0.053669 0.157 0.87572
#> z.diff.lag5 0.033364 0.053945 0.618 0.53666
#> z.diff.lag6 0.055853 0.053832 1.038 0.30018
#> z.diff.lag7 0.100373 0.053875 1.863 0.06327 .
#> z.diff.lag8 -0.018400 0.054166 -0.340 0.73428
#> z.diff.lag9 0.090376 0.054249 1.666 0.09660 .
#> z.diff.lag10 0.118937 0.053949 2.205 0.02812 *
#> z.diff.lag11 0.035821 0.054324 0.659 0.51006
#> z.diff.lag12 -0.068162 0.054064 -1.261 0.20821
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 212.1 on 358 degrees of freedom
#> Multiple R-squared: 0.1025, Adjusted R-squared: 0.06737
#> F-statistic: 2.919 on 14 and 358 DF, p-value: 0.0003229
#>
#>
#> Value of test-statistic is: -2.3117 3.7258 3.3825
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_WI)  # work-in-process inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -885.42 -57.38 1.89 54.98 623.34
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 33.89776 17.61082 1.925 0.05504 .
#> z.lag.1 -0.02136 0.01010 -2.115 0.03513 *
#> tt 0.30695 0.15121 2.030 0.04310 *
#> z.diff.lag1 -0.03411 0.05172 -0.660 0.51000
#> z.diff.lag2 0.12738 0.05176 2.461 0.01432 *
#> z.diff.lag3 0.01045 0.05221 0.200 0.84148
#> z.diff.lag4 0.04943 0.05197 0.951 0.34214
#> z.diff.lag5 0.05509 0.05783 0.953 0.34146
#> z.diff.lag6 0.03521 0.05853 0.601 0.54789
#> z.diff.lag7 -0.05597 0.05864 -0.955 0.34045
#> z.diff.lag8 0.02748 0.05887 0.467 0.64097
#> z.diff.lag9 0.12951 0.05889 2.199 0.02851 *
#> z.diff.lag10 0.02351 0.05921 0.397 0.69162
#> z.diff.lag11 0.05356 0.05929 0.903 0.36695
#> z.diff.lag12 -0.19131 0.05907 -3.239 0.00131 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 115.4 on 358 degrees of freedom
#> Multiple R-squared: 0.08869, Adjusted R-squared: 0.05305
#> F-statistic: 2.489 on 14 and 358 DF, p-value: 0.002207
#>
#>
#> Value of test-statistic is: -2.1149 2.2656 2.2549
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_FI)  # finished goods inventories
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -983.00 -132.92 -7.66 145.87 890.07
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 187.827721 96.032445 1.956 0.0513 .
#> z.lag.1 -0.015667 0.008088 -1.937 0.0535 .
#> tt 1.081874 0.484667 2.232 0.0262 *
#> z.diff.lag1 0.036568 0.052244 0.700 0.4844
#> z.diff.lag2 0.118457 0.052068 2.275 0.0235 *
#> z.diff.lag3 0.052388 0.052441 0.999 0.3185
#> z.diff.lag4 0.052407 0.052749 0.994 0.3211
#> z.diff.lag5 0.071997 0.052811 1.363 0.1736
#> z.diff.lag6 -0.005623 0.052859 -0.106 0.9153
#> z.diff.lag7 0.067721 0.052811 1.282 0.2006
#> z.diff.lag8 -0.033436 0.053058 -0.630 0.5290
#> z.diff.lag9 0.050405 0.053185 0.948 0.3439
#> z.diff.lag10 0.037787 0.053432 0.707 0.4799
#> z.diff.lag11 0.125206 0.053248 2.351 0.0192 *
#> z.diff.lag12 -0.128491 0.053588 -2.398 0.0170 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 229.4 on 358 degrees of freedom
#> Multiple R-squared: 0.08919, Adjusted R-squared: 0.05357
#> F-statistic: 2.504 on 14 and 358 DF, p-value: 0.002064
#>
#>
#> Value of test-statistic is: -1.937 4.3484 2.9516
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
check_stationarity(new_VS)  # value of shipments
#> ADF Test Results:
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1791.77 -323.58 6.22 340.94 1599.96
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 757.382452 258.320727 2.932 0.00359 **
#> z.lag.1 -0.028956 0.010464 -2.767 0.00595 **
#> tt 4.160546 1.455615 2.858 0.00451 **
#> z.diff.lag1 -0.105861 0.052152 -2.030 0.04311 *
#> z.diff.lag2 0.008682 0.052406 0.166 0.86852
#> z.diff.lag3 0.123475 0.052334 2.359 0.01884 *
#> z.diff.lag4 0.136105 0.052350 2.600 0.00971 **
#> z.diff.lag5 -0.109228 0.052670 -2.074 0.03881 *
#> z.diff.lag6 0.069633 0.052929 1.316 0.18915
#> z.diff.lag7 0.064114 0.052917 1.212 0.22647
#> z.diff.lag8 -0.014938 0.052259 -0.286 0.77516
#> z.diff.lag9 0.149177 0.051819 2.879 0.00423 **
#> z.diff.lag10 0.077924 0.052274 1.491 0.13693
#> z.diff.lag11 0.055271 0.052481 1.053 0.29298
#> z.diff.lag12 -0.058946 0.052051 -1.132 0.25819
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 528.3 on 358 degrees of freedom
#> Multiple R-squared: 0.1235, Adjusted R-squared: 0.08923
#> F-statistic: 3.603 on 14 and 358 DF, p-value: 1.328e-05
#>
#>
#> Value of test-statistic is: -2.7672 5.3268 4.1175
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
# Convert each cleaned series to a monthly ts object (frequency = 12).
# NOTE(review): ts() is given no start date, so the time index is a bare
# 1, 2, ... sequence — fine for model fitting, but decomposition plots
# will not show calendar dates on the x-axis.
ts_MI <- ts(new_MI$Value, frequency = 12)
ts_WI <- ts(new_WI$Value, frequency = 12)
ts_FI <- ts(new_FI$Value, frequency = 12)
ts_VS <- ts(new_VS$Value, frequency = 12)
# Classical decomposition (observed / trend / seasonal / random) of each
# series; ylab is blanked so the panel labels carry the information.
plot(decompose(ts_MI), ylab = "")
title(main = "MI")
plot(decompose(ts_WI), ylab = "")
title(main = "WI")
plot(decompose(ts_FI), ylab = "")
title(main = "FI")
plot(decompose(ts_VS), ylab = "")
title(main = "VS")
# Combine all time series into a single data frame (one column per series)
ts_data <- data.frame(ts_MI, ts_WI, ts_FI, ts_VS)
# Fit a VAR(1) model as a baseline before lag-order selection
var_model_1 <- VAR(ts_data, p = 1)
# Lag-order selection over 1..12 lags; type = "both" includes a constant
# and a trend term in the candidate models. Criteria are printed below.
var_select <- VARselect(ts_data, lag.max = 12, type = "both")
var_select$criteria
#> 1 2 3 4 5
#> AIC(n) 4.373607e+01 4.363734e+01 4.358105e+01 4.360257e+01 4.362457e+01
#> HQ(n) 4.383606e+01 4.380399e+01 4.381435e+01 4.390253e+01 4.399119e+01
#> SC(n) 4.398790e+01 4.405705e+01 4.416864e+01 4.435805e+01 4.454793e+01
#> FPE(n) 9.870510e+18 8.942936e+18 8.454137e+18 8.639464e+18 8.833817e+18
#> 6 7 8 9 10
#> AIC(n) 4.365953e+01 4.370112e+01 4.375264e+01 4.378099e+01 4.381128e+01
#> HQ(n) 4.409280e+01 4.420105e+01 4.431923e+01 4.441424e+01 4.451118e+01
#> SC(n) 4.475077e+01 4.496024e+01 4.517965e+01 4.537588e+01 4.557405e+01
#> FPE(n) 9.151299e+18 9.544525e+18 1.005556e+19 1.035301e+19 1.068197e+19
#> 11 12
#> AIC(n) 4.384152e+01 4.387503e+01
#> HQ(n) 4.460807e+01 4.470824e+01
#> SC(n) 4.577217e+01 4.597356e+01
#> FPE(n) 1.102309e+19 1.141507e+19
# Fit a VAR(p) model. Keeping p=2 based on above observations:
# in the printed criteria HQ(n) is minimized at lag 2 (43.804), while
# SC/BIC prefers lag 1 and AIC prefers lag 3 — p = 2 is the middle-ground
# choice.
var_model_2 <- VAR(ts_data, p = 2)
# Print summary of VAR models (equation-by-equation OLS estimates,
# residual covariance/correlation, and roots of the companion matrix)
summary(var_model_1)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS
#> Deterministic variables: const
#> Sample size: 385
#> Log Likelihood: -10573.343
#> Roots of the characteristic polynomial:
#> 1.001 0.9617 0.9617 0.8785
#> Call:
#> VAR(y = ts_data, p = 1)
#>
#>
#> Estimation results for equation ts_MI:
#> ======================================
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.931876 0.018964 49.138 < 2e-16 ***
#> ts_WI.l1 0.143906 0.038674 3.721 0.000228 ***
#> ts_FI.l1 -0.012623 0.013910 -0.907 0.364718
#> ts_VS.l1 0.014056 0.005301 2.651 0.008351 **
#> const 65.452317 55.502250 1.179 0.239027
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 212.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9983, Adjusted R-squared: 0.9983
#> F-statistic: 5.701e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_WI:
#> ======================================
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.018159 0.010399 1.746 0.0816 .
#> ts_WI.l1 0.954453 0.021207 45.007 <2e-16 ***
#> ts_FI.l1 -0.012908 0.007628 -1.692 0.0914 .
#> ts_VS.l1 0.004387 0.002907 1.509 0.1321
#> const -6.162485 30.434629 -0.202 0.8396
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 116.5 on 380 degrees of freedom
#> Multiple R-Squared: 0.9952, Adjusted R-squared: 0.9952
#> F-statistic: 1.973e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_FI:
#> ======================================
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.06018 0.02003 3.004 0.00284 **
#> ts_WI.l1 -0.02622 0.04086 -0.642 0.52146
#> ts_FI.l1 0.92467 0.01469 62.927 < 2e-16 ***
#> ts_VS.l1 0.01762 0.00560 3.146 0.00178 **
#> const 103.59469 58.63259 1.767 0.07806 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 224.4 on 380 degrees of freedom
#> Multiple R-Squared: 0.9989, Adjusted R-squared: 0.9989
#> F-statistic: 8.781e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_VS:
#> ======================================
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.05232 0.04962 1.054 0.292
#> ts_WI.l1 -0.03111 0.10119 -0.307 0.759
#> ts_FI.l1 -0.01026 0.03640 -0.282 0.778
#> ts_VS.l1 0.99145 0.01387 71.474 <2e-16 ***
#> const 145.36282 145.22818 1.001 0.317
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 555.9 on 380 degrees of freedom
#> Multiple R-Squared: 0.9987, Adjusted R-squared: 0.9987
#> F-statistic: 7.441e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 45132.9 -291.3 -5855 22577
#> ts_WI -291.3 13570.9 -2659 2168
#> ts_FI -5855.3 -2659.3 50367 8220
#> ts_VS 22577.1 2167.7 8220 309011
#>
#> Correlation matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 1.00000 -0.01177 -0.12281 0.19118
#> ts_WI -0.01177 1.00000 -0.10172 0.03347
#> ts_FI -0.12281 -0.10172 1.00000 0.06588
#> ts_VS 0.19118 0.03347 0.06588 1.00000
# VAR(2) summary. Note that in the ts_VS equation all eight lag
# coefficients are significant here, whereas in the VAR(1) fit above only
# VS's own lag was — evidence the second lag adds explanatory power.
summary(var_model_2)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: ts_MI, ts_WI, ts_FI, ts_VS
#> Deterministic variables: const
#> Sample size: 384
#> Log Likelihood: -10511.22
#> Roots of the characteristic polynomial:
#> 0.9996 0.9783 0.9783 0.8925 0.2677 0.2488 0.1422 0.09355
#> Call:
#> VAR(y = ts_data, p = 2)
#>
#>
#> Estimation results for equation ts_MI:
#> ======================================
#> ts_MI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.906090 0.052806 17.159 <2e-16 ***
#> ts_WI.l1 0.217293 0.094205 2.307 0.0216 *
#> ts_FI.l1 0.065662 0.049570 1.325 0.1861
#> ts_VS.l1 0.018152 0.020075 0.904 0.3665
#> ts_MI.l2 0.030727 0.053441 0.575 0.5657
#> ts_WI.l2 -0.084649 0.098394 -0.860 0.3902
#> ts_FI.l2 -0.078922 0.047622 -1.657 0.0983 .
#> ts_VS.l2 -0.004325 0.020385 -0.212 0.8321
#> const 57.127943 57.013253 1.002 0.3170
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 212.3 on 375 degrees of freedom
#> Multiple R-Squared: 0.9984, Adjusted R-squared: 0.9983
#> F-statistic: 2.845e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_WI:
#> ======================================
#> ts_WI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.025083 0.028794 0.871 0.3843
#> ts_WI.l1 0.918495 0.051368 17.881 <2e-16 ***
#> ts_FI.l1 0.055116 0.027029 2.039 0.0421 *
#> ts_VS.l1 0.013843 0.010946 1.265 0.2068
#> ts_MI.l2 -0.011200 0.029140 -0.384 0.7009
#> ts_WI.l2 0.044854 0.053652 0.836 0.4037
#> ts_FI.l2 -0.067988 0.025967 -2.618 0.0092 **
#> ts_VS.l2 -0.009128 0.011116 -0.821 0.4121
#> const -0.615680 31.088082 -0.020 0.9842
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 115.8 on 375 degrees of freedom
#> Multiple R-Squared: 0.9953, Adjusted R-squared: 0.9952
#> F-statistic: 9965 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_FI:
#> ======================================
#> ts_FI = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.19209 0.05476 3.508 0.000506 ***
#> ts_WI.l1 0.19447 0.09769 1.991 0.047236 *
#> ts_FI.l1 0.99211 0.05140 19.300 < 2e-16 ***
#> ts_VS.l1 0.06076 0.02082 2.919 0.003727 **
#> ts_MI.l2 -0.13351 0.05542 -2.409 0.016470 *
#> ts_WI.l2 -0.23876 0.10203 -2.340 0.019807 *
#> ts_FI.l2 -0.04725 0.04938 -0.957 0.339238
#> ts_VS.l2 -0.04995 0.02114 -2.363 0.018646 *
#> const 63.38438 59.12213 1.072 0.284368
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 220.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.999, Adjusted R-squared: 0.9989
#> F-statistic: 4.544e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation ts_VS:
#> ======================================
#> ts_VS = ts_MI.l1 + ts_WI.l1 + ts_FI.l1 + ts_VS.l1 + ts_MI.l2 + ts_WI.l2 + ts_FI.l2 + ts_VS.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> ts_MI.l1 0.53011 0.13261 3.998 7.71e-05 ***
#> ts_WI.l1 0.60782 0.23657 2.569 0.010576 *
#> ts_FI.l1 0.46942 0.12448 3.771 0.000189 ***
#> ts_VS.l1 0.81489 0.05041 16.165 < 2e-16 ***
#> ts_MI.l2 -0.48564 0.13420 -3.619 0.000337 ***
#> ts_WI.l2 -0.66548 0.24709 -2.693 0.007393 **
#> ts_FI.l2 -0.46779 0.11959 -3.912 0.000109 ***
#> ts_VS.l2 0.17545 0.05119 3.427 0.000677 ***
#> const 118.48950 143.17289 0.828 0.408425
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 533.1 on 375 degrees of freedom
#> Multiple R-Squared: 0.9988, Adjusted R-squared: 0.9988
#> F-statistic: 4.023e+04 on 8 and 375 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 45062.2 -620.7 -6091 21514
#> ts_WI -620.7 13398.3 -2882 1440
#> ts_FI -6090.9 -2881.5 48458 5384
#> ts_VS 21514.1 1440.4 5384 284173
#>
#> Correlation matrix of residuals:
#> ts_MI ts_WI ts_FI ts_VS
#> ts_MI 1.00000 -0.02526 -0.13035 0.19012
#> ts_WI -0.02526 1.00000 -0.11309 0.02334
#> ts_FI -0.13035 -0.11309 1.00000 0.04588
#> ts_VS 0.19012 0.02334 0.04588 1.00000
# Produce a one-month-ahead (n.ahead = 1) forecast from each fitted VAR
forecast_var1 <- predict(var_model_1, n.ahead = 1)
forecast_var2 <- predict(var_model_2, n.ahead = 1)
# Print forecast for the next month: point forecast plus lower/upper
# interval bounds and the half-width (CI) for each series
print(forecast_var1)
#> $ts_MI
#> fcst lower upper CI
#> ts_MI.fcst 25546.62 25130.24 25963 416.3845
#>
#> $ts_WI
#> fcst lower upper CI
#> ts_WI.fcst 6173.774 5945.45 6402.098 228.3242
#>
#> $ts_FI
#> fcst lower upper CI
#> ts_FI.fcst 38148.07 37708.2 38587.93 439.8687
#>
#> $ts_VS
#> fcst lower upper CI
#> ts_VS.fcst 79916.19 78826.67 81005.71 1089.519
# Same one-month-ahead forecast, from the VAR(2) model
print(forecast_var2)
#> $ts_MI
#> fcst lower upper CI
#> ts_MI.fcst 25597.19 25181.13 26013.25 416.0585
#>
#> $ts_WI
#> fcst lower upper CI
#> ts_WI.fcst 6181.545 5954.677 6408.413 226.8676
#>
#> $ts_FI
#> fcst lower upper CI
#> ts_FI.fcst 38125.98 37694.54 38557.43 431.4482
#>
#> $ts_VS
#> fcst lower upper CI
#> ts_VS.fcst 79887.68 78842.87 80932.5 1044.815
# Granger causality tests on the VAR(1) model: each call tests H0 that
# the named series does not Granger-cause the remaining three (plus an
# instantaneous-causality chi-squared test).
causality(var_model_1, cause = "ts_WI")
#> $Granger
#>
#> Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 4.9772, df1 = 3, df2 = 1520, p-value = 0.001937
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 4.9675, df = 3, p-value = 0.1742
# H0: MI does not Granger-cause WI, FI, VS (VAR(1))
causality(var_model_1, cause = "ts_MI")
#> $Granger
#>
#> Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 4.6258, df1 = 3, df2 = 1520, p-value = 0.003162
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 20.423, df = 3, p-value = 0.0001387
# H0: FI does not Granger-cause MI, WI, VS (VAR(1)) — the only test in
# this group that is not rejected (p = 0.293 in the output below)
causality(var_model_1, cause = "ts_FI")
#> $Granger
#>
#> Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_1
#> F-Test = 1.2421, df1 = 3, df2 = 1520, p-value = 0.293
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_1
#> Chi-squared = 12.92, df = 3, p-value = 0.004813
# H0: VS does not Granger-cause MI, WI, FI (VAR(1))
causality(var_model_1, cause = "ts_VS")
#> $Granger
#>
#> Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_1
#> F-Test = 7.6444, df1 = 3, df2 = 1520, p-value = 4.511e-05
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_1
#> Chi-squared = 17.181, df = 3, p-value = 0.0006485
# Repeat the same four Granger/instantaneous causality tests on VAR(2)
causality(var_model_2, cause = "ts_WI")
#> $Granger
#>
#> Granger causality H0: ts_WI do not Granger-cause ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 4.3941, df1 = 6, df2 = 1500, p-value = 0.0002069
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_WI and ts_MI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 5.9754, df = 3, p-value = 0.1128
# H0: MI does not Granger-cause WI, FI, VS (VAR(2))
causality(var_model_2, cause = "ts_MI")
#> $Granger
#>
#> Granger causality H0: ts_MI do not Granger-cause ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 5.9554, df1 = 6, df2 = 1500, p-value = 3.669e-06
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_MI and ts_WI ts_FI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 20.926, df = 3, p-value = 0.0001091
# H0: FI does not Granger-cause MI, WI, VS — rejected under VAR(2)
# (p = 0.00023 below), unlike the VAR(1) result above
causality(var_model_2, cause = "ts_FI")
#> $Granger
#>
#> Granger causality H0: ts_FI do not Granger-cause ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_2
#> F-Test = 4.353, df1 = 6, df2 = 1500, p-value = 0.0002296
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_FI and ts_MI ts_WI ts_VS
#>
#> data: VAR object var_model_2
#> Chi-squared = 13.422, df = 3, p-value = 0.003807
# H0: VS does not Granger-cause MI, WI, FI (VAR(2))
causality(var_model_2, cause = "ts_VS")
#> $Granger
#>
#> Granger causality H0: ts_VS do not Granger-cause ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_2
#> F-Test = 4.306, df1 = 6, df2 = 1500, p-value = 0.0002587
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: ts_VS and ts_MI ts_WI ts_FI
#>
#> data: VAR object var_model_2
#> Chi-squared = 15.682, df = 3, p-value = 0.001318
# BigVAR: penalized (sparse) VAR estimation for comparison.
# BigVAR.fit wants a matrix input, so bind the four ts columns together.
BV_mtx <- cbind(ts_MI, ts_WI, ts_FI, ts_VS)
# struct = 'Basic' is an elementwise lasso penalty; lambda = 1 is a fixed
# penalty value (no cross-validation over lambda here — NOTE(review):
# consider cv.BigVAR to tune it). [,,1] drops the third array dimension
# to leave the 4 x (1 + 4p) coefficient matrix; judging by the magnitudes
# in the printed output, column 1 appears to hold the intercepts — verify
# against the BigVAR documentation.
sparse_var1 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 1, lambda = 1)[,,1]
sparse_var2 <- BigVAR.fit(Y = BV_mtx, struct = 'Basic', p = 2, lambda = 1)[,,1]
print(sparse_var1)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 174.01547 0.886890892 0.30504143 0.01663456 -0.0006677293
#> [2,] -238.99471 0.197292936 0.52896736 -0.02799815 -0.0036364839
#> [3,] 227.60538 0.077398839 0.03136181 0.90695011 0.0136623442
#> [4,] -29.26629 -0.004295884 -0.03908114 0.05908871 0.9807352964
# Coefficient matrix of the sparse VAR(2) fit (intercept + 8 lag columns)
print(sparse_var2)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 154.25487 0.46214070 0.16137837 0.02877342 0.009604066 0.42370831
#> [2,] -655.62155 0.13445978 0.10050360 0.03903518 -0.016213934 0.13277418
#> [3,] 407.01738 0.06397564 0.02506380 0.43447155 0.014553460 0.06898732
#> [4,] 20.27397 0.01090493 -0.01664742 0.02716274 0.541490492 -0.01249533
#> [,7] [,8] [,9]
#> [1,] 0.15912244 0.017064895 -0.023648172
#> [2,] 0.09489652 0.036436615 -0.020478404
#> [3,] 0.02970738 0.406646961 0.007813708
#> [4,] -0.02515570 0.006335993 0.450445911
# summary() on a plain numeric matrix gives per-column five-number
# summaries (hence the generic V1..V5 labels below) — of limited use for
# coefficients; kept for a quick scan of magnitudes.
summary(sparse_var1)
#> V1 V2 V3 V4
#> Min. :-238.99 Min. :-0.004296 Min. :-0.03908 Min. :-0.027998
#> 1st Qu.: -81.70 1st Qu.: 0.056975 1st Qu.: 0.01375 1st Qu.: 0.005476
#> Median : 72.37 Median : 0.137346 Median : 0.16820 Median : 0.037862
#> Mean : 33.34 Mean : 0.289322 Mean : 0.20657 Mean : 0.238669
#> 3rd Qu.: 187.41 3rd Qu.: 0.369692 3rd Qu.: 0.36102 3rd Qu.: 0.271054
#> Max. : 227.61 Max. : 0.886891 Max. : 0.52897 Max. : 0.906950
#> V5
#> Min. :-0.003636
#> 1st Qu.:-0.001410
#> Median : 0.006497
#> Mean : 0.247523
#> 3rd Qu.: 0.255431
#> Max. : 0.980735
# Per-column summary of the sparse VAR(2) coefficient matrix (V1..V9)
summary(sparse_var2)
#> V1 V2 V3 V4
#> Min. :-655.62 Min. :0.01090 Min. :-0.01665 Min. :0.02716
#> 1st Qu.:-148.70 1st Qu.:0.05071 1st Qu.: 0.01464 1st Qu.:0.02837
#> Median : 87.26 Median :0.09922 Median : 0.06278 Median :0.03390
#> Mean : -18.52 Mean :0.16787 Mean : 0.06757 Mean :0.13236
#> 3rd Qu.: 217.45 3rd Qu.:0.21638 3rd Qu.: 0.11572 3rd Qu.:0.13789
#> Max. : 407.02 Max. :0.46214 Max. : 0.16138 Max. :0.43447
#> V5 V6 V7 V8
#> Min. :-0.01621 Min. :-0.01250 Min. :-0.02516 Min. :0.006336
#> 1st Qu.: 0.00315 1st Qu.: 0.04862 1st Qu.: 0.01599 1st Qu.:0.014383
#> Median : 0.01208 Median : 0.10088 Median : 0.06230 Median :0.026751
#> Mean : 0.13736 Mean : 0.15324 Mean : 0.06464 Mean :0.116621
#> 3rd Qu.: 0.14629 3rd Qu.: 0.20551 3rd Qu.: 0.11095 3rd Qu.:0.128989
#> Max. : 0.54149 Max. : 0.42371 Max. : 0.15912 Max. :0.406647
#> V9
#> Min. :-0.023648
#> 1st Qu.:-0.021271
#> Median :-0.006332
#> Mean : 0.103533
#> 3rd Qu.: 0.118472
#> Max. : 0.450446
Created on 2024-04-10 with reprex v2.1.0
The final one-month-ahead forecast values:
VAR(1) - MI: 25546.62, WI: 6173.774, FI: 38148.07, VS: 79916.19
VAR(2) - MI: 25597.19, WI: 6181.545, FI: 38125.98, VS: 79887.68
Created on 2024-01-31 with reprex v2.1.0