jlivsey / UB-sping24-time-series

Ashmit Bhardwaj #7

Open sc30ash opened 8 months ago

sc30ash commented 8 months ago

Using a 4-week moving average, my prediction for today is 269616.8.

sc30ash commented 8 months ago
library(fredr)
fredr_set_key("02d6768019ad56ffddaef2ba205ab083")

# Fetch ICNSA so the script is self-contained
icnsa <- fredr(series_id = "ICNSA")
time_series <- ts(icnsa$value)
moving_average <- function(series, window_size = 4) {
  n <- length(series)
  if (n < window_size) {
    stop("Window size cannot be greater than the length of the time series.")
  }

  ma_values <- numeric(n - window_size + 1)

  for (i in 1:(n - window_size + 1)) {
    ma_values[i] <- mean(series[i:(i + window_size - 1)])
  }

  return(ma_values)
}

predicted_values <- moving_average(time_series)

next_predicted_value <- tail(predicted_values, 1)
cat("Next Predicted Value:", next_predicted_value, "\n")

Created on 2024-02-01 with reprex v2.1.0

sc30ash commented 7 months ago
library(tidyverse)
library(prophet)
#> Loading required package: Rcpp
#> Loading required package: rlang
#> 
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
#>     flatten_raw, invoke, splice
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

fredr_set_key("02d6768019ad56ffddaef2ba205ab083")
start_date <- as.Date("2000-11-27")
icnsa <- fredr(series_id = "ICNSA", observation_start = as.Date("1990-01-01"), observation_end = as.Date("2024-02-21"))
IHLIDXNSAUS <- fredr(series_id = "IHLIDXNSAUS", observation_start = as.Date("1990-01-01"), observation_end = as.Date("2024-02-21"))
CPALTT01USM657N <- fredr(series_id = "CPALTT01USM657N", observation_start = as.Date("1990-01-01"), observation_end = as.Date("2024-02-21"))
JTU5100HIL <- fredr(series_id = "JTU5100HIL", observation_start = as.Date("1990-01-01"), observation_end = as.Date("2024-02-21"))

icnsa <- icnsa |>
  select(ds = date, y = value)
IHLIDXNSAUS <- IHLIDXNSAUS |>
  select(ds = date, y = value)
CPALTT01USM657N <- CPALTT01USM657N |>
  select(ds = date, y = value)
JTU5100HIL <- JTU5100HIL |>
  select(ds = date, y = value)

CPALTT01USM657N$Date <- as.Date(CPALTT01USM657N$ds)
IHLIDXNSAUS$Date <- as.Date(IHLIDXNSAUS$ds)
JTU5100HIL$Date <- as.Date(JTU5100HIL$ds)
icnsa$Date <- as.Date(icnsa$ds)

# Load necessary libraries
library(dplyr)

# Aggregate data into weekly averages
JTU5100HIL_weekly <- JTU5100HIL %>%
  mutate(Week = as.Date(cut(Date, "week"))) %>%
  group_by(Week) %>%
  summarise(JTU5100HIL_avg = mean(y))

CPALTT01USM657N_weekly <- CPALTT01USM657N %>%
  mutate(Week = as.Date(cut(Date, "week"))) %>%
  group_by(Week) %>%
  summarise(CPALTT01USM657N_avg = mean(y))

IHLIDXNSAUS_weekly <- IHLIDXNSAUS %>%
  mutate(Week = as.Date(cut(Date, "week"))) %>%
  group_by(Week) %>%
  summarise(IHLIDXNSAUS_avg = mean(y))

ICNSA_weekly <- icnsa %>%
  mutate(Week = as.Date(cut(Date, "week"))) %>%
  group_by(Week) %>%
  summarise(ICNSA_avg = mean(y))

weekly_data <- merge(ICNSA_weekly, JTU5100HIL_weekly, by = "Week", all = TRUE)
weekly_data <- merge(weekly_data, IHLIDXNSAUS_weekly, by = "Week", all = TRUE)
weekly_data <- merge(weekly_data, CPALTT01USM657N_weekly, by = "Week", all = TRUE)
colnames(weekly_data) <- c('Week', 'ICNSA', 'Hires', 'Layoffs', 'CPI')
# Remove NA values
weekly_data <- na.omit(weekly_data)
# Exclude the Covid window (note: the modelling below still uses the
# unfiltered weekly_data)
filtered_data <- weekly_data %>%
  filter(!(Week >= as.Date("2020-03-01") & Week <= as.Date("2022-06-01")))

# Load necessary library
library(ggplot2)

# Helper to plot ICNSA against a predictor (defined but not used below;
# cross-correlations are plotted with ccf() instead)
plot_correlation <- function(x, y, main) {
  plot(x, y, xlab = "ICNSA", ylab = main, main = main)
}

# Set up the plotting grid
par(mfrow = c(1, 3))

# Cross-correlation (with lags) between ICNSA and each predictor
ccf(weekly_data$ICNSA, weekly_data$Layoffs, main = "Layoffs")
ccf(weekly_data$ICNSA, weekly_data$Hires, main = "Hires")
ccf(weekly_data$ICNSA, weekly_data$CPI, main = "CPI")

# Reset plotting parameters
par(mfrow = c(1, 1))

# Create time series objects
# Lead ICNSA by one week so each row pairs the predictors with the
# following week's claims
weekly_data$ICNSA <- c(weekly_data$ICNSA[-1], NA)
weekly_data <- na.omit(weekly_data)

icnsa_ts <- ts(weekly_data$ICNSA, start = c(year(start_date), month(start_date)), frequency = 8)
hires_ts <- ts(weekly_data$Hires, start = c(year(start_date), month(start_date)), frequency = 8)
cpi_ts <- ts(weekly_data$CPI, start = c(year(start_date), month(start_date)), frequency = 8)
layoffs_ts <- ts(weekly_data$Layoffs, start = c(year(start_date), month(start_date)), frequency = 8)

# Combine predictor variables into a numeric matrix
predictors_matrix <- cbind(hires_ts, cpi_ts, layoffs_ts)

# Fit ARIMA model with regression
arima_model <- auto.arima(icnsa_ts, xreg = predictors_matrix)

# Summary of the ARIMA model
summary(arima_model)
#> Series: icnsa_ts 
#> Regression with ARIMA(0,0,0) errors 
#> 
#> Coefficients:
#>       intercept   hires_ts     cpi_ts  layoffs_ts
#>       2943569.1  -5627.754  -391821.9  -12722.659
#> s.e.   612041.2   5088.261   315536.0    4403.442
#> 
#> sigma^2 = 6.806e+11:  log likelihood = -689.84
#> AIC=1389.69   AICc=1391.19   BIC=1398.83
#> 
#> Training set error measures:
#>                         ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -1.050071e-10 788311.7 385082.6 -32.81233 99.96004 0.9833447
#>                    ACF1
#> Training set 0.09924387
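# Note: auto.arima selected a pure regression with ARIMA(0,0,0) errors,
# i.e. no ARMA structure in the errors beyond the predictors.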

# Forecast ICNSA using the ARIMA model
forecast_icnsa <- forecast(arima_model, xreg = predictors_matrix, h = 10)
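
# Note: when xreg is supplied, forecast() ignores h and returns one point per
# row of xreg, so the call above "forecasts" over the historical regressor
# values. A sketch of a genuine one-step-ahead forecast (assuming, as an
# approximation, that next week's regressors equal the last observed row):
future_xreg <- tail(predictors_matrix, 1)
forecast_next <- forecast(arima_model, xreg = future_xreg)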

# Plot forecasted values
plot(forecast_icnsa)

Created on 2024-02-22 with reprex v2.1.0

sc30ash commented 7 months ago

Tomorrow's prediction is: 287892

First, I obtained the necessary data: the ICNSA series (Initial Claims, Not Seasonally Adjusted) together with three potential predictors: Hires, CPI, and Layoffs. All series were aggregated to a weekly frequency so they could be aligned with the claims data.

After loading the data into R, I converted it into time series objects using the ts() function, ensuring that the time series were aligned correctly with the appropriate start dates and frequencies.

Next, I explored the variables visually: line charts to inspect the trend in each series, and autocorrelation function (ACF) plots to examine the autocorrelation structure within each series.
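
A minimal sketch of those plots, assuming the weekly_data frame built in the script above:

plot(weekly_data$Week, weekly_data$ICNSA, type = "l",
     xlab = "Week", ylab = "ICNSA", main = "Weekly ICNSA")
acf(weekly_data$ICNSA, main = "ACF of weekly ICNSA")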

To investigate potential relationships between ICNSA and the other variables, I generated correlation plots and cross-correlation plots with lags. These plots provided insights into the potential lagged effects of the predictors on ICNSA.

I then proceeded to build a regression ARIMA model to predict ICNSA using Hires, CPI, and Layoffs as predictors. To do this, I first created lagged versions of the predictor variables to account for potential time lags in their effects on ICNSA.
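
The lagging step itself is not shown in the script above (which instead leads ICNSA by one week); a sketch of explicit one-week lags, assuming the weekly_data frame from earlier and hypothetical column names, might look like:

library(dplyr)

# One-week lags of the predictors (lag() here is dplyr's)
weekly_lagged <- weekly_data %>%
  mutate(Hires_lag1   = lag(Hires, 1),
         Layoffs_lag1 = lag(Layoffs, 1),
         CPI_lag1     = lag(CPI, 1)) %>%
  na.omit()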

Using the auto.arima() function from the forecast package, I fit a regression model with ARIMA errors that incorporated the lagged predictors. I assessed the goodness of fit through standard regression diagnostics: examining the residuals, checking normality and homoscedasticity, identifying outliers, and comparing overall fit via AIC, BIC, and RMSE.
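
A minimal sketch of those residual checks, assuming the arima_model object fitted above:

library(forecast)

# Residual time plot, ACF, and Ljung-Box test in one call
checkresiduals(arima_model)

# Explicit Ljung-Box test on the residuals
Box.test(residuals(arima_model), lag = 10, type = "Ljung-Box")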

After evaluating the regression diagnostics, I concluded that the regression ARIMA model adequately captured the temporal relationship between the predictors and ICNSA, although there were some residual autocorrelations that needed further investigation. Additionally, I identified specific outliers and influential observations that warranted attention.

In summary, I collected and prepared the data, explored the relationships between variables, built a regression ARIMA model, and evaluated its performance using regression diagnostics.

sc30ash commented 7 months ago
library(tidyverse)
library(prophet)
#> Loading required package: Rcpp
#> Loading required package: rlang
#> 
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
#>     flatten_raw, invoke, splice
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(splines)

find_optimal_lambda <- function(lambda_seq, covid_indices, covid_period) {
  # Evaluate the leave-one-out cross-validation score at each candidate
  # smoothing parameter
  cv_errors <- sapply(lambda_seq, function(lambda) {
    cv_fit <- smooth.spline(covid_indices, covid_period, spar = lambda, cv = TRUE)
    cv_fit$cv.crit
  })

  # Return the smoothing parameter with the lowest cross-validation score
  lambda_seq[which.min(cv_errors)]
}

fredr_set_key("02d6768019ad56ffddaef2ba205ab083")
start_date <- as.Date("2000-11-27")
icnsa <- fredr(series_id = "ICNSA", observation_start = as.Date("1990-01-01"), observation_end = as.Date("2024-03-21"))

icnsa <- icnsa |>
  select(ds = date, y = value)

icnsa$Date <- as.Date(icnsa$ds)

ts_data <- ts(icnsa$y)

# Load necessary libraries
library(changepoint)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
#> Successfully loaded changepoint package version 2.2.4
#>  See NEWS for details of changes.

# Ensure ts_data is a plain frequency-1 time series object
ts_data <- ts(ts_data, frequency = 1)

# Perform change-point detection
cpt_result <- cpt.mean(ts_data)

# Extract change points
change_points <- attr(cpt_result, "cpts")
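# Note: the cpts slot from cpt.mean (default AMOC method) typically contains
# the detected changepoint followed by the final index of the series.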

start <- change_points[1]

ts_data_post_covid <- ts_data[start:change_points[2]]

cpt_result_post_covid <- cpt.mean(ts_data_post_covid)
change_points_post_covid <- attr(cpt_result_post_covid, "cpts")

end <- change_points_post_covid[1] + start

# Widen the imputation window by 20 observations on each side
start <- start - 20
end <- end + 20
# Extract the Covid period
covid_period <- ts_data[start:end]

# Generate time indices for the Covid period
covid_indices <- seq_along(covid_period)

# Define a sequence of lambda values to try
lambda_seq <- seq(0.1, 10, by = 0.01)

# Find the optimal lambda using the custom function
optimal_lambda <- find_optimal_lambda(lambda_seq, covid_indices, covid_period)

# Fit a smoothing spline to the Covid period using the selected parameter
# (when spar is supplied, smooth.spline ignores the cv argument)
spline_fit <- smooth.spline(covid_indices, covid_period, spar = optimal_lambda)

# Plot the original data and the spline fit
plot(ts_data, type = 'l', main = 'Original Data with Spline Fit for Covid Period')
covid_x <- seq(start, end, length.out = length(covid_period))
lines(covid_x, predict(spline_fit, x = covid_indices)$y, col = 'red')

predicted_values <- predict(spline_fit, x = covid_indices)$y

# Replace the actual values with predicted values within the Covid period
ts_data[start:end] <- predicted_values
plot(ts_data, type = 'l', main = 'Original Data with Spline Fit for Covid Period')


# Re-wrap the adjusted series with frequency 2 so the seasonal
# Holt-Winters models below can be fitted
covid_adjusted_data <- ts(ts_data, frequency = 2)

# Fit a multiplicative Holt-Winters model
hw_multiplicative <- HoltWinters(covid_adjusted_data, seasonal = "multiplicative")
# Forecast the next value using the multiplicative Holt-Winters model
forecast_multiplicative <- forecast(hw_multiplicative, h = 1)

# Fit an additive Holt-Winters model
hw_additive <- HoltWinters(covid_adjusted_data, seasonal = "additive")
# Forecast the next value using the additive Holt-Winters model
forecast_additive <- forecast(hw_additive, h = 1)

# Print the forecasted values
print("Forecasted value using multiplicative Holt-Winters model:")
#> [1] "Forecasted value using multiplicative Holt-Winters model:"
print(forecast_multiplicative$mean)
#> Time Series:
#> Start = c(891, 2) 
#> End = c(891, 2) 
#> Frequency = 2 
#>      fit 
#> 205784.2
print("Forecasted value using additive Holt-Winters model:")
#> [1] "Forecasted value using additive Holt-Winters model:"
print(forecast_additive$mean)
#> Time Series:
#> Start = c(891, 2) 
#> End = c(891, 2) 
#> Frequency = 2 
#>      fit 
#> 206980.3

sc30ash commented 7 months ago

Prediction with Additive Model : 206980.3

Prediction with Multiplicative Model : 205784.2

First, I identified the start and end dates of the Covid period using change-point detection: the first change point marks the start, and a second change point found within the subsequent stretch marks the end. Next, I used a cubic smoothing spline to impute new values for the Covid period, trying a range of smoothing parameters λ and selecting the one with the lowest cross-validation score so that the imputed segment stays in line with the surrounding data. Finally, I used both multiplicative and additive Holt-Winters models to forecast the next ICNSA value; these models apply exponential smoothing to the level, trend, and seasonal components of the series. The predictions from the two models were:

• Prediction with the Additive Model: 206980.3

• Prediction with the Multiplicative Model: 205784.2

These forecasts reflect the historical patterns in the series, including the imputed Covid period.
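
To choose between the two fits, one simple check (a sketch, not part of the run above) is to compare the in-sample sum of squared errors that HoltWinters() stores on each fitted object:

# Lower SSE indicates the better in-sample fit
c(additive = hw_additive$SSE, multiplicative = hw_multiplicative$SSE)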

sc30ash commented 7 months ago
library(tidyverse)
library(prophet)
library(changepoint)
library(fredr)
library(forecast)
library(splines)
library(imputeTS)
library(bsts)
library(kernlab)
library(quantmod)
library(legion)

find_optimal_lambda <- function(lambda_seq, covid_indices, covid_period) {
  # Evaluate the leave-one-out cross-validation score at each candidate
  # smoothing parameter
  cv_errors <- sapply(lambda_seq, function(lambda) {
    cv_fit <- smooth.spline(covid_indices, covid_period, spar = lambda, cv = TRUE)
    cv_fit$cv.crit
  })

  # Return the smoothing parameter with the lowest cross-validation score
  lambda_seq[which.min(cv_errors)]
}

fredr_set_key("02d6768019ad56ffddaef2ba205ab083")
start_date <- as.Date("2000-11-27")
icnsa <- fredr(series_id = "ICNSA", observation_start = as.Date("1960-01-01"), observation_end = as.Date("2024-03-21"))

icnsa <- icnsa |> select(ds = date, y = value)

icnsa$Date <- as.Date(icnsa$ds)

ts_data <- ts(icnsa$y)

ts_data <- ts(ts_data, frequency = 1)

# Perform change-point detection
cpt_result <- cpt.mean(ts_data)

# Extract change points
change_points <- attr(cpt_result, "cpts")

start <- change_points[1]

ts_data_post_covid <- ts_data[start:change_points[2]]

cpt_result_post_covid <- cpt.mean(ts_data_post_covid)
change_points_post_covid <- attr(cpt_result_post_covid, "cpts")

end <- change_points_post_covid[1] + start

# Widen the imputation window by 20 observations on each side
start <- start - 20
end <- end + 20
# Extract the Covid period
covid_period <- ts_data[start:end]

# Generate time indices for the Covid period
covid_indices <- seq_along(covid_period)

# Define a sequence of lambda values to try
lambda_seq <- seq(0.1, 10, by = 0.01)

# Impute the Covid period with Kalman smoothing on an auto.arima state-space
# model (the spline helper and lambda_seq above are kept for reference but
# are not used in this run)
kalman_replace <- ts_data
kalman_replace[start:end] <- NA
kalman_replace <- na_kalman(kalman_replace, model = "auto.arima")
imputed_values <- kalman_replace[start:end]
# Plot the original data and the imputed data
plot(ts_data, type = 'l', col = 'blue', main = 'Original Data and Imputed Data for Covid Period')

covid_x <- seq(start, end)
lines(covid_x, imputed_values, col = 'red')
# (the spline fit from the earlier run could be overlaid here for comparison)

ts_data[start:end] <- imputed_values
plot(ts_data, type = 'l', main = 'Original Data with Kalman-Imputed Covid Period')

# Frequency 2 gives the basic structural model below a seasonal component
covid_adjusted_data <- ts(ts_data, frequency = 2)
# Fit a structural time series model to ts_data
structural_model <- StructTS(covid_adjusted_data, type = "BSM")

# Forecast the next time point
next_prediction <- forecast(structural_model, h = 1)

# Print the forecasted value
print(next_prediction)
#>      Point Forecast    Lo 80  Hi 80    Lo 95    Hi 95
#> 1492       196452.5 131977.9 260927 97847.11 295057.8

# Define the ticker symbol for S&P 500
ticker_symbol <- "^GSPC"  # ^GSPC is the symbol for S&P 500

ticker <- getSymbols(ticker_symbol, src = "yahoo", from = "2000-01-01")

# Extract the adjusted close prices
sp500_data_daily <- Ad(get(ticker))

# Convert sp500_data_daily to a dataframe
sp500_df <- data.frame(Date = index(sp500_data_daily), SP500_Close = as.numeric(sp500_data_daily))
sp500_df$Date <- as.Date(sp500_df$Date)
# Shift ICNSA dates back one day (Saturday week-endings to Fridays) so they
# align with S&P 500 trading days in the merge
icnsa$Date <- icnsa$Date - 1
# Merge sp500_df with icnsa
merged_data <- merge(icnsa, sp500_df, by = "Date")

# Remove rows where S&P 500 data is missing
merged_data <- merged_data[!is.na(merged_data$SP500_Close), ]

multivariate_data <- cbind(ts(merged_data$y, frequency = 52), ts(merged_data$SP500_Close, frequency = 52))

# Fit a vector exponential smoothing (VETS) model to the bivariate series
structural_model <- vets(multivariate_data)

forecast(structural_model, h = 1)
#> Time Series:
#> Start = c(24, 24) 
#> End = c(24, 24) 
#> Frequency = 52 
#>          ts(merged_data$y, frequency = 52)
#> 24.44231                            195902
#>          ts(merged_data$SP500_Close, frequency = 52)
#> 24.44231                                    5073.425

sc30ash commented 5 months ago
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(stringr)
library(ggplot2)
library(plotly)
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(imputeTS)
#> Warning: package 'imputeTS' was built under R version 4.3.3
#> 
#> Attaching package: 'imputeTS'
#> The following object is masked from 'package:tseries':
#> 
#>     na.remove
library(vars)
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> The following object is masked from 'package:plotly':
#> 
#>     select
#> Loading required package: strucchange
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following object is masked from 'package:imputeTS':
#> 
#>     na.locf
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> 
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#> 
#>     boundary
#> Loading required package: urca
#> Loading required package: lmtest
library(forecast)
library(zoo)

setwd("C:\\Users\\sc30a\\Documents\\ATSA\\Assignment4")
# Read the CSV file
CVGS <- read.csv("CGVS.csv")

# Convert "Period" column to datetime type
CVGS$Period <- as.yearmon(CVGS$Period, "%b-%Y")

# Remove commas and convert "Value" column to numeric
CVGS$Value <- as.numeric(gsub(",", "", CVGS$Value))

# Read BI.csv
BI <- read.csv("BI.csv")

# Convert "Period" column to datetime type
BI$Period <- as.yearmon(BI$Period, "%b-%Y")

# Remove commas and convert "Value" column to numeric
BI$Value <- as.numeric(gsub(",", "", BI$Value))

# Read PI.csv
PI <- read.csv("PI.csv")

# Convert "Period" column to datetime type
PI$Period <- as.yearmon(PI$Period, "%b-%Y")

# Remove commas and convert "Value" column to numeric
PI$Value <- as.numeric(gsub(",", "", PI$Value))

# Read FPI.csv
FPI <- read.csv("FPI.csv")

# Convert "Period" column to datetime type
FPI$Period <- as.yearmon(FPI$Period, "%b-%Y")

# Remove commas and convert "Value" column to numeric
FPI$Value <- as.numeric(gsub(",", "", FPI$Value))

# Print the structure of the dataframes to confirm the changes
str(BI)
#> 'data.frame':    396 obs. of  2 variables:
#>  $ Period: 'yearmon' num  Jan 1992 Feb 1992 Mar 1992 Apr 1992 ...
#>  $ Value : num  686 707 737 751 756 746 736 733 725 705 ...
str(PI)
#> 'data.frame':    396 obs. of  2 variables:
#>  $ Period: 'yearmon' num  Jan 1992 Feb 1992 Mar 1992 Apr 1992 ...
#>  $ Value : num  4971 4871 4935 4938 5039 ...
str(FPI)
#> 'data.frame':    396 obs. of  2 variables:
#>  $ Period: 'yearmon' num  Jan 1992 Feb 1992 Mar 1992 Apr 1992 ...
#>  $ Value : num  25599 25763 25742 25561 25859 ...

# Print the structure of the dataframe to confirm the changes
str(CVGS)
#> 'data.frame':    396 obs. of  2 variables:
#>  $ Period: 'yearmon' num  Jan 1992 Feb 1992 Mar 1992 Apr 1992 ...
#>  $ Value : num  45846 45542 46732 46951 46229 ...

# Merge BI, PI, FPI, and CVGS dataframes by Period
merged.data <- merge(BI, PI, by = "Period", all = TRUE, suffixes = c("_BI", "_PI"))
merged.data <- merge(merged.data, FPI, by = "Period", all = TRUE, suffixes = c("", "_FPI"))
merged.data <- merge(merged.data, CVGS, by = "Period", all = TRUE, suffixes = c("", "_CVGS"))

merged.data <- na.omit(merged.data)

# Keep only value columns
merged.data <- merged.data[, c("Period", "Value_BI", "Value_PI", "Value", "Value_CVGS")]

# Rename columns
names(merged.data)[names(merged.data) == "Value"] <- "Value_FPI"

# Convert to Time Series
ts_PI <- ts(merged.data$Value_PI, frequency = 12)
ts_BI <- ts(merged.data$Value_BI, frequency = 12)
ts_CVGS <- ts(merged.data$Value_CVGS, frequency = 12)
ts_FPI <- ts(merged.data$Value_FPI, frequency = 12)

# Determine the persistence of the Model
par(mfrow=c(4,2))
acf(ts_PI, main = "ACF for PI")
pacf(ts_PI, main = "PACF for PI")
acf(ts_BI, main = "ACF for BI")
pacf(ts_BI, main = "PACF for BI")
acf(ts_CVGS, main = "ACF for CVGS")
pacf(ts_CVGS, main = "PACF for CVGS")
acf(ts_FPI, main = "ACF for FPI")
pacf(ts_FPI, main = "PACF for FPI")

# Select the lag order for each series individually
lagselect_PI <- VARselect(ts_PI, lag.max = 10, type = "const")
lagselect_BI <- VARselect(ts_BI, lag.max = 10, type = "const")
lagselect_CVGS <- VARselect(ts_CVGS, lag.max = 10, type = "const")
lagselect_FPI <- VARselect(ts_FPI, lag.max = 10, type = "const")
print(lagselect_PI$selection)
#> AIC(n)  HQ(n)  SC(n) FPE(n) 
#>      3      2      2      3
print(lagselect_BI$selection)
#> AIC(n)  HQ(n)  SC(n) FPE(n) 
#>      4      2      2      4
print(lagselect_CVGS$selection)
#> AIC(n)  HQ(n)  SC(n) FPE(n) 
#>      7      7      4      7
print(lagselect_FPI$selection)
#> AIC(n)  HQ(n)  SC(n) FPE(n) 
#>      5      5      5      5

# Build the VAR(1) model
var_model_1 <- VAR(cbind(ts_PI, ts_BI, ts_CVGS, ts_FPI), p = 1, type = "const", season = NULL)
summary(var_model_1)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: ts_PI, ts_BI, ts_CVGS, ts_FPI 
#> Deterministic variables: const 
#> Sample size: 385 
#> Log Likelihood: -10940.31 
#> Roots of the characteristic polynomial:
#> 0.9988  0.99 0.9695 0.9655
#> Call:
#> VAR(y = cbind(ts_PI, ts_BI, ts_CVGS, ts_FPI), p = 1, type = "const")
#> 
#> 
#> Estimation results for equation ts_PI: 
#> ====================================== 
#> ts_PI = ts_PI.l1 + ts_BI.l1 + ts_CVGS.l1 + ts_FPI.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> ts_PI.l1     0.995298   0.009282 107.233   <2e-16 ***
#> ts_BI.l1    -0.276441   0.246505  -1.121    0.263    
#> ts_CVGS.l1  -0.002569   0.006071  -0.423    0.672    
#> ts_FPI.l1    0.009693   0.009783   0.991    0.322    
#> const      169.376384 282.189867   0.600    0.549    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 647 on 380 degrees of freedom
#> Multiple R-Squared: 0.989,   Adjusted R-squared: 0.9889 
#> F-statistic:  8543 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_BI: 
#> ====================================== 
#> ts_BI = ts_PI.l1 + ts_BI.l1 + ts_CVGS.l1 + ts_FPI.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> ts_PI.l1    4.294e-04  3.726e-04   1.152   0.2499    
#> ts_BI.l1    9.726e-01  9.896e-03  98.278   <2e-16 ***
#> ts_CVGS.l1  2.610e-04  2.437e-04   1.071   0.2848    
#> ts_FPI.l1   6.907e-04  3.927e-04   1.759   0.0794 .  
#> const      -1.976e+01  1.133e+01  -1.744   0.0820 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 25.97 on 380 degrees of freedom
#> Multiple R-Squared: 0.997,   Adjusted R-squared: 0.9969 
#> F-statistic: 3.106e+04 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_CVGS: 
#> ======================================== 
#> ts_CVGS = ts_PI.l1 + ts_BI.l1 + ts_CVGS.l1 + ts_FPI.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> ts_PI.l1      0.03960    0.02220   1.784  0.07518 .  
#> ts_BI.l1     -0.74177    0.58947  -1.258  0.20903    
#> ts_CVGS.l1    0.94777    0.01452  65.281  < 2e-16 ***
#> ts_FPI.l1     0.05244    0.02339   2.241  0.02557 *  
#> const      2014.75220  674.79899   2.986  0.00301 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 1547 on 380 degrees of freedom
#> Multiple R-Squared: 0.9842,  Adjusted R-squared: 0.984 
#> F-statistic:  5910 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ts_FPI: 
#> ======================================= 
#> ts_FPI = ts_PI.l1 + ts_BI.l1 + ts_CVGS.l1 + ts_FPI.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> ts_PI.l1     0.015164   0.004408   3.440 0.000646 ***
#> ts_BI.l1    -0.182792   0.117069  -1.561 0.119262    
#> ts_CVGS.l1  -0.005325   0.002883  -1.847 0.065581 .  
#> ts_FPI.l1    1.008130   0.004646 216.988  < 2e-16 ***
#> const      169.066557 134.016804   1.262 0.207891    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 307.3 on 380 degrees of freedom
#> Multiple R-Squared: 0.9995,  Adjusted R-squared: 0.9995 
#> F-statistic: 1.871e+05 on 4 and 380 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>            ts_PI  ts_BI ts_CVGS  ts_FPI
#> ts_PI   418575.4  613.3   49219 48311.9
#> ts_BI      613.3  674.6    1124   385.7
#> ts_CVGS  49219.0 1124.1 2393534 26202.0
#> ts_FPI   48311.9  385.7   26202 94408.1
#> 
#> Correlation matrix of residuals:
#>           ts_PI   ts_BI ts_CVGS  ts_FPI
#> ts_PI   1.00000 0.03650 0.04917 0.24303
#> ts_BI   0.03650 1.00000 0.02797 0.04833
#> ts_CVGS 0.04917 0.02797 1.00000 0.05512
#> ts_FPI  0.24303 0.04833 0.05512 1.00000

# Diagnose VAR(1) model
serial_var_1 <- serial.test(var_model_1, lags.pt = 12, type = "PT.asymptotic")
serial_var_1
#> 
#>  Portmanteau Test (asymptotic)
#> 
#> data:  Residuals of VAR object var_model_1
#> Chi-squared = 427.47, df = 176, p-value < 2.2e-16
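# p < 2.2e-16: the Portmanteau test rejects the null of no residual
# autocorrelation, so the VAR(1) model leaves structure in the residuals.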

# Granger's causality test
granger_PI <- causality(var_model_1, cause = 'ts_PI')
granger_PI
#> $Granger
#> 
#>  Granger causality H0: ts_PI do not Granger-cause ts_BI ts_CVGS ts_FPI
#> 
#> data:  VAR object var_model_1
#> F-Test = 5.0939, df1 = 3, df2 = 1520, p-value = 0.001645
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_PI and ts_BI ts_CVGS ts_FPI
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 22.107, df = 3, p-value = 6.197e-05
granger_BI <- causality(var_model_1, cause = 'ts_BI')
granger_BI
#> $Granger
#> 
#>  Granger causality H0: ts_BI do not Granger-cause ts_PI ts_CVGS ts_FPI
#> 
#> data:  VAR object var_model_1
#> F-Test = 1.4459, df1 = 3, df2 = 1520, p-value = 0.2277
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_BI and ts_PI ts_CVGS ts_FPI
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 1.3745, df = 3, p-value = 0.7115
granger_CVGS <- causality(var_model_1, cause = 'ts_CVGS')
granger_CVGS
#> $Granger
#> 
#>  Granger causality H0: ts_CVGS do not Granger-cause ts_PI ts_BI ts_FPI
#> 
#> data:  VAR object var_model_1
#> F-Test = 1.5864, df1 = 3, df2 = 1520, p-value = 0.1908
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_CVGS and ts_PI ts_BI ts_FPI
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 1.9132, df = 3, p-value = 0.5906
granger_FPI <- causality(var_model_1, cause = 'ts_FPI')
granger_FPI
#> $Granger
#> 
#>  Granger causality H0: ts_FPI do not Granger-cause ts_PI ts_BI ts_CVGS
#> 
#> data:  VAR object var_model_1
#> F-Test = 2.8598, df1 = 3, df2 = 1520, p-value = 0.03578
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: ts_FPI and ts_PI ts_BI ts_CVGS
#> 
#> data:  VAR object var_model_1
#> Chi-squared = 22.614, df = 3, p-value = 4.859e-05
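# Summary: at the 5% level, ts_PI (p = 0.0016) and ts_FPI (p = 0.0358)
# Granger-cause the other series, while ts_BI (p = 0.228) and
# ts_CVGS (p = 0.191) do not.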

# Impulse Response Functions
inf_var_model_1 <- irf(var_model_1, impulse = "ts_PI", response = c("ts_BI", "ts_CVGS", "ts_FPI"), boot = TRUE, n.ahead = 10)
plot(inf_var_model_1)

# Variance Decomposition
fevd_var_1 <- fevd(var_model_1, n.ahead = 10)
plot(fevd_var_1)

# VAR Forecasting
forecast_var_1 <- predict(var_model_1, n.ahead = 1, ci = 0.95)
forecast_var_1
#> $ts_PI
#>                fcst    lower    upper       CI
#> ts_PI.fcst 15564.82 14296.77 16832.86 1268.046
#> 
#> $ts_BI
#>                fcst    lower    upper      CI
#> ts_BI.fcst 1946.152 1895.244 1997.059 50.9073
#> 
#> $ts_CVGS
#>                 fcst    lower    upper      CI
#> ts_CVGS.fcst 95842.9 92810.63 98875.17 3032.27
#> 
#> $ts_FPI
#>                 fcst    lower    upper       CI
#> ts_FPI.fcst 70080.51 69478.29 70682.72 602.2167

# Create multivariate time series
multivariate_ts <- ts(merged.data[, c("Value_PI", "Value_BI", "Value_CVGS", "Value_FPI")], frequency = 12)

# Build the VAR(p) model
var_model_p <- VAR(multivariate_ts, p = 4, type = "const", season = NULL, exog = NULL)
summary(var_model_p)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: Value_PI, Value_BI, Value_CVGS, Value_FPI 
#> Deterministic variables: const 
#> Sample size: 382 
#> Log Likelihood: -10760.407 
#> Roots of the characteristic polynomial:
#> 0.9976 0.9942 0.9763 0.9763 0.7486 0.6808 0.6808 0.5294 0.5294 0.524 0.484 0.484 0.4251 0.4251 0.4156 0.3631
#> Call:
#> VAR(y = multivariate_ts, p = 4, type = "const", exogen = NULL)
#> 
#> 
#> Estimation results for equation Value_PI: 
#> ========================================= 
#> Value_PI = Value_PI.l1 + Value_BI.l1 + Value_CVGS.l1 + Value_FPI.l1 + Value_PI.l2 + Value_BI.l2 + Value_CVGS.l2 + Value_FPI.l2 + Value_PI.l3 + Value_BI.l3 + Value_CVGS.l3 + Value_FPI.l3 + Value_PI.l4 + Value_BI.l4 + Value_CVGS.l4 + Value_FPI.l4 + const 
#> 
#>                Estimate Std. Error t value Pr(>|t|)    
#> Value_PI.l1     1.08200    0.05300  20.415   <2e-16 ***
#> Value_BI.l1     0.31059    1.28686   0.241    0.809    
#> Value_CVGS.l1   0.02094    0.02393   0.875    0.382    
#> Value_FPI.l1    0.12461    0.11401   1.093    0.275    
#> Value_PI.l2    -0.06006    0.07844  -0.766    0.444    
#> Value_BI.l2    -0.01119    1.88598  -0.006    0.995    
#> Value_CVGS.l2   0.02709    0.02678   1.011    0.313    
#> Value_FPI.l2    0.09954    0.17038   0.584    0.559    
#> Value_PI.l3    -0.10438    0.07879  -1.325    0.186    
#> Value_BI.l3     0.32829    1.88630   0.174    0.862    
#> Value_CVGS.l3  -0.01768    0.02654  -0.666    0.506    
#> Value_FPI.l3   -0.08048    0.17068  -0.472    0.638    
#> Value_PI.l4     0.06717    0.05451   1.232    0.219    
#> Value_BI.l4    -0.65328    1.27329  -0.513    0.608    
#> Value_CVGS.l4  -0.03103    0.02333  -1.330    0.184    
#> Value_FPI.l4   -0.14229    0.11563  -1.231    0.219    
#> const         163.67887  292.90538   0.559    0.577    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 636.6 on 365 degrees of freedom
#> Multiple R-Squared: 0.9896,  Adjusted R-squared: 0.9892 
#> F-statistic:  2180 on 16 and 365 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Value_BI: 
#> ========================================= 
#> Value_BI = Value_PI.l1 + Value_BI.l1 + Value_CVGS.l1 + Value_FPI.l1 + Value_PI.l2 + Value_BI.l2 + Value_CVGS.l2 + Value_FPI.l2 + Value_PI.l3 + Value_BI.l3 + Value_CVGS.l3 + Value_FPI.l3 + Value_PI.l4 + Value_BI.l4 + Value_CVGS.l4 + Value_FPI.l4 + const 
#> 
#>                 Estimate Std. Error t value Pr(>|t|)    
#> Value_PI.l1   -2.965e-03  2.132e-03  -1.390   0.1652    
#> Value_BI.l1    1.080e+00  5.177e-02  20.862   <2e-16 ***
#> Value_CVGS.l1  5.984e-04  9.629e-04   0.622   0.5347    
#> Value_FPI.l1   4.339e-03  4.587e-03   0.946   0.3447    
#> Value_PI.l2    1.067e-03  3.156e-03   0.338   0.7354    
#> Value_BI.l2   -6.334e-02  7.587e-02  -0.835   0.4044    
#> Value_CVGS.l2  1.100e-03  1.077e-03   1.021   0.3082    
#> Value_FPI.l2   8.532e-04  6.855e-03   0.124   0.9010    
#> Value_PI.l3    4.943e-03  3.170e-03   1.560   0.1197    
#> Value_BI.l3   -1.796e-01  7.589e-02  -2.367   0.0185 *  
#> Value_CVGS.l3 -7.760e-04  1.068e-03  -0.727   0.4679    
#> Value_FPI.l3  -5.643e-03  6.866e-03  -0.822   0.4117    
#> Value_PI.l4   -2.734e-03  2.193e-03  -1.247   0.2134    
#> Value_BI.l4    1.382e-01  5.122e-02   2.698   0.0073 ** 
#> Value_CVGS.l4 -5.579e-04  9.385e-04  -0.594   0.5526    
#> Value_FPI.l4   9.931e-04  4.652e-03   0.213   0.8311    
#> const         -2.379e+01  1.178e+01  -2.019   0.0443 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 25.61 on 365 degrees of freedom
#> Multiple R-Squared: 0.9971,  Adjusted R-squared: 0.997 
#> F-statistic:  7947 on 16 and 365 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Value_CVGS: 
#> =========================================== 
#> Value_CVGS = Value_PI.l1 + Value_BI.l1 + Value_CVGS.l1 + Value_FPI.l1 + Value_PI.l2 + Value_BI.l2 + Value_CVGS.l2 + Value_FPI.l2 + Value_PI.l3 + Value_BI.l3 + Value_CVGS.l3 + Value_FPI.l3 + Value_PI.l4 + Value_BI.l4 + Value_CVGS.l4 + Value_FPI.l4 + const 
#> 
#>                 Estimate Std. Error t value Pr(>|t|)    
#> Value_PI.l1      0.40827    0.11390   3.585 0.000383 ***
#> Value_BI.l1      3.23221    2.76544   1.169 0.243253    
#> Value_CVGS.l1    0.58848    0.05143  11.441  < 2e-16 ***
#> Value_FPI.l1     0.70219    0.24500   2.866 0.004397 ** 
#> Value_PI.l2     -0.25056    0.16856  -1.486 0.138022    
#> Value_BI.l2     -0.87647    4.05294  -0.216 0.828909    
#> Value_CVGS.l2    0.16844    0.05756   2.926 0.003643 ** 
#> Value_FPI.l2    -0.69363    0.36615  -1.894 0.058965 .  
#> Value_PI.l3      0.08342    0.16932   0.493 0.622554    
#> Value_BI.l3     -0.74166    4.05362  -0.183 0.854929    
#> Value_CVGS.l3    0.36093    0.05704   6.327 7.31e-10 ***
#> Value_FPI.l3     0.17435    0.36678   0.475 0.634822    
#> Value_PI.l4     -0.24415    0.11715  -2.084 0.037847 *  
#> Value_BI.l4     -2.44582    2.73626  -0.894 0.371988    
#> Value_CVGS.l4   -0.15000    0.05013  -2.992 0.002958 ** 
#> Value_FPI.l4    -0.13113    0.24848  -0.528 0.598008    
#> const         1139.66341  629.44724   1.811 0.071028 .  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 1368 on 365 degrees of freedom
#> Multiple R-Squared: 0.9877,  Adjusted R-squared: 0.9871 
#> F-statistic:  1829 on 16 and 365 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Value_FPI: 
#> ========================================== 
#> Value_FPI = Value_PI.l1 + Value_BI.l1 + Value_CVGS.l1 + Value_FPI.l1 + Value_PI.l2 + Value_BI.l2 + Value_CVGS.l2 + Value_FPI.l2 + Value_PI.l3 + Value_BI.l3 + Value_CVGS.l3 + Value_FPI.l3 + Value_PI.l4 + Value_BI.l4 + Value_CVGS.l4 + Value_FPI.l4 + const 
#> 
#>                 Estimate Std. Error t value Pr(>|t|)    
#> Value_PI.l1     0.035984   0.024497   1.469   0.1427    
#> Value_BI.l1    -0.328730   0.594793  -0.553   0.5808    
#> Value_CVGS.l1   0.014940   0.011062   1.350   0.1777    
#> Value_FPI.l1    1.102218   0.052695  20.917   <2e-16 ***
#> Value_PI.l2     0.011443   0.036255   0.316   0.7525    
#> Value_BI.l2     0.683730   0.871709   0.784   0.4333    
#> Value_CVGS.l2  -0.010200   0.012380  -0.824   0.4105    
#> Value_FPI.l2    0.014283   0.078752   0.181   0.8562    
#> Value_PI.l3    -0.015116   0.036418  -0.415   0.6783    
#> Value_BI.l3     0.523966   0.871855   0.601   0.5482    
#> Value_CVGS.l3  -0.002279   0.012269  -0.186   0.8527    
#> Value_FPI.l3   -0.002111   0.078887  -0.027   0.9787    
#> Value_PI.l4    -0.024867   0.025196  -0.987   0.3243    
#> Value_BI.l4    -0.900619   0.588517  -1.530   0.1268    
#> Value_CVGS.l4  -0.006836   0.010782  -0.634   0.5265    
#> Value_FPI.l4   -0.111035   0.053443  -2.078   0.0384 *  
#> const         173.015976 135.381899   1.278   0.2021    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 294.2 on 365 degrees of freedom
#> Multiple R-Squared: 0.9995,  Adjusted R-squared: 0.9995 
#> F-statistic: 5.041e+04 on 16 and 365 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>            Value_PI Value_BI Value_CVGS Value_FPI
#> Value_PI   405246.0    532.7      22744   34603.9
#> Value_BI      532.7    655.9       1356     391.6
#> Value_CVGS  22744.1   1356.4    1871469   16882.1
#> Value_FPI   34603.9    391.6      16882   86573.6
#> 
#> Correlation matrix of residuals:
#>            Value_PI Value_BI Value_CVGS Value_FPI
#> Value_PI    1.00000  0.03268    0.02612   0.18475
#> Value_BI    0.03268  1.00000    0.03872   0.05197
#> Value_CVGS  0.02612  0.03872    1.00000   0.04194
#> Value_FPI   0.18475  0.05197    0.04194   1.00000

# Diagnose VAR(p) model
serial_var_p <- serial.test(var_model_p, lags.pt = 12, type = "PT.asymptotic")
serial_var_p
#> 
#>  Portmanteau Test (asymptotic)
#> 
#> data:  Residuals of VAR object var_model_p
#> Chi-squared = 179.8, df = 128, p-value = 0.00174
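# p = 0.00174: residual autocorrelation is reduced relative to VAR(1),
# but the test still rejects the null at the 5% level.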

# Heteroskedasticity (arch effects)
arch_var_p <- arch.test(var_model_p, lags.multi = 12, multivariate.only = TRUE)
arch_var_p
#> 
#>  ARCH (multivariate)
#> 
#> data:  Residuals of VAR object var_model_p
#> Chi-squared = 1847.3, df = 1200, p-value < 2.2e-16
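# p < 2.2e-16: the null of no ARCH effects is rejected, indicating
# conditional heteroskedasticity in the residuals.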

# Normal Distribution of Residuals
norm_var_p <- normality.test(var_model_p, multivariate.only = TRUE)
norm_var_p
#> $JB
#> 
#>  JB-Test (multivariate)
#> 
#> data:  Residuals of VAR object var_model_p
#> Chi-squared = 2450.4, df = 8, p-value < 2.2e-16
#> 
#> 
#> $Skewness
#> 
#>  Skewness only (multivariate)
#> 
#> data:  Residuals of VAR object var_model_p
#> Chi-squared = 25.788, df = 4, p-value = 3.492e-05
#> 
#> 
#> $Kurtosis
#> 
#>  Kurtosis only (multivariate)
#> 
#> data:  Residuals of VAR object var_model_p
#> Chi-squared = 2424.6, df = 4, p-value < 2.2e-16
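# All three components (JB, skewness, kurtosis) reject normality of the
# residuals, driven mostly by heavy tails.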

# Testing for structural breaks in Residuals (model stability test)
stability_var_p <- stability(var_model_p, type = "OLS-CUSUM")
plot(stability_var_p)

# Granger's causality test
granger_PI <- causality(var_model_p, cause = 'Value_PI')
granger_PI
#> $Granger
#> 
#>  Granger causality H0: Value_PI do not Granger-cause Value_BI Value_CVGS
#>  Value_FPI
#> 
#> data:  VAR object var_model_p
#> F-Test = 2.9686, df1 = 12, df2 = 1460, p-value = 0.000417
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: Value_PI and Value_BI
#>  Value_CVGS Value_FPI
#> 
#> data:  VAR object var_model_p
#> Chi-squared = 12.908, df = 3, p-value = 0.00484
granger_BI <- causality(var_model_p, cause = 'Value_BI')
granger_BI
#> $Granger
#> 
#>  Granger causality H0: Value_BI do not Granger-cause Value_PI Value_CVGS
#>  Value_FPI
#> 
#> data:  VAR object var_model_p
#> F-Test = 0.80653, df1 = 12, df2 = 1460, p-value = 0.6441
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: Value_BI and Value_PI
#>  Value_CVGS Value_FPI
#> 
#> data:  VAR object var_model_p
#> Chi-squared = 1.7334, df = 3, p-value = 0.6295
granger_CVGS <- causality(var_model_p, cause = 'Value_CVGS')
granger_CVGS
#> $Granger
#> 
#>  Granger causality H0: Value_CVGS do not Granger-cause Value_PI Value_BI
#>  Value_FPI
#> 
#> data:  VAR object var_model_p
#> F-Test = 1.0524, df1 = 12, df2 = 1460, p-value = 0.3975
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: Value_CVGS and Value_PI
#>  Value_BI Value_FPI
#> 
#> data:  VAR object var_model_p
#> Chi-squared = 1.3004, df = 3, p-value = 0.729
granger_FPI <- causality(var_model_p, cause = 'Value_FPI')
granger_FPI
#> $Granger
#> 
#>  Granger causality H0: Value_FPI do not Granger-cause Value_PI Value_BI
#>  Value_CVGS
#> 
#> data:  VAR object var_model_p
#> F-Test = 2.2867, df1 = 12, df2 = 1460, p-value = 0.007045
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: Value_FPI and Value_PI Value_BI
#>  Value_CVGS
#> 
#> data:  VAR object var_model_p
#> Chi-squared = 13.806, df = 3, p-value = 0.003181

# Impulse Response Functions
inf_var_model_p <- irf(var_model_p, impulse = "Value_PI", response = c("Value_BI", "Value_CVGS", "Value_FPI"), boot = TRUE, n.ahead = 10)
plot(inf_var_model_p, main = "Shock from PI")

# Variance Decomposition
fevd_var_p <- fevd(var_model_p, n.ahead = 10)
plot(fevd_var_p)

# VAR Forecasting
forecast_var_p <- predict(var_model_p, n.ahead = 1, ci = 0.95)

# Forecast with VAR(p) model
forecast_var_p
#> $Value_PI
#>                  fcst   lower    upper       CI
#> Value_PI.fcst 15199.1 13951.4 16446.79 1247.692
#> 
#> $Value_BI
#>                   fcst    lower    upper       CI
#> Value_BI.fcst 1946.174 1895.979 1996.369 50.19474
#> 
#> $Value_CVGS
#>                     fcst    lower    upper       CI
#> Value_CVGS.fcst 95050.13 92368.86 97731.39 2681.263
#> 
#> $Value_FPI
#>                    fcst    lower    upper       CI
#> Value_FPI.fcst 69901.98 69325.29 70478.67 576.6877

library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice

bigvar_model_p <- constructModel(as.matrix(multivariate_ts), p = 4, struct = "SparseOO", gran = c(25,10), verbose = FALSE, h = 5, IC = TRUE)
results_bigvar <- cv.BigVAR(bigvar_model_p)
plot(results_bigvar)
SparsityPlot.BigVAR.results(results_bigvar)

forecast_bigvar <- predict(results_bigvar, n.ahead = 1)
forecast_bigvar
#>           [,1]
#> [1,] 20232.923
#> [2,]  1454.338
#> [3,] 93378.543
#> [4,] 64059.611
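# Rows follow the column order of multivariate_ts:
# Value_PI, Value_BI, Value_CVGS, Value_FPI.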

Created on 2024-04-10 with reprex v2.1.0

sc30ash commented 5 months ago

Predictions by Different Models

Time Series | VAR(1) | VAR(p) | BigVAR
-- | -- | -- | --
Food Products Inventory | 70080.51 | 69901.98 | 64059.61
Consumed Goods | 95842.9 | 95050.13 | 93378.54
Petroleum Inventory | 15564.82 | 15199.1 | 20232.92
Battery Inventory | 1946.152 | 1946.174 | 1454.338