jlivsey / UB-sping24-time-series

2 stars 1 forks source link

Yash Madhukar Patil #27

Open patilyash8076 opened 7 months ago

patilyash8076 commented 7 months ago

assig2.pdf Prediction_2.pdf

patilyash8076 commented 7 months ago
library(reprex)
library(tidyverse)
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

fredr_set_key('33b45f8519b2a909fca701748cf47495')
data <- fredr(series_id = "ICNSA")

head(data,n=5)
#> # A tibble: 5 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-09     2024-02-09  
#> 2 1967-01-14 ICNSA     334000 2024-02-09     2024-02-09  
#> 3 1967-01-21 ICNSA     277000 2024-02-09     2024-02-09  
#> 4 1967-01-28 ICNSA     252000 2024-02-09     2024-02-09  
#> 5 1967-02-04 ICNSA     274000 2024-02-09     2024-02-09

timeseries_data <- ts(data$value, frequency = 52)

plot(timeseries_data)


acf(timeseries_data)


pacf(timeseries_data)


arima_auto_model <- auto.arima(timeseries_data)
predicted_next_week_value<- forecast(arima_auto_model, h = 1)$mean
predicted_next_week_value
#> Time Series:
#> Start = c(58, 16) 
#> End = c(58, 16) 
#> Frequency = 52 
#> [1] 216867.3

Created on 2024-02-09 with reprex v2.0.2

patilyash8076 commented 7 months ago
library(reprex)
library(tidyverse)
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

fredr_set_key('33b45f8519b2a909fca701748cf47495')
data <- fredr(series_id = "ICNSA")

covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")

data_without_covid <- data[data$date < covid_start | data$date > covid_end, ]

head(data_without_covid)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-09     2024-02-09  
#> 2 1967-01-14 ICNSA     334000 2024-02-09     2024-02-09  
#> 3 1967-01-21 ICNSA     277000 2024-02-09     2024-02-09  
#> 4 1967-01-28 ICNSA     252000 2024-02-09     2024-02-09  
#> 5 1967-02-04 ICNSA     274000 2024-02-09     2024-02-09  
#> 6 1967-02-11 ICNSA     276000 2024-02-09     2024-02-09

ts_data <- ts(data_without_covid$value, frequency = 52)

plot(ts_data)


acf(ts_data)


pacf(ts_data)


arima_auto_model <- auto.arima(ts_data)

prediction <- forecast(arima_auto_model, h = 1)$mean

prediction
#> Time Series:
#> Start = c(57, 25) 
#> End = c(57, 25) 
#> Frequency = 52 
#> [1] 239210.1

Created on 2024-02-09 with reprex v2.0.2

patilyash8076 commented 7 months ago
library(reprex)
library(tidyverse)
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

fredr_set_key('33b45f8519b2a909fca701748cf47495')

start_date <- as.Date("2022-02-03")
end_date <- as.Date("2024-02-03")

icnsa_data <- fredr(series_id = "ICNSA", observation_start = start_date, observation_end = end_date)

ccnsa_data<- fredr(series_id = "CCNSA", observation_start = start_date, observation_end = end_date)

correlation <- cor(icnsa_data$value, ccnsa_data$value)
correlation
#> [1] 0.5978665

plot(icnsa_data$date, icnsa_data$value, type = "l", col = "blue", xlab = "Date", ylab = "Value", ylim = range(c(icnsa_data$value, ccnsa_data$value)))
title(main = "ICNSA and CCNSA Data Over Time")
lines(ccnsa_data$date, ccnsa_data$value, col = "red")

# Here, my both data have weekly series. so used frequency value 52
ICNSA_ts <- ts(icnsa_data$value, frequency = 52)
CCNSA_ts <- ts(ccnsa_data$value, frequency = 52)

# Stationarity Test for ICNSA
adf_test_icnsa <- tseries::adf.test(ICNSA_ts)
adf_test_icnsa
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  ICNSA_ts
#> Dickey-Fuller = -2.6275, Lag order = 4, p-value = 0.3165
#> alternative hypothesis: stationary

#I have performed ADF testing and result says that p-value is greater than 0.05. which says that series is non-stationary. #Therefore, we have to do diffrencing to make series stationary.

# Stationarity Test for CCNSA
adf_test_ccnsa <- tseries::adf.test(CCNSA_ts)
adf_test_ccnsa
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  CCNSA_ts
#> Dickey-Fuller = -3.6139, Lag order = 4, p-value = 0.03529
#> alternative hypothesis: stationary

# Plotting ACF and PACF Plots to get AR and MA values
acf(ICNSA_ts)

pacf(ICNSA_ts)
#ACF Plot: Similarly, a significant spike in the ACF plot that cuts off after a specific lag suggests the order of the MA term. ACF #plot does not show a clear cut-off, which makes it challenging to identify an MA term from this plot alone. Therefore, I used #Auto.Arima model to get a value of AR and MA
#PACF Plot: A significant spike in the PACF plot that cuts off after a specific lag suggests the order of the AR term. In PACF plot, #there's no clear cut-off, indicating that the AR part may not be prominent, or the data requires a different approach, such as #differencing, to clarify the pattern.


auto_model <- auto.arima(ICNSA_ts, xreg = CCNSA_ts)
reg_arima_model <- Arima(icnsa_data$value, xreg = ccnsa_data$value)
checkresiduals(reg_arima_model)
#Comment on the regression diagnostics
#Residual Plot: There are fluctuations in the residuals, which do not seem to have a constant mean over time. This could #indicate that the model has not captured some of the data's trend or seasonality.
#ACF Plot: Several spikes exceed the blue dashed significance bounds, suggesting that there is autocorrelation in the residuals #at various lags. This implies that the model may be improved by adding AR or MA terms to capture this autocorrelation.
#Histogram and Q-Q Plot: shows that the residuals from the model are not spread out in the bell-curve shape we'd expect if #they were normally distributed. This means the model's assumptions about the data may not hold true, and we might need to #either adjust the model or apply a transformation to the data to improve it.

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(0,0,0) errors
#> Q* = 106.9, df = 10, p-value < 2.2e-16
#> 
#> Model df: 0.   Total lags used: 10
summary(reg_arima_model)
#> Series: icnsa_data$value 
#> Regression with ARIMA(0,0,0) errors 
#> 
#> Coefficients:
#>       intercept    xreg
#>        85656.13  0.0807
#> s.e.   17456.82  0.0106
#> 
#> sigma^2 = 777318351:  log likelihood = -1222.73
#> AIC=2451.45   AICc=2451.69   BIC=2459.41
#> 
#> Training set error measures:
#>                         ME     RMSE      MAE      MPE     MAPE     MASE
#> Training set -3.545677e-09 27613.63 21114.23 -1.44771 9.505569 1.174026
#>                   ACF1
#> Training set 0.5100366

# Forecasting
forecast_values <- forecast(auto_model, xreg = CCNSA_ts, h=1)
prediction <- forecast_values$mean[1]
prediction
#> [1] 214647.9
plot(forecast_values)

Created on 2024-02-22 with reprex v2.0.2

patilyash8076 commented 7 months ago
library(reprex)
library(tidyverse)
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(tsibble)
#> Warning: package 'tsibble' was built under R version 4.3.2
#> 
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:lubridate':
#> 
#>     interval
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, union
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(fable)
#> Warning: package 'fable' was built under R version 4.3.2
#> Loading required package: fabletools
#> Warning: package 'fabletools' was built under R version 4.3.2

#Loaded dataset 
fredr_set_key('33b45f8519b2a909fca701748cf47495')
df <- fredr(series_id = "ICNSA") %>% 
  mutate(date = as.Date(date)) %>%
  arrange(date)

#Plotted a plot of dataset to figure out covid start date and end date

df %>% 
  ggplot(aes(x = date , y = value )) +
  geom_line() +
  labs(title = "Initial Claims (ICNSA)", x = "Date", y = "Claims")


#Plotted a plot of dataset as it was not clear to understand start and end date from above plot, Therefore, I have plotted 
#below plot of 5 years from 2019 to 2024 beacause covid pandamic started from march 2020
#based on below I can say that Covid start date is 15th March 2020 to 31st July 2021.

df %>% 
  filter(date >= as.Date("2019-01-01") & date <= as.Date("2024-01-01")) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  labs(title = "Initial Claims of 5 years from year 2019 to 2024 (ICNSA)", x = "Date", y = "Claims")


#Assumed start date 15th March 2020 and end_date 31st July 2021.
start_date <- as.Date("2020-03-15")
end_date <- as.Date("2021-07-31")

#filtered out the data having assumed dates or we can say covid_data
covid_df <- df %>%
  filter(date >= start_date & date <= end_date)

#df - covid_df = non_covid_df
non_covid_df <- anti_join(df, covid_df,by = "date")

#assigned covid_df$value as NA, i.e made all value of column values as NA
covid_df$value<-NA

#plot
non_covid_df %>%
  ggplot(aes(x = date , y = value )) +
  geom_line()


#used spline methedology to get a imputation values for covid_df$values
smooth_spline_fit <- smooth.spline(x = non_covid_df$date, y = non_covid_df$value, lambda = 0.6)
impute_val <- predict(smooth_spline_fit, x = as.numeric(covid_df$date))

#assigned imputed values to covid_df$values column, which was made null earlier
covid_df$value <- impute_val$y

#combined both df's
updated_df <- rbind(non_covid_df,covid_df)

# In Below graph I have tried various values of lambda and after trial and error I found the value of lambda = 0.6 as an appropriate value as the curve is smooth on a value of 0.6

updated_df %>%
  ggplot(aes(x = date , y = value )) +
  geom_line()


icnsa_ts <- ts(updated_df$value, frequency = 52)

#Applied Holt-Winters models both  additive and multiplicative
hw_add <- HoltWinters(icnsa_ts, seasonal = "additive")
hw_mult <- HoltWinters(icnsa_ts, seasonal = "multiplicative")

#Forecasted next value using both models
forecast_add <- forecast::forecast(hw_add, h = 1)
forecast_mult <- forecast::forecast(hw_mult, h = 1)

#Printed Point Forecast value of both model
print(forecast_add$mean)
#> Time Series:
#> Start = c(58, 18) 
#> End = c(58, 18) 
#> Frequency = 52 
#>      fit 
#> 251664.3
print(forecast_mult$mean)
#> Time Series:
#> Start = c(58, 18) 
#> End = c(58, 18) 
#> Frequency = 52 
#>      fit 
#> 234894.1

Forecast Value using Additive model : 251664.3 Forecast Value using Multiplicative model : 234894.1 Created on 2024-02-28 with reprex v2.0.2

patilyash8076 commented 6 months ago
library(reprex)
library(tidyverse)
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(tsibble)
#> Warning: package 'tsibble' was built under R version 4.3.2
#> 
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:lubridate':
#> 
#>     interval
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, union
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(fable)
#> Warning: package 'fable' was built under R version 4.3.2
#> Loading required package: fabletools
#> Warning: package 'fabletools' was built under R version 4.3.2

fredr_set_key('33b45f8519b2a909fca701748cf47495')
df <- fredr(series_id = "ICNSA") %>% 
  mutate(date = as.Date(date)) %>%
  arrange(date)

#Plotted a plot of dataset to figure out covid start date and end date

df %>% 
  ggplot(aes(x = date , y = value )) +
  geom_line() +
  labs(title = "Initial Claims (ICNSA)", x = "Date", y = "Claims")


#Plotted a plot of dataset as it was not clear to understand start and end date from above plot, Therefore, I have plotted 
#below plot of 5 years from 2019 to 2024 beacause covid pandamic started from march 2020
#based on below I can say that Covid start date is 15th March 2020 to 31st July 2021.

df %>% 
  filter(date >= as.Date("2019-01-01") & date <= as.Date("2024-01-01")) %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
  labs(title = "Initial Claims of 5 years from year 2019 to 2024 (ICNSA)", x = "Date", y = "Claims")


#Assumed start date 15th March 2020 and end_date 31st July 2021.
start_date <- as.Date("2020-03-15")
end_date <- as.Date("2021-07-31")
#df - covid_df = non_covid_df
covid_df <- df %>%
  filter(date >= start_date & date <= end_date)

covid_kalman_df <- covid_df
non_covid_df <- anti_join(df, covid_df,by = "date")
non_covid_kalman_df <- non_covid_df

#assigned NA values, i.e made all value of column values as NA
covid_df$value<-NA
covid_kalman_df$value <- NA

# smooth spline to compare with state-space missing value methodology
smooth_spline_fit <- smooth.spline(x = non_covid_df$date, y = non_covid_df$value, lambda = 0.6)
impute_val <- predict(smooth_spline_fit, x = as.numeric(covid_df$date))
covid_df$value <- impute_val$y
complete_df <- rbind(non_covid_df, covid_df) %>% arrange(date)

#Imputed null values by using kalman i.e state-space missing value methodology
complete_kalman_df <- rbind(non_covid_kalman_df, covid_kalman_df) %>% arrange(date)
complete_kalman_df$value <- na_kalman(complete_kalman_df$value, model = "StructTS", smooth = TRUE)
#> Error in na_kalman(complete_kalman_df$value, model = "StructTS", smooth = TRUE): could not find function "na_kalman"

#Plotted the results of smooth spline and Kalman filtering for comparison
plot(complete_df$date, complete_df$value, type = "l", main = "Applied smooth spline")

plot(complete_kalman_df$date, complete_kalman_df$value, type = "l", main = "Applied Kalman methodology")


#Converted the imputed data to a time series object
icnsa_ts <- ts(complete_kalman_df$value, frequency = 52)

#Fitted a structural time series model
model <- StructTS(icnsa_ts, type = "BSM")

#Forecasted future values using the fitted model
pred <- forecast(model, h=1)
pred
#>          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
#> 58.34615       236722.9 191125.6 282320.1 166987.9 306457.8

#Plotted the forecast along with the original time series data
autoplot(pred) + autolayer(icnsa_ts)


#Retrieved additional series for comparative analysis
nyiclaim_data <- fredr(series_id = "NYICLAIMS")
nyiclaim_ts <- ts(nyiclaim_data$value, frequency = 52)

#Adjusted the series to match the timeframe of another series
adjusted_icnsa <- window(icnsa_ts, start = start(nyiclaim_ts), end = end(nyiclaim_ts))

#Applied automatic ARIMA modeling using external regressors
arima_auto <- auto.arima(adjusted_icnsa, xreg = nyiclaim_ts)

#Extracted the last value from the New York insurance claims time series
nyiclaim_tail_value <- tail(nyiclaim_ts, n = 1)

#Forecasted future values using the ARIMA model with the external regressor
pred <- forecast(arima_auto, xreg = nyiclaim_tail_value, h = 1)
pred$mean
#> Time Series:
#> Start = c(39, 10) 
#> End = c(39, 10) 
#> Frequency = 52 
#> [1] 363515.1
#Plotted the forecast and fitted values
autoplot(pred) + autolayer(fitted(arima_auto), alpha = 1)

Created on 2024-03-07 with reprex v2.0.2

patilyash8076 commented 5 months ago
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(zoo)
#> Warning: package 'zoo' was built under R version 4.3.2
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.2
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.3.2
#> Loading required package: lmtest
#> Warning: package 'lmtest' was built under R version 4.3.2
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice
library(reprex)

# Read data of 4 time series
data_34avs <- read.csv('C:/Users/Yash/OneDrive/Documents/UB_Spring_2024/Special Topics Timeseries forecasting/Assignment_4/data/34AVS.csv')
data_34bvs <- read.csv('C:/Users/Yash/OneDrive/Documents/UB_Spring_2024/Special Topics Timeseries forecasting/Assignment_4/data/34BVS.csv')
data_34ivs <- read.csv('C:/Users/Yash/OneDrive/Documents/UB_Spring_2024/Special Topics Timeseries forecasting/Assignment_4/data/34IVS.csv')
data_34jvs <- read.csv('C:/Users/Yash/OneDrive/Documents/UB_Spring_2024/Special Topics Timeseries forecasting/Assignment_4/data/34JVS.csv')

# omitted null values
data_34avs <- na.omit(data_34avs)
data_34bvs <- na.omit(data_34bvs)
data_34ivs <- na.omit(data_34ivs)
data_34jvs <- na.omit(data_34jvs)

# The series tracks the total value of shipments in the U.S. for electronic computers manufactured from 1992 to 2024. It reflects economic activity in producing desktops, laptops, and servers. This data reveals trends in technology investment, innovation, and overall economic health, impacting productivity and competitiveness in the economy.
#Also decomposition plot is plotted
data_34avs_ts <- ts(data_34avs$Value, frequency = 12)
data_34avs_ts_decomp <- decompose(data_34avs_ts)
plot(data_34avs_ts_decomp)

# The series tracks the value of shipments for computer and electronic products in the U.S. from 1992 to 2024. It reflects economic activity in producing items like computers and communication equipment, providing insights into technology demand, innovation, and overall economic health, crucial for driving productivity and competitiveness.
#Also decomposition plot is plotted
data_34bvs_ts <- ts(data_34bvs$Value, frequency = 12)
data_34bvs_ts_decomp <- decompose(data_34bvs_ts)
plot(data_34bvs_ts_decomp)

# The series tracks the value of shipments for nondefense search and navigation equipment in the U.S. economy. It reflects economic activity surrounding civilian navigation tools like GPS devices, aiding in understanding demand, production levels, and overall trends in industries producing these products, such as transportation and personal navigation.
#Also decomposition plot is plotted

data_34ivs_ts <- ts(data_34ivs$Value, frequency = 12)
data_34ivs_ts_decomp <- decompose(data_34ivs_ts)
plot(data_34ivs_ts_decomp)

# The "Value of Shipments" series tracks monetary worth of search and navigation equipment for defense within US, reflecting economic activity and national security preparedness. Its a vital indicator for industry performance, policy decisions, and assessing trends in defense-related technologies and manufacturing.
#Also decomposition plot is plotted
data_34jvs_ts <- ts(data_34jvs$Value, frequency = 12)
data_34jvs_ts_decomp <- decompose(data_34jvs_ts)
plot(data_34jvs_ts_decomp)


# ACF plot for 1st timerseries
acf(data_34avs$Value)


# PACF plot for 1st timerseries
pacf(data_34avs$Value)


# ACF plot for 2nd timerseries
acf(data_34bvs$Value)


# PACF plot for 2nd timerseries
pacf(data_34bvs$Value)


# ACF plot for 3rd timerseries
acf(data_34ivs$Value)


# ACF plot for 3rd timerseries
pacf(data_34ivs$Value)


# ACF plot for 4th timerseries
acf(data_34jvs$Value)


# PACF plot for 4th timerseries
pacf(data_34jvs$Value)


 # Performed stationarity check to check stationary
adf_result_avs <- adf.test(data_34avs_ts)

# Print the ADF test result
print(adf_result_avs)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_34avs_ts
#> Dickey-Fuller = -2.3606, Lag order = 7, p-value = 0.4249
#> alternative hypothesis: stationary

# As p-value is greater than 0.05, therefore I have perform differencing and again perform adf testing, so now value is less than 0.05 with differencing 1
diff_adf_test_34avs_ts <- adf.test(diff(data_34avs_ts))
#> Warning in adf.test(diff(data_34avs_ts)): p-value smaller than printed p-value
diff_adf_test_34avs_ts
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  diff(data_34avs_ts)
#> Dickey-Fuller = -6.0759, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary

# Performed stationarity check to check stationary
adf_result_bvs <- adf.test(data_34bvs_ts)

# Print the ADF test result
print(adf_result_bvs)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_34bvs_ts
#> Dickey-Fuller = -2.6971, Lag order = 7, p-value = 0.2828
#> alternative hypothesis: stationary

# As p-value is greater than 0.05, therefore I have perform differencing and again perform adf testing, so now value is less than 0.05 with differencing 1
diff_adf_test_34bvs_ts <- adf.test(diff(data_34bvs_ts))
#> Warning in adf.test(diff(data_34bvs_ts)): p-value smaller than printed p-value
diff_adf_test_34bvs_ts
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  diff(data_34bvs_ts)
#> Dickey-Fuller = -7.7262, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary

# Performed stationarity check to check stationary
adf_result_ivs <- adf.test(data_34ivs_ts)

# Print the ADF test result
print(adf_result_ivs)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_34ivs_ts
#> Dickey-Fuller = -3.0147, Lag order = 7, p-value = 0.1487
#> alternative hypothesis: stationary

# As p-value is greater than 0.05, therefore I have perform differencing and again perform adf testing, so now value is less than 0.05 with differencing 1
diff_adf_test_34ivs_ts <- adf.test(diff(data_34ivs_ts))
#> Warning in adf.test(diff(data_34ivs_ts)): p-value smaller than printed p-value
diff_adf_test_34ivs_ts
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  diff(data_34ivs_ts)
#> Dickey-Fuller = -6.7065, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary

# Performed stationarity check to check stationary
adf_result_jvs <- adf.test(data_34jvs_ts)
# Print the ADF test result
print(adf_result_jvs)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_34jvs_ts
#> Dickey-Fuller = -2.1503, Lag order = 7, p-value = 0.5137
#> alternative hypothesis: stationary

# As p-value is greater than 0.05, therefore I have perform differencing and again perform adf testing, so now value is less than 0.05 with differencing 1
diff_adf_test_34jvs_ts <- adf.test(diff(data_34jvs_ts))
#> Warning in adf.test(diff(data_34jvs_ts)): p-value smaller than printed p-value
diff_adf_test_34jvs_ts
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  diff(data_34jvs_ts)
#> Dickey-Fuller = -6.9575, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary

#created timeseries object of merged data and which are differenced
merged_data_ts <- cbind(diff(data_34avs_ts), diff(data_34bvs_ts), diff(data_34ivs_ts), diff(data_34jvs_ts))

# applied basic var model with p =1
var_model <- VAR(merged_data_ts, p=1)
#summary(var_model)

#Applied VAR model with p = 4, I have check for various values of p like p = 1,2,3,4,5 out of that i found 4 is giving the best result, below I have tested there AICs and BICs scores
var_model_p <- VAR(merged_data_ts, p = 4)

# Summary of the VAR model
#summary(var_model_p)

# AIC score for basic VAR model with p = 1 
aic_p <- AIC(var_model)
bic_p <- BIC(var_model)
print(paste("AIC of VAR model with p=1:", aic_p))
#> [1] "AIC of VAR model with p=1: 18404.8411140876"
print(paste("AIC of VAR model with p=1:", bic_p))
#> [1] "AIC of VAR model with p=1: 18483.8539651394"

# AIC score for basic VAR model with p = 4, from both score values. AICs and BICs score of VAR_model with p= 4 have less value comparatively basic model  
aic_p_val <- AIC(var_model_p)
bic_p_val <- BIC(var_model_p)
print(paste("AIC of VAR model with p=4:", aic_p_val))
#> [1] "AIC of VAR model with p=4: 18063.2943147668"
print(paste("AIC of VAR model with p=4:", bic_p_val))
#> [1] "AIC of VAR model with p=4: 18331.4046722754"

#This process involves predicting a change, retrieving the most recent actual value, and then adjusting the prediction to reflect an absolute forecast of 1st timeseries. This method assumes that the forecast is based on changes (e.g., differences), which is why I have add it back to the last known value.

forecast_var <- predict(var_model_p, n.ahead = 1)
forecasted_value_avs<-forecast_var$fcst$diff.data_34avs_ts.[,'fcst']
tail_value_avs <- tail(data_34avs,1)
final_forecasted_value_avs<- tail_value_avs$Value + forecasted_value_avs
print(paste("forecasted value of 1st timeseries:", final_forecasted_value_avs))
#> [1] "forecasted value of 1st timeseries: 951.045287210624"

#This process involves predicting a change, retrieving the most recent actual value, and then adjusting the prediction to reflect an absolute forecast of 2nd timeseries. This method assumes that the forecast is based on changes (e.g., differences), which is why I have add it back to the last known value.
forecast_var <- predict(var_model_p, n.ahead = 1)
forecasted_value_bvs<-forecast_var$fcst$diff.data_34bvs_ts.[,'fcst']
tail_value_bvs <- tail(data_34bvs,1)
final_forecasted_value_bvs<- tail_value_bvs$Value + forecasted_value_bvs
print(paste("forecasted value of 2nd timeseries:", final_forecasted_value_bvs))
#> [1] "forecasted value of 2nd timeseries: 425.124999304088"

#This process involves predicting a change, retrieving the most recent actual value, and then adjusting the prediction to reflect an absolute forecast of 3rd time series. This method assumes that the forecast is based on changes (e.g., differences), which is why I have add it back to the last known value.
forecast_var <- predict(var_model_p, n.ahead = 1)
forecasted_value_ivs<-forecast_var$fcst$diff.data_34ivs_ts.[,'fcst']
tail_value_ivs <- tail(data_34ivs,1)
final_forecasted_value_ivs<- tail_value_ivs$Value + forecasted_value_ivs
print(paste("forecasted value of 3rd timeseries:", final_forecasted_value_ivs))
#> [1] "forecasted value of 3rd timeseries: 1620.86257697004"

#This process involves predicting a change, retrieving the most recent actual value, and then adjusting the prediction to reflect an absolute forecast of 4th timeseries. This method assumes that the forecast is based on changes (e.g., differences), which is why I have add it back to the last known value.
forecast_var <- predict(var_model_p, n.ahead = 1)
forecasted_value_jvs<-forecast_var$fcst$diff.data_34jvs_ts.[,'fcst']
tail_value_jvs <- tail(data_34jvs,1)
final_forecasted_value_jvs<- tail_value_jvs$Value + forecasted_value_jvs
print(paste("forecasted value of 4th timeseries:", final_forecasted_value_jvs))
#> [1] "forecasted value of 4th timeseries: 3405.97139037687"

#I have performed Granger causality on fitted model with p = 4
causality_34avs <- causality(var_model_p, cause = "diff.data_34avs_ts.", boot = TRUE)
causality_34bvs <- causality(var_model_p, cause = "diff.data_34bvs_ts.", boot = TRUE)
causality_34ivs <- causality(var_model_p, cause = "diff.data_34ivs_ts.", boot = TRUE)
causality_34jvs <- causality(var_model_p, cause = "diff.data_34jvs_ts.", boot = TRUE)
causality_34avs$Granger
#> 
#>  Granger causality H0: diff.data_34avs_ts. do not Granger-cause
#>  diff.data_34bvs_ts. diff.data_34ivs_ts. diff.data_34jvs_ts.
#> 
#> data:  VAR object var_model_p
#> F-Test = 2.1059, boot.runs = 100, p-value = 0.09
causality_34bvs$Granger
#> 
#>  Granger causality H0: diff.data_34bvs_ts. do not Granger-cause
#>  diff.data_34avs_ts. diff.data_34ivs_ts. diff.data_34jvs_ts.
#> 
#> data:  VAR object var_model_p
#> F-Test = 1.9898, boot.runs = 100, p-value = 0.06
causality_34ivs$Granger
#> 
#>  Granger causality H0: diff.data_34ivs_ts. do not Granger-cause
#>  diff.data_34avs_ts. diff.data_34bvs_ts. diff.data_34jvs_ts.
#> 
#> data:  VAR object var_model_p
#> F-Test = 1.8385, boot.runs = 100, p-value = 0.18
causality_34jvs$Granger
#> 
#>  Granger causality H0: diff.data_34jvs_ts. do not Granger-cause
#>  diff.data_34avs_ts. diff.data_34bvs_ts. diff.data_34ivs_ts.
#> 
#> data:  VAR object var_model_p
#> F-Test = 0.97924, boot.runs = 100, p-value = 0.45

bigvar_model_p <- constructModel(as.matrix(merged_data_ts), p = 4, struct = "SparseLag", gran = c(25,10), verbose = TRUE, h = 1, IC = FALSE)
summary(bigvar_model_p)
#> Length  Class   Mode 
#>      1 BigVAR     S4
results_bigvar <- cv.BigVAR(bigvar_model_p)
#> Validation Stage: SparseLag  |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |===                                                                   |   5%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |=====                                                                 |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  14%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%  |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  24%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |======================                                                |  31%  |                                                                              |======================                                                |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |==========================                                            |  38%  |                                                                              |===========================                                           |  38%  |                                                                              |===========================                                           |  39%  |                                                                              |============================                                          |  40%  |                                                                              |============================                                          |  41%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |===============================                                       |  45%  |                                                                              |================================                                      |  45%  |                                                                              |================================                                      |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |=================================                                     |  48%  |                                                                              |==================================                                    |  48%  |                                                                              |==================================                                    |  49%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |========================================                              |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  62%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  71%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  77%  |                                                                              |=======================================================               |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  84%  |                                                                              |============================================================          |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |=============================================================         |  88%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |===============================================================       |  91%  |                                                                              |================================================================      |  91%  |                                                                              |=================================================================     |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |====================================================================  |  98%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%[1] "Evaluation Stage"
plot(results_bigvar)

SparsityPlot.BigVAR.results(results_bigvar)


forecast_bigvar <- predict(results_bigvar, n.ahead = 1)
forecast_bigvar
#>            [,1]
#> [1,] -42.753573
#> [2,]  -8.048883
#> [3,]  11.688577
#> [4,]  -2.624084

Created on 2024-04-11 with reprex v2.0.2