jlivsey / UB-sping24-time-series


Shri Gayathri Chandrasekar #29

Open ShriGayathri98 opened 4 months ago

ShriGayathri98 commented 4 months ago
library(tidyverse) 
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(lubridate) 

# Setting the API key for fredr
fredr_set_key('7011d4c496181649f1cd03a8e8906594')

fred_series <- fredr(series_id = 'ICNSA')

# Displaying the first few rows of the data
head(fred_series)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-08     2024-02-08  
#> 2 1967-01-14 ICNSA     334000 2024-02-08     2024-02-08  
#> 3 1967-01-21 ICNSA     277000 2024-02-08     2024-02-08  
#> 4 1967-01-28 ICNSA     252000 2024-02-08     2024-02-08  
#> 5 1967-02-04 ICNSA     274000 2024-02-08     2024-02-08  
#> 6 1967-02-11 ICNSA     276000 2024-02-08     2024-02-08

# Creating a time series object with weekly frequency
weekly_ts_data <- ts(fred_series$value, frequency = 52)

# Seasonal decomposition and plotting using ggplot2 (via the forecast package)
stl_decomp <- stl(weekly_ts_data, s.window = "periodic")
autoplot(stl_decomp)


# Plotting ACF and PACF using the forecast package for a consistent ggplot2 theme
plot_acf_pacf <- function(ts_data) {
  acf_ts <- Acf(ts_data, plot = FALSE)
  pacf_ts <- Pacf(ts_data, plot = FALSE)

  # Using gridExtra to arrange the ACF and PACF plots side by side
  p1 <- autoplot(acf_ts) + ggtitle("ACF")
  p2 <- autoplot(pacf_ts) + ggtitle("PACF")

  gridExtra::grid.arrange(p1, p2, ncol = 2)
}

plot_acf_pacf(weekly_ts_data)


# Fitting a manual ARIMA model
manual_arima_model <- Arima(weekly_ts_data, order = c(2, 1, 2))

# Displaying the summary of the manual ARIMA model
summary(manual_arima_model)
#> Series: weekly_ts_data 
#> ARIMA(2,1,2) 
#> 
#> Coefficients:
#>          ar1     ar2      ma1      ma2
#>       1.0910  -0.235  -0.7493  -0.2260
#> s.e.  0.0372   0.038   0.0362   0.0377
#> 
#> sigma^2 = 7.571e+09:  log likelihood = -38095.37
#> AIC=76200.74   AICc=76200.76   BIC=76230.74
#> 
#> Training set error measures:
#>                     ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -31.52844 86937.87 41484.85 -1.470326 10.60031 0.4368728
#>                     ACF1
#> Training set 0.003411611

# Forecasting the next period and printing the forecasted value
forecast_arima <- forecast(manual_arima_model, h = 1)
print(forecast_arima$mean)
#> Time Series:
#> Start = c(58, 16) 
#> End = c(58, 16) 
#> Frequency = 52 
#> [1] 219901.7
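
# Optional cross-check (not part of the original submission): compare the
# hand-picked ARIMA(2,1,2) with the order auto.arima() selects on the same
# weekly series. Non-seasonal search only, since a seasonal search at
# frequency 52 is very slow.
auto_fit <- auto.arima(weekly_ts_data, seasonal = FALSE)
summary(auto_fit)
forecast(auto_fit, h = 1)$mean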

Created on 2024-02-09 with reprex v2.0.2

ShriGayathri98 commented 4 months ago

HomeWork_1

library(reprex)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(readr)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(fredr)
library(tseries)
library(tidyverse)
library(ggplot2)
library(corrplot)
#> corrplot 0.92 loaded
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(xts)
#> 
#> ################################### WARNING ###################################
#> # We noticed you have dplyr installed. The dplyr lag() function breaks how    #
#> # base R's lag() function is supposed to work, which breaks lag(my_xts).      #
#> #                                                                             #
#> # Calls to lag(my_xts) that you enter or source() into this session won't     #
#> # work correctly.                                                             #
#> #                                                                             #
#> # All package code is unaffected because it is protected by the R namespace   #
#> # mechanism.                                                                  #
#> #                                                                             #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
#> #                                                                             #
#> # You can use stats::lag() to make sure you're not using dplyr::lag(), or you #
#> # can add conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop   #
#> # dplyr from breaking base R's lag() function.                                #
#> ################################### WARNING ###################################
#> 
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#> 
#>     first, last
library(dplyr)

fredr_set_key('7011d4c496181649f1cd03a8e8906594')
icnsa_data <- fredr(series_id = 'ICNSA')
ccnsa_data <- fredr(series_id = 'CCNSA')
head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-22     2024-02-22  
#> 2 1967-01-14 ICNSA     334000 2024-02-22     2024-02-22  
#> 3 1967-01-21 ICNSA     277000 2024-02-22     2024-02-22  
#> 4 1967-01-28 ICNSA     252000 2024-02-22     2024-02-22  
#> 5 1967-02-04 ICNSA     274000 2024-02-22     2024-02-22  
#> 6 1967-02-11 ICNSA     276000 2024-02-22     2024-02-22
head(ccnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id   value realtime_start realtime_end
#>   <date>     <chr>       <dbl> <date>         <date>      
#> 1 1967-01-07 CCNSA     1594000 2024-02-22     2024-02-22  
#> 2 1967-01-14 CCNSA     1563000 2024-02-22     2024-02-22  
#> 3 1967-01-21 CCNSA     1551000 2024-02-22     2024-02-22  
#> 4 1967-01-28 CCNSA     1533000 2024-02-22     2024-02-22  
#> 5 1967-02-04 CCNSA     1534000 2024-02-22     2024-02-22  
#> 6 1967-02-11 CCNSA     1557000 2024-02-22     2024-02-22
icnsa_data$date <- as.Date(icnsa_data$date)
ccnsa_data$date <- as.Date(ccnsa_data$date)

ts_icnsa <- ts(icnsa_data$value, frequency = 52)
ts_ccnsa <- ts(ccnsa_data$value, frequency = 52)
summary(ts_icnsa)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  133000  265800  326919  365013  406725 6161268
summary(ts_ccnsa)
#>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#>   785000  1996902  2494495  2761854  3145702 23037921

plot(stl(ts_icnsa, s.window="periodic"), main="Seasonal Decomposition of ICNSA")

plot(stl(ts_ccnsa, s.window="periodic"), main="Seasonal Decomposition of CCNSA")


acf(ts_icnsa, main="ACF of ICNSA")

pacf(ts_icnsa, main="PACF of ICNSA")

acf(ts_ccnsa, main="ACF of CCNSA")

pacf(ts_ccnsa, main="PACF of CCNSA")


# merging the data to ensure that the correlation calculation is based on matching time points between the two series
data_merged <- icnsa_data %>% inner_join(ccnsa_data, by = "date", suffix = c("_ICNSA", "_CCNSA"))
head(data_merged)
#> # A tibble: 6 × 9
#>   date       series_id_ICNSA value_ICNSA realtime_start_ICNSA realtime_end_ICNSA
#>   <date>     <chr>                 <dbl> <date>               <date>            
#> 1 1967-01-07 ICNSA                346000 2024-02-22           2024-02-22        
#> 2 1967-01-14 ICNSA                334000 2024-02-22           2024-02-22        
#> 3 1967-01-21 ICNSA                277000 2024-02-22           2024-02-22        
#> 4 1967-01-28 ICNSA                252000 2024-02-22           2024-02-22        
#> 5 1967-02-04 ICNSA                274000 2024-02-22           2024-02-22        
#> 6 1967-02-11 ICNSA                276000 2024-02-22           2024-02-22        
#> # ℹ 4 more variables: series_id_CCNSA <chr>, value_CCNSA <dbl>,
#> #   realtime_start_CCNSA <date>, realtime_end_CCNSA <date>

adf.test(data_merged$value_ICNSA, alternative = "stationary")
#> Warning in adf.test(data_merged$value_ICNSA, alternative = "stationary"):
#> p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_merged$value_ICNSA
#> Dickey-Fuller = -8.8645, Lag order = 14, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(data_merged$value_CCNSA, alternative = "stationary")
#> Warning in adf.test(data_merged$value_CCNSA, alternative = "stationary"):
#> p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  data_merged$value_CCNSA
#> Dickey-Fuller = -7.5226, Lag order = 14, p-value = 0.01
#> alternative hypothesis: stationary

# Calculating the correlation
correlation_value <- cor(data_merged$value_ICNSA, data_merged$value_CCNSA)
print(correlation_value)
#> [1] 0.714954
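
# Optional sketch (not in the original): a cross-correlation check to see at
# which lead/lag CCNSA relates most strongly to ICNSA, beyond the
# contemporaneous correlation computed above.
ccf(data_merged$value_CCNSA, data_merged$value_ICNSA,
    lag.max = 26, main = "CCF: CCNSA vs ICNSA")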

arima_model <- auto.arima(data_merged$value_ICNSA, xreg = data_merged$value_CCNSA)
summary(arima_model)
#> Series: data_merged$value_ICNSA 
#> Regression with ARIMA(2,1,1) errors 
#> 
#> Coefficients:
#>          ar1      ar2      ma1     xreg
#>       0.7061  -0.3209  -0.2450  -0.0404
#> s.e.  0.1812   0.0607   0.2083   0.0113
#> 
#> sigma^2 = 7.849e+09:  log likelihood = -38148.56
#> AIC=76307.11   AICc=76307.13   BIC=76337.11
#> 
#> Training set error measures:
#>                     ME     RMSE     MAE        MPE    MAPE    MASE        ACF1
#> Training set -25.27923 88519.01 42219.6 -0.7313102 10.5957 1.11972 0.004447645
checkresiduals(arima_model)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(2,1,1) errors
#> Q* = 80.612, df = 7, p-value = 1.033e-14
#> 
#> Model df: 3.   Total lags used: 10
fc <- forecast(arima_model, xreg = tail(data_merged$value_CCNSA, 1), h = 1)
forecasted_value <- mean(fc$mean, na.rm = TRUE)

forecasted_value
#> [1] 221167.5
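
# Optional sketch (not in the original): the Ljung-Box test above rejects, so
# the regression-with-ARIMA-errors model leaves residual autocorrelation. One
# hedged option is to add Fourier terms for the weekly seasonality as extra
# regressors alongside CCNSA.
merged_ts <- ts(data_merged$value_ICNSA, frequency = 52)
xreg_fourier <- cbind(ccnsa = data_merged$value_CCNSA, fourier(merged_ts, K = 4))
fit_fourier <- auto.arima(merged_ts, xreg = xreg_fourier, seasonal = FALSE)
checkresiduals(fit_fourier)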

Created on 2024-02-22 with reprex v2.0.2

ShriGayathri98 commented 4 months ago

HW_2

library(reprex)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
library(urca)
library(tidyverse)
library(readxl)
library(forecast)

fredr_set_key('7011d4c496181649f1cd03a8e8906594')
icnsa_data <- fredr(series_id = 'ICNSA')

head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-28     2024-02-28  
#> 2 1967-01-14 ICNSA     334000 2024-02-28     2024-02-28  
#> 3 1967-01-21 ICNSA     277000 2024-02-28     2024-02-28  
#> 4 1967-01-28 ICNSA     252000 2024-02-28     2024-02-28  
#> 5 1967-02-04 ICNSA     274000 2024-02-28     2024-02-28  
#> 6 1967-02-11 ICNSA     276000 2024-02-28     2024-02-28

icnsa_data$date <- as.Date(icnsa_data$date)
ts_icnsa <- ts(icnsa_data$value, frequency = 52)

plot(stl(ts_icnsa, s.window="periodic"), main = "Seasonal Decomposition")


ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line(color = 'blue') +  
  labs(title = "ICNSA Time Series Data", x = "Date", y = "Value") 


covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2021-05-30")

filtered_data <- icnsa_data %>%
  filter(date < covid_start | date > covid_end)

plot(icnsa_data$date, icnsa_data$value, type = "l", col = "blue", 
     xlab = "Year", ylab = "Value", main = "ICNSA Data with COVID Period Highlighted")

# Highlight the COVID period by plotting it in a different color
covid_data <- icnsa_data[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end, ]
lines(covid_data$date, covid_data$value, col = "red", lwd = 2)

# Optionally, add a legend
legend("topright", legend = c("COVID Period", "Other Periods"), 
       col = c("red", "blue"), lwd = 2, bty = "n")


acf(filtered_data$value)

pacf(filtered_data$value)


# Define the spar values
spar_values <- c(0.1, 0.2, 0.4, 0.5, 1)

# Use lapply to loop over spar values and plot
plot_list <- lapply(spar_values, function(spar) {
  # Fit smooth spline with current spar value
  fit_spline <- smooth.spline(as.numeric(filtered_data$date), filtered_data$value, spar = spar)

  # Predict using the fitted spline for the entire period
  values_predicted <- predict(fit_spline, as.numeric(icnsa_data$date))

  # Plot the original data
  plot(icnsa_data$date, icnsa_data$value, type = 'l', main = paste("Spar =", spar),
       xlab = "Date", ylab = "Values", col = "blue")

  # Add the smoothed data
  lines(icnsa_data$date, values_predicted$y, col = "red")

  # Add a legend
  legend("topleft", legend = c("Original", "Smoothed"), col = c("blue", "red"), lty = 1)
})
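
# Optional sketch (not in the original): instead of eyeballing spar, let
# smooth.spline() choose the smoothing parameter by generalized cross-validation.
fit_gcv <- smooth.spline(as.numeric(filtered_data$date), filtered_data$value)
fit_gcv$spar  # spar selected by GCV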


fit_spline <- smooth.spline(as.numeric(filtered_data$date), filtered_data$value, spar=0.4)

predicted_val <- predict(fit_spline, as.numeric(icnsa_data$date))

plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", "0.4"),
     xlab="Date", ylab="Values", col="blue")
lines(icnsa_data$date, predicted_val$y, col="red")
legend("topleft", legend=c("Smoothed", "Imputed"), col=c("red", "blue"), lty=1)


# Multiplicative
holtwinters_multi <- HoltWinters(ts_icnsa, seasonal = "multiplicative")
forecast_multi <- forecast(holtwinters_multi, h = 1)

#Additive
holtwinters_add <- HoltWinters(ts_icnsa, seasonal = "additive")
forecast_add <- forecast(holtwinters_add, h = 1)

#Forecast Values
cat("HoltWinters Multiplicative Forecast:", forecast_multi$mean, "\n")
#> HoltWinters Multiplicative Forecast: 197681.2
cat("HoltWinters Additive Forecast:", forecast_add$mean, "\n")
#> HoltWinters Additive Forecast: 161954.6
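
# Optional sketch (not in the original): an STL + ETS forecast as a cross-check
# on Holt-Winters; ets() cannot be applied directly here because the seasonal
# period (52) is too high for it.
stlf_fc <- stlf(ts_icnsa, h = 1, method = "ets")
stlf_fc$mean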

Created on 2024-02-28 with reprex v2.0.2

ShriGayathri98 commented 3 months ago

HW 3

library(reprex)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
library(urca)
library(tidyverse)
library(readxl)
library(forecast)
library(lmtest)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric

library(KFAS)
#> Please cite KFAS in publications by using: 
#> 
#>   Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(zoo)
library(dlm)
#> 
#> Attaching package: 'dlm'
#> The following object is masked from 'package:ggplot2':
#> 
#>     %+%
library(imputeTS)
#> 
#> Attaching package: 'imputeTS'
#> The following object is masked from 'package:zoo':
#> 
#>     na.locf
#> The following object is masked from 'package:tseries':
#> 
#>     na.remove

fredr_set_key('7011d4c496181649f1cd03a8e8906594')
icnsa_data <- fredr(series_id = 'ICNSA')

head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-03-07     2024-03-07  
#> 2 1967-01-14 ICNSA     334000 2024-03-07     2024-03-07  
#> 3 1967-01-21 ICNSA     277000 2024-03-07     2024-03-07  
#> 4 1967-01-28 ICNSA     252000 2024-03-07     2024-03-07  
#> 5 1967-02-04 ICNSA     274000 2024-03-07     2024-03-07  
#> 6 1967-02-11 ICNSA     276000 2024-03-07     2024-03-07

icnsa_data$date <- as.Date(icnsa_data$date)
ts_icnsa <- ts(icnsa_data$value, frequency = 52)

plot(stl(ts_icnsa, s.window="periodic"), main = "Seasonal Decomposition")


ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line(color = 'blue') +  
  labs(title = "ICNSA Time Series Data", x = "Date", y = "Value") 


covid_start <- as.Date("2020-03-01")
covid_end <- as.Date("2021-07-01")

plot(icnsa_data$date, icnsa_data$value, type = "l", col = "blue", 
     xlab = "Year", ylab = "Value", main = "ICNSA Data with COVID Period Highlighted")

covid_data <- icnsa_data[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end, ]

lines(covid_data$date, covid_data$value, col = "red", lwd = 2)
legend("topright", legend = c("COVID Period", "Other Periods"), 
       col = c("red", "blue"), lwd = 2, bty = "n")


covid_data1 <- covid_data
non_covid_data <- anti_join(icnsa_data, covid_data,by = "date")
non_covid_data1 <- non_covid_data

# Blank out the COVID-period values (set them to NA) before imputation
covid_data$value<-NA
covid_data1$value <- NA

# Fit a smooth spline to the non-COVID data and impute the COVID weeks from it
ss_fit <- smooth.spline(x = non_covid_data$date, y = non_covid_data$value, lambda = 0.6)
impute_values <- predict(ss_fit, x = as.numeric(covid_data$date))
covid_data$value <- impute_values$y

complete_data <- rbind(non_covid_data, covid_data)
complete_data <- complete_data[order(complete_data$date), ]

complete_data_kalman <- rbind(non_covid_data1, covid_data1) %>% arrange(date)

plot(complete_data$date, complete_data$value, type = "l", main = "Smooth Spline")
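
# Optional sketch (not in the original): imputeTS is loaded above but unused, so
# a Kalman-smoother imputation of the blanked-out COVID weeks gives a second
# imputed series to compare against the spline fill.
complete_data_kalman$value <- as.numeric(
  na_kalman(ts(complete_data_kalman$value, frequency = 52), model = "StructTS")
)
plot(complete_data_kalman$date, complete_data_kalman$value, type = "l",
     main = "Kalman Imputation")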


# Fitting a basic structural time series model (BSM)
struct_model <- StructTS(ts_icnsa, type = "BSM")

predicted_val <- forecast(struct_model, h=1)

predicted_val
#>          Point Forecast    Lo 80    Hi 80    Lo 95  Hi 95
#> 58.36538       213119.3 88803.37 337435.2 22994.51 403244

autoplot(predicted_val) + autolayer(ts_icnsa)
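
# Optional sketch (not in the original): KFAS is loaded above but unused. Roughly
# the same idea as StructTS, a local-level plus weekly-seasonal state space model
# with the three unknown variances estimated by maximum likelihood.
kfas_model <- SSModel(ts_icnsa ~ SSMtrend(degree = 1, Q = list(NA)) +
                        SSMseasonal(period = 52, Q = NA, sea.type = "dummy"),
                      H = NA)
kfas_fit <- fitSSM(kfas_model, inits = rep(log(var(ts_icnsa)), 3), method = "BFGS")
predict(kfas_fit$model, n.ahead = 1)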


# Adding another covariate: New York initial claims (NYICLAIMS)
nyiclaim <- fredr(series_id = "NYICLAIMS")
nyiclaim_ts <- ts(nyiclaim$value, frequency = 52)

adjusted_icnsa <- window(ts_icnsa, start = start(nyiclaim_ts), end = end(nyiclaim_ts))

arima_with_covariate <- auto.arima(adjusted_icnsa, xreg = nyiclaim_ts)

nyiclaim_tail_value <- tail(nyiclaim_ts, n = 1)

forecasted_values <- forecast(arima_with_covariate, xreg = matrix(tail(nyiclaim_ts, 1)), h = 1)

forecasted_mean <- forecasted_values$mean
print(forecasted_mean)
#> Time Series:
#> Start = c(39, 10) 
#> End = c(39, 10) 
#> Frequency = 52 
#> [1] 363515.1

ggplot2::autoplot(forecasted_values) + 
  autolayer(fitted(arima_with_covariate), series = "Fitted", alpha = 0.5) +
  ggtitle("Forecast and Fitted Values with External Regressor") +
  theme_minimal()
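
# Note / optional sketch (not in the original): window() above aligns the two
# series by ts index, but ICNSA and NYICLAIMS start in different years, so a
# date-based join is a safer way to line the regressor up with the target.
aligned <- dplyr::inner_join(icnsa_data, nyiclaim, by = "date",
                             suffix = c("_icnsa", "_ny"))
fit_aligned <- auto.arima(aligned$value_icnsa, xreg = aligned$value_ny)
forecast(fit_aligned, xreg = tail(aligned$value_ny, 1), h = 1)$mean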

Created on 2024-03-07 with reprex v2.0.2

ShriGayathri98 commented 2 months ago

Assignment 4:

library(readxl)
library(tidyverse)
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
library(vars)
#> Loading required package: MASS
#> 
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#> 
#>     select
#> Loading required package: strucchange
#> Loading required package: sandwich
#> 
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#> 
#>     boundary
#> Loading required package: urca
#> Loading required package: lmtest
library(BigVAR)
#> Loading required package: lattice
library(reprex)

# Load the datasets
tcg_data <- read_excel("C:\\Users\\Shri Gayathiri\\OneDrive\\Documents\\Time_Series\\TCG_Capital_Goods.xlsx")
dap_data <- read_excel("C:\\Users\\Shri Gayathiri\\OneDrive\\Documents\\Time_Series\\DAP_Defence_Aircraft_and_Parts.xlsx")
ce_data <- read_excel("C:\\Users\\Shri Gayathiri\\OneDrive\\Documents\\Time_Series\\34X_Communication_Equipments.xlsx")
iti_data <- read_excel("C:\\Users\\Shri Gayathiri\\OneDrive\\Documents\\Time_Series\\ITI_Infomation_technology_Industries.xlsx")

# Convert to Time Series

tcg_data$Period <- as.Date(as.yearmon(tcg_data$Period, "%b-%Y"))
tcg_data_ts <- ts(tcg_data$Value, start=c(1992,1), frequency=12)

dap_data$Period <- as.Date(as.yearmon(dap_data$Period, "%b-%Y"))
dap_data_ts <- ts(dap_data$Value, start=c(1992,1), frequency=12)

ce_data$Period <- as.Date(as.yearmon(ce_data$Period, "%b-%Y"))
ce_data_ts <- ts(ce_data$Value, start=c(1992,1), frequency=12)

iti_data$Period <- as.Date(as.yearmon(iti_data$Period, "%b-%Y"))
iti_data_ts <- ts(iti_data$Value, start=c(1992,1), frequency=12)

# Plotting the Time Series

tcg_data %>%
  ggplot(aes(x=Period, y=Value)) +
  geom_line() + 
  labs(title="Time Series of Capital Goods", x="Time", y="Value")


dap_data %>%
  ggplot(aes(x=Period, y=Value)) +
  geom_line() + 
  labs(title="Time Series of Defence Aircrafts and Parts", x="Time", y="Value")


ce_data %>%
  ggplot(aes(x=Period, y=Value)) +
  geom_line() + 
  labs(title="Time Series of Communication Equipments", x="Time", y="Value")


iti_data %>%
  ggplot(aes(x=Period, y=Value)) +
  geom_line() + 
  labs(title="Time Series of Information Technology Industries", x="Time", y="Value")


# Decomposition of Time Series

tcg_decomp <- stl(tcg_data_ts, s.window="periodic")
autoplot(tcg_decomp)


dap_decomp <- stl(dap_data_ts, s.window="periodic")
autoplot(dap_decomp)


ce_decomp <- stl(ce_data_ts, s.window="periodic")
autoplot(ce_decomp)


iti_decomp <- stl(iti_data_ts, s.window="periodic")
autoplot(iti_decomp)


#Autocorrelation Plots

par(mfrow=c(1, 2))

acf(tcg_data_ts)
pacf(tcg_data_ts)


acf(dap_data_ts)
pacf(dap_data_ts)


acf(ce_data_ts)
pacf(ce_data_ts)


acf(iti_data_ts)
pacf(iti_data_ts)


par(mfrow=c(1, 1))

# Stationary or not

adf.test(tcg_data_ts)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  tcg_data_ts
#> Dickey-Fuller = -3.2761, Lag order = 7, p-value = 0.0752
#> alternative hypothesis: stationary
adf.test(dap_data_ts)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  dap_data_ts
#> Dickey-Fuller = -2.3168, Lag order = 7, p-value = 0.4434
#> alternative hypothesis: stationary
adf.test(ce_data_ts)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  ce_data_ts
#> Dickey-Fuller = -2.9542, Lag order = 7, p-value = 0.1742
#> alternative hypothesis: stationary
adf.test(iti_data_ts)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  iti_data_ts
#> Dickey-Fuller = -2.8325, Lag order = 7, p-value = 0.2256
#> alternative hypothesis: stationary

# INTERPRETATIONS:
# tcg_data_ts: p-value = 0.0752 (> 0.05), so the unit-root null is not rejected; there is not enough evidence to treat the series as stationary.
# dap_data_ts: p-value = 0.4434, well above 0.05, so the null is not rejected and the series is likely non-stationary.
# ce_data_ts: p-value = 0.1742, above the usual 0.05 threshold, again suggesting the series is likely non-stationary.
# iti_data_ts: p-value = 0.2256, so the null is not rejected and this series may be non-stationary as well.
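
# Optional cross-check (not in the original): forecast::ndiffs() as a quick test
# of how many differences each series needs before the differencing step below.
sapply(list(tcg = tcg_data_ts, dap = dap_data_ts,
            ce = ce_data_ts, iti = iti_data_ts), ndiffs)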

# Applying first differencing to induce stationarity:
tcg_data_diff <- diff(tcg_data_ts, differences = 1)

dap_data_diff <- diff(dap_data_ts, differences = 1)

ce_data_diff <- diff(ce_data_ts, differences = 1)

iti_data_diff <- diff(iti_data_ts, differences = 1)

# Checking if Stationary:

adf.test(tcg_data_diff)
#> Warning in adf.test(tcg_data_diff): p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  tcg_data_diff
#> Dickey-Fuller = -5.1118, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(dap_data_diff)
#> Warning in adf.test(dap_data_diff): p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  dap_data_diff
#> Dickey-Fuller = -7.154, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(ce_data_diff)
#> Warning in adf.test(ce_data_diff): p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  ce_data_diff
#> Dickey-Fuller = -4.1155, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(iti_data_diff)
#> Warning in adf.test(iti_data_diff): p-value smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  iti_data_diff
#> Dickey-Fuller = -4.9382, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary

# Fitting a VAR(1) model:

data_combined <- cbind(tcg_data_diff, dap_data_diff, ce_data_diff, iti_data_diff)

var1_model <- VAR(data_combined, p=1, type="const")
summary(var1_model)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: tcg_data_diff, dap_data_diff, ce_data_diff, iti_data_diff 
#> Deterministic variables: const 
#> Sample size: 384 
#> Log Likelihood: -11431.799 
#> Roots of the characteristic polynomial:
#> 0.4779 0.4333 0.3647 0.203
#> Call:
#> VAR(y = data_combined, p = 1, type = "const")
#> 
#> 
#> Estimation results for equation tcg_data_diff: 
#> ============================================== 
#> tcg_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + const 
#> 
#>                    Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1  -0.313702   0.057346  -5.470 8.17e-08 ***
#> dap_data_diff.l1   0.003925   0.269966   0.015   0.9884    
#> ce_data_diff.l1    0.511589   0.373996   1.368   0.1722    
#> iti_data_diff.l1   0.022406   0.203944   0.110   0.9126    
#> const            170.748541  77.120977   2.214   0.0274 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 1506 on 379 degrees of freedom
#> Multiple R-Squared: 0.09168, Adjusted R-squared: 0.0821 
#> F-statistic: 9.564 on 4 and 379 DF,  p-value: 2.239e-07 
#> 
#> 
#> Estimation results for equation dap_data_diff: 
#> ============================================== 
#> dap_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + const 
#> 
#>                   Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1 -0.003826   0.009696  -0.395    0.693    
#> dap_data_diff.l1 -0.469648   0.045645 -10.289   <2e-16 ***
#> ce_data_diff.l1  -0.017993   0.063235  -0.285    0.776    
#> iti_data_diff.l1 -0.002549   0.034482  -0.074    0.941    
#> const             6.694270  13.039447   0.513    0.608    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 254.6 on 379 degrees of freedom
#> Multiple R-Squared: 0.2232,  Adjusted R-squared: 0.215 
#> F-statistic: 27.23 on 4 and 379 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ce_data_diff: 
#> ============================================= 
#> ce_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + const 
#> 
#>                   Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1 -0.001160   0.008815  -0.132   0.8954    
#> dap_data_diff.l1 -0.099211   0.041498  -2.391   0.0173 *  
#> ce_data_diff.l1  -0.439856   0.057490  -7.651 1.67e-13 ***
#> iti_data_diff.l1  0.078668   0.031350   2.509   0.0125 *  
#> const            -1.441337  11.854790  -0.122   0.9033    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 231.5 on 379 degrees of freedom
#> Multiple R-Squared: 0.1497,  Adjusted R-squared: 0.1407 
#> F-statistic: 16.68 on 4 and 379 DF,  p-value: 1.324e-12 
#> 
#> 
#> Estimation results for equation iti_data_diff: 
#> ============================================== 
#> iti_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1  0.01319    0.01919   0.687 0.492237    
#> dap_data_diff.l1 -0.15968    0.09033  -1.768 0.077923 .  
#> ce_data_diff.l1   0.05541    0.12514   0.443 0.658188    
#> iti_data_diff.l1 -0.25563    0.06824  -3.746 0.000208 ***
#> const            18.11579   25.80499   0.702 0.483093    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 503.9 on 379 degrees of freedom
#> Multiple R-Squared: 0.05774, Adjusted R-squared: 0.0478 
#> F-statistic: 5.806 on 4 and 379 DF,  p-value: 0.0001522 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>               tcg_data_diff dap_data_diff ce_data_diff iti_data_diff
#> tcg_data_diff       2268292         39358        94728        395025
#> dap_data_diff         39358         64844        -6213         -6045
#> ce_data_diff          94728         -6213        53597         70506
#> iti_data_diff        395025         -6045        70506        253958
#> 
#> Correlation matrix of residuals:
#>               tcg_data_diff dap_data_diff ce_data_diff iti_data_diff
#> tcg_data_diff        1.0000        0.1026       0.2717        0.5205
#> dap_data_diff        0.1026        1.0000      -0.1054       -0.0471
#> ce_data_diff         0.2717       -0.1054       1.0000        0.6043
#> iti_data_diff        0.5205       -0.0471       0.6043        1.0000
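
# Optional sketch (not in the original): a portmanteau test on the VAR(1)
# residuals to check whether a single lag is adequate before the lag selection
# step below.
serial.test(var1_model, lags.pt = 16, type = "PT.asymptotic")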

# Selecting Lag Value:

lag_selection <- VARselect(data_combined, lag.max=10, type="const")
optimal_p <- lag_selection$selection["AIC(n)"]
print(optimal_p)
#> AIC(n) 
#>      4

varp_model <- VAR(data_combined, p=optimal_p, type="const")
summary(varp_model)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: tcg_data_diff, dap_data_diff, ce_data_diff, iti_data_diff 
#> Deterministic variables: const 
#> Sample size: 381 
#> Log Likelihood: -11234.557 
#> Roots of the characteristic polynomial:
#> 0.7925 0.722 0.722 0.6783 0.6783 0.6464 0.645 0.645 0.642 0.642 0.5556 0.4982 0.4732 0.4732 0.3276 0.3276
#> Call:
#> VAR(y = data_combined, p = optimal_p, type = "const")
#> 
#> 
#> Estimation results for equation tcg_data_diff: 
#> ============================================== 
#> tcg_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + tcg_data_diff.l2 + dap_data_diff.l2 + ce_data_diff.l2 + iti_data_diff.l2 + tcg_data_diff.l3 + dap_data_diff.l3 + ce_data_diff.l3 + iti_data_diff.l3 + tcg_data_diff.l4 + dap_data_diff.l4 + ce_data_diff.l4 + iti_data_diff.l4 + const 
#> 
#>                    Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1  -0.421295   0.061250  -6.878 2.65e-11 ***
#> dap_data_diff.l1   0.184308   0.307501   0.599 0.549297    
#> ce_data_diff.l1    0.002987   0.416736   0.007 0.994285    
#> iti_data_diff.l1   0.352271   0.222979   1.580 0.115013    
#> tcg_data_diff.l2  -0.247045   0.065566  -3.768 0.000192 ***
#> dap_data_diff.l2   0.278298   0.359966   0.773 0.439951    
#> ce_data_diff.l2   -0.680854   0.470936  -1.446 0.149109    
#> iti_data_diff.l2   0.844975   0.240112   3.519 0.000488 ***
#> tcg_data_diff.l3   0.122983   0.065630   1.874 0.061748 .  
#> dap_data_diff.l3  -0.028154   0.356841  -0.079 0.937157    
#> ce_data_diff.l3   -0.736550   0.472763  -1.558 0.120110    
#> iti_data_diff.l3   0.804280   0.242591   3.315 0.001007 ** 
#> tcg_data_diff.l4   0.035106   0.061213   0.574 0.566655    
#> dap_data_diff.l4  -0.601584   0.303325  -1.983 0.048085 *  
#> ce_data_diff.l4   -0.211577   0.418266  -0.506 0.613273    
#> iti_data_diff.l4   0.332920   0.227477   1.464 0.144185    
#> const            158.718792  74.820918   2.121 0.034571 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 1415 on 364 degrees of freedom
#> Multiple R-Squared: 0.2286,  Adjusted R-squared: 0.1947 
#> F-statistic: 6.744 on 16 and 364 DF,  p-value: 1.725e-13 
#> 
#> 
#> Estimation results for equation dap_data_diff: 
#> ============================================== 
#> dap_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + tcg_data_diff.l2 + dap_data_diff.l2 + ce_data_diff.l2 + iti_data_diff.l2 + tcg_data_diff.l3 + dap_data_diff.l3 + ce_data_diff.l3 + iti_data_diff.l3 + tcg_data_diff.l4 + dap_data_diff.l4 + ce_data_diff.l4 + iti_data_diff.l4 + const 
#> 
#>                   Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1 -0.007043   0.010561  -0.667    0.505    
#> dap_data_diff.l1 -0.584291   0.053020 -11.020  < 2e-16 ***
#> ce_data_diff.l1  -0.108938   0.071855  -1.516    0.130    
#> iti_data_diff.l1  0.022162   0.038447   0.576    0.565    
#> tcg_data_diff.l2 -0.028204   0.011305  -2.495    0.013 *  
#> dap_data_diff.l2 -0.269262   0.062066  -4.338 1.86e-05 ***
#> ce_data_diff.l2  -0.088042   0.081200  -1.084    0.279    
#> iti_data_diff.l2  0.059750   0.041401   1.443    0.150    
#> tcg_data_diff.l3 -0.007343   0.011316  -0.649    0.517    
#> dap_data_diff.l3 -0.047184   0.061528  -0.767    0.444    
#> ce_data_diff.l3   0.080064   0.081515   0.982    0.327    
#> iti_data_diff.l3  0.008306   0.041828   0.199    0.843    
#> tcg_data_diff.l4 -0.009337   0.010555  -0.885    0.377    
#> dap_data_diff.l4 -0.026388   0.052300  -0.505    0.614    
#> ce_data_diff.l4  -0.009451   0.072119  -0.131    0.896    
#> iti_data_diff.l4 -0.016238   0.039222  -0.414    0.679    
#> const            14.504080  12.900827   1.124    0.262    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 243.9 on 364 degrees of freedom
#> Multiple R-Squared: 0.2946,  Adjusted R-squared: 0.2636 
#> F-statistic: 9.501 on 16 and 364 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation ce_data_diff: 
#> ============================================= 
#> ce_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + tcg_data_diff.l2 + dap_data_diff.l2 + ce_data_diff.l2 + iti_data_diff.l2 + tcg_data_diff.l3 + dap_data_diff.l3 + ce_data_diff.l3 + iti_data_diff.l3 + tcg_data_diff.l4 + dap_data_diff.l4 + ce_data_diff.l4 + iti_data_diff.l4 + const 
#> 
#>                   Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1 -0.005406   0.009601  -0.563  0.57377    
#> dap_data_diff.l1 -0.119837   0.048202  -2.486  0.01336 *  
#> ce_data_diff.l1  -0.520189   0.065325  -7.963 2.17e-14 ***
#> iti_data_diff.l1  0.084804   0.034953   2.426  0.01574 *  
#> tcg_data_diff.l2 -0.005926   0.010278  -0.577  0.56461    
#> dap_data_diff.l2 -0.010869   0.056426  -0.193  0.84736    
#> ce_data_diff.l2  -0.094502   0.073822  -1.280  0.20131    
#> iti_data_diff.l2  0.092578   0.037639   2.460  0.01437 *  
#> tcg_data_diff.l3 -0.017100   0.010288  -1.662  0.09734 .  
#> dap_data_diff.l3  0.004079   0.055937   0.073  0.94191    
#> ce_data_diff.l3   0.120246   0.074108   1.623  0.10555    
#> iti_data_diff.l3  0.100629   0.038027   2.646  0.00849 ** 
#> tcg_data_diff.l4 -0.006502   0.009595  -0.678  0.49845    
#> dap_data_diff.l4 -0.001943   0.047548  -0.041  0.96743    
#> ce_data_diff.l4   0.111038   0.065565   1.694  0.09121 .  
#> iti_data_diff.l4  0.088047   0.035658   2.469  0.01400 *  
#> const            -0.939539  11.728551  -0.080  0.93620    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 221.8 on 364 degrees of freedom
#> Multiple R-Squared: 0.2502,  Adjusted R-squared: 0.2173 
#> F-statistic: 7.593 on 16 and 364 DF,  p-value: 1.82e-15 
#> 
#> 
#> Estimation results for equation iti_data_diff: 
#> ============================================== 
#> iti_data_diff = tcg_data_diff.l1 + dap_data_diff.l1 + ce_data_diff.l1 + iti_data_diff.l1 + tcg_data_diff.l2 + dap_data_diff.l2 + ce_data_diff.l2 + iti_data_diff.l2 + tcg_data_diff.l3 + dap_data_diff.l3 + ce_data_diff.l3 + iti_data_diff.l3 + tcg_data_diff.l4 + dap_data_diff.l4 + ce_data_diff.l4 + iti_data_diff.l4 + const 
#> 
#>                   Estimate Std. Error t value Pr(>|t|)    
#> tcg_data_diff.l1 -0.003193   0.020370  -0.157 0.875545    
#> dap_data_diff.l1 -0.168442   0.102265  -1.647 0.100399    
#> ce_data_diff.l1   0.068399   0.138593   0.494 0.621940    
#> iti_data_diff.l1 -0.282322   0.074156  -3.807 0.000165 ***
#> tcg_data_diff.l2  0.001935   0.021805   0.089 0.929321    
#> dap_data_diff.l2  0.040285   0.119713   0.337 0.736681    
#> ce_data_diff.l2   0.277450   0.156619   1.772 0.077313 .  
#> iti_data_diff.l2 -0.069580   0.079854  -0.871 0.384139    
#> tcg_data_diff.l3 -0.013063   0.021826  -0.599 0.549870    
#> dap_data_diff.l3  0.102061   0.118674   0.860 0.390349    
#> ce_data_diff.l3   0.058705   0.157226   0.373 0.709082    
#> iti_data_diff.l3  0.347503   0.080678   4.307 2.13e-05 ***
#> tcg_data_diff.l4  0.006140   0.020358   0.302 0.763133    
#> dap_data_diff.l4  0.027284   0.100877   0.270 0.786950    
#> ce_data_diff.l4   0.286051   0.139102   2.056 0.040455 *  
#> iti_data_diff.l4  0.076581   0.075652   1.012 0.312078    
#> const            12.772003  24.883105   0.513 0.608067    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 470.5 on 364 degrees of freedom
#> Multiple R-Squared: 0.2048,  Adjusted R-squared: 0.1698 
#> F-statistic: 5.859 on 16 and 364 DF,  p-value: 2.084e-11 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>               tcg_data_diff dap_data_diff ce_data_diff iti_data_diff
#> tcg_data_diff       2001700         29497        79795        346124
#> dap_data_diff         29497         59510        -6763         -6152
#> ce_data_diff          79795         -6763        49186         63059
#> iti_data_diff        346124         -6152        63059        221392
#> 
#> Correlation matrix of residuals:
#>               tcg_data_diff dap_data_diff ce_data_diff iti_data_diff
#> tcg_data_diff       1.00000       0.08546       0.2543       0.51994
#> dap_data_diff       0.08546       1.00000      -0.1250      -0.05359
#> ce_data_diff        0.25430      -0.12500       1.0000       0.60429
#> iti_data_diff       0.51994      -0.05359       0.6043       1.00000

# Forecasting the next month
forecast_results <- predict(varp_model, n.ahead = 1)

# Access and display the forecasted values
forecasts <- forecast_results$fcst
print(forecasts)
#> $tcg_data_diff
#>                         fcst     lower    upper       CI
#> tcg_data_diff.fcst -193.0425 -2966.028 2579.943 2772.986
#> 
#> $dap_data_diff
#>                         fcst     lower    upper       CI
#> dap_data_diff.fcst -20.46063 -498.5864 457.6651 478.1258
#> 
#> $ce_data_diff
#>                       fcst     lower    upper       CI
#> ce_data_diff.fcst 4.706112 -429.9732 439.3854 434.6793
#> 
#> $iti_data_diff
#>                       fcst     lower    upper       CI
#> iti_data_diff.fcst 20.2639 -901.9447 942.4725 922.2086

# Print the point forecast for each series
for(series in names(forecasts)) {
  cat("Forecasts for", series, ":\n")
  print(forecasts[[series]][, "fcst"])
}
#> Forecasts for tcg_data_diff :
#> [1] -193.0425
#> Forecasts for dap_data_diff :
#> [1] -20.46063
#> Forecasts for ce_data_diff :
#> [1] 4.706112
#> Forecasts for iti_data_diff :
#> [1] 20.2639

# Testing the Granger Causality:

var_names <- names(summary(varp_model)$varresult)
print(var_names)
#> [1] "tcg_data_diff" "dap_data_diff" "ce_data_diff"  "iti_data_diff"

# Loop over variable pairs; note that causality() tests each 'cause' jointly against all other variables, so the printed result repeats for every j with the same i
for (i in 1:length(var_names)) {
  for (j in 1:length(var_names)) {
    if (i != j) { # Ensuring not to test the same variable
      # Testing Granger causality
      test_result <- causality(varp_model, cause = var_names[i])

      # causality() returns a joint test of 'cause' against all remaining variables
      specific_result <- test_result$Granger

      # Print results for the specific pair
      cat("Testing if", var_names[i], "Granger-causes", var_names[j], ":\n")
      print(specific_result)
      cat("\n---\n")
    }
  }
}
#> Testing if tcg_data_diff Granger-causes dap_data_diff :
#> 
#>  Granger causality H0: tcg_data_diff do not Granger-cause dap_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 0.92777, df1 = 12, df2 = 1456, p-value = 0.5179
#> 
#> 
#> ---
#> Testing if tcg_data_diff Granger-causes ce_data_diff :
#> 
#>  Granger causality H0: tcg_data_diff do not Granger-cause dap_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 0.92777, df1 = 12, df2 = 1456, p-value = 0.5179
#> 
#> 
#> ---
#> Testing if tcg_data_diff Granger-causes iti_data_diff :
#> 
#>  Granger causality H0: tcg_data_diff do not Granger-cause dap_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 0.92777, df1 = 12, df2 = 1456, p-value = 0.5179
#> 
#> 
#> ---
#> Testing if dap_data_diff Granger-causes tcg_data_diff :
#> 
#>  Granger causality H0: dap_data_diff do not Granger-cause tcg_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 1.5471, df1 = 12, df2 = 1456, p-value = 0.101
#> 
#> 
#> ---
#> Testing if dap_data_diff Granger-causes ce_data_diff :
#> 
#>  Granger causality H0: dap_data_diff do not Granger-cause tcg_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 1.5471, df1 = 12, df2 = 1456, p-value = 0.101
#> 
#> 
#> ---
#> Testing if dap_data_diff Granger-causes iti_data_diff :
#> 
#>  Granger causality H0: dap_data_diff do not Granger-cause tcg_data_diff
#>  ce_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 1.5471, df1 = 12, df2 = 1456, p-value = 0.101
#> 
#> 
#> ---
#> Testing if ce_data_diff Granger-causes tcg_data_diff :
#> 
#>  Granger causality H0: ce_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.1389, df1 = 12, df2 = 1456, p-value = 0.0125
#> 
#> 
#> ---
#> Testing if ce_data_diff Granger-causes dap_data_diff :
#> 
#>  Granger causality H0: ce_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.1389, df1 = 12, df2 = 1456, p-value = 0.0125
#> 
#> 
#> ---
#> Testing if ce_data_diff Granger-causes iti_data_diff :
#> 
#>  Granger causality H0: ce_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff iti_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.1389, df1 = 12, df2 = 1456, p-value = 0.0125
#> 
#> 
#> ---
#> Testing if iti_data_diff Granger-causes tcg_data_diff :
#> 
#>  Granger causality H0: iti_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff ce_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.4301, df1 = 12, df2 = 1456, p-value = 0.003978
#> 
#> 
#> ---
#> Testing if iti_data_diff Granger-causes dap_data_diff :
#> 
#>  Granger causality H0: iti_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff ce_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.4301, df1 = 12, df2 = 1456, p-value = 0.003978
#> 
#> 
#> ---
#> Testing if iti_data_diff Granger-causes ce_data_diff :
#> 
#>  Granger causality H0: iti_data_diff do not Granger-cause tcg_data_diff
#>  dap_data_diff ce_data_diff
#> 
#> data:  VAR object varp_model
#> F-Test = 2.4301, df1 = 12, df2 = 1456, p-value = 0.003978
#> 
#> 
#> ---
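
# Optional sketch (not in the original): vars::causality() above tests each cause
# jointly against all the other variables, so the same result repeats for every
# target. lmtest::grangertest() gives genuinely pairwise tests, e.g. ITI vs TCG.
library(lmtest)
gdata <- data.frame(tcg = as.numeric(tcg_data_diff), iti = as.numeric(iti_data_diff))
grangertest(tcg ~ iti, order = optimal_p, data = gdata)  # does ITI Granger-cause TCG?
grangertest(iti ~ tcg, order = optimal_p, data = gdata)  # does TCG Granger-cause ITI?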

# Fitting the Sparse VAR Model:

sparse_var_fit <- BigVAR.fit(Y = data_combined, struct = 'Basic', p = 3, lambda = 1)[,,1]

#print(sparse_var_fit)

summary(sparse_var_fit)
#>        V1                V2                  V3                 V4          
#>  Min.   : -2.369   Min.   :-0.407977   Min.   :-0.59109   Min.   :-0.50857  
#>  1st Qu.:  8.431   1st Qu.:-0.109175   1st Qu.:-0.24139   1st Qu.:-0.20577  
#>  Median : 12.633   Median :-0.006556   Median :-0.10949   Median :-0.04389  
#>  Mean   : 46.600   Mean   :-0.104999   Mean   :-0.14886   Mean   :-0.12257  
#>  3rd Qu.: 50.802   3rd Qu.:-0.002381   3rd Qu.:-0.01696   3rd Qu.: 0.03932  
#>  Max.   :163.503   Max.   : 0.001092   Max.   : 0.21463   Max.   : 0.10606  
#>        V5                 V6                   V7                 V8          
#>  Min.   :-0.24796   Min.   :-0.2462918   Min.   :-0.24982   Min.   :-0.49691  
#>  1st Qu.:-0.05208   1st Qu.:-0.0830793   1st Qu.:-0.05891   1st Qu.:-0.20265  
#>  Median : 0.06273   Median :-0.0154029   Median : 0.02148   Median :-0.08952  
#>  Mean   : 0.07279   Mean   :-0.0683548   Mean   : 0.06028   Mean   :-0.09475  
#>  3rd Qu.: 0.18761   3rd Qu.:-0.0006784   3rd Qu.: 0.14066   3rd Qu.: 0.01839  
#>  Max.   : 0.41365   Max.   : 0.0036785   Max.   : 0.44797   Max.   : 0.29693  
#>        V9                V10                 V11                 V12           
#>  Min.   :-0.05937   Min.   :-0.011428   Min.   :-0.018786   Min.   :-0.478197  
#>  1st Qu.: 0.03312   1st Qu.:-0.010155   1st Qu.:-0.001633   1st Qu.:-0.160614  
#>  Median : 0.07667   Median :-0.007812   Median : 0.036839   Median : 0.008952  
#>  Mean   : 0.22168   Mean   : 0.020024   Mean   : 0.094792   Mean   :-0.092341  
#>  3rd Qu.: 0.26523   3rd Qu.: 0.022367   3rd Qu.: 0.133263   3rd Qu.: 0.077225  
#>  Max.   : 0.79274   Max.   : 0.107149   Max.   : 0.324275   Max.   : 0.090930  
#>       V13         
#>  Min.   :0.01842  
#>  1st Qu.:0.06256  
#>  Median :0.20812  
#>  Mean   :0.28708  
#>  3rd Qu.:0.43263  
#>  Max.   :0.71364
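
# Optional sketch (not in the original): rather than fixing lambda = 1, BigVAR can
# choose the penalty by rolling cross-validation via constructModel()/cv.BigVAR().
bv_spec <- constructModel(as.matrix(data_combined), p = 3, struct = "Basic",
                          gran = c(50, 10), verbose = FALSE)
bv_cv <- cv.BigVAR(bv_spec)
bv_cv  # printing shows the selected penalty and out-of-sample evaluation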

Created on 2024-04-10 with reprex v2.0.2