# Load necessary libraries
library(reprex)
library(fredr)
library(dygraphs)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(xts)
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(ggplot2)
# Set up the FRED API key to read the initial claims (ICNSA) dataset
fredr_set_key("1d2fb74509b6e2b2a9ba83de71fedf09")
# Inspect the column names of the dataset
icnsa = fredr(series_id = "ICNSA")
columns <- colnames(icnsa)
print(columns)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
#[1] "date" "series_id" "value" "realtime_start" "realtime_end"
# Create a backup of the initial dataset
icnsa_bk <- icnsa
icnsa_bk <- ts(icnsa_bk$value, frequency = 52)  # ICNSA is weekly, so 52 observations per year
# Convert date column to Date type
icnsa$date <- as.Date(icnsa$date)
# Create a dummy regressor 'covid_time' that flags the COVID period (2020-2022)
icnsa$covid_time <- ifelse(icnsa$date >= as.Date("2020-01-01") & icnsa$date <= as.Date("2022-12-31"), 1, 0)
generated_time_series <- xts(cbind(value = icnsa$value, covid_time = icnsa$covid_time), order.by = icnsa$date)
# Print the generated time series
print(generated_time_series)
#> value covid_time
#> 1967-01-07 346000 0
#> 1967-01-14 334000 0
#> 1967-01-21 277000 0
#> 1967-01-28 252000 0
#> 1967-02-04 274000 0
#> 1967-02-11 276000 0
#> 1967-02-18 247000 0
#> 1967-02-25 248000 0
#> 1967-03-04 326000 0
#> 1967-03-11 240000 0
#> ...
#> 2023-12-02 294615 0
#> 2023-12-09 249090 0
#> 2023-12-16 241040 0
#> 2023-12-23 274840 0
#> 2023-12-30 269409 0
#> 2024-01-06 318906 0
#> 2024-01-13 291330 0
#> 2024-01-20 249947 0
#> 2024-01-27 263919 0
#> 2024-02-03 232727 0
plot(generated_time_series, main = "Initial Claims (ICNSA) Time Series")
# Decomposition of Time series
icnsa_decomposition <- decompose(icnsa_bk)
plot(icnsa_decomposition)
str(generated_time_series)
#> An xts object on 1967-01-07 / 2024-02-03 containing:
#> Data: double [2979, 2]
#> Columns: value, covid_time
#> Index: Date [2979] (TZ: "UTC")
arima_timeseries_model <- auto.arima(generated_time_series[,"value"], xreg = generated_time_series[,"covid_time"])
summary(arima_timeseries_model)
#> Series: generated_time_series[, "value"]
#> Regression with ARIMA(5,0,2) errors
#>
#> Coefficients:
#> ar1 ar2 ar3 ar4 ar5 ma1 ma2 intercept
#> 0.3153 -0.1500 0.8233 -0.3932 0.1237 1.0267 0.9761 358753.45
#> s.e. 0.0221 0.0226 0.0176 0.0200 0.0191 0.0126 0.0099 17059.72
#> covid_time
#> 126260.06
#> s.e. 48367.48
#>
#> sigma^2 = 7.464e+09: log likelihood = -38085.11
#> AIC=76190.22 AICc=76190.3 BIC=76250.22
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -59.20931 86261.83 41755.3 -2.038431 10.90387 0.1143793
#> ACF1
#> Training set 0.000529841
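# A quick residual diagnostic is worth running here: if the regression-with-ARIMA-errors
# model is adequate, its residuals should look like white noise. A minimal sketch using
# the forecast package's checkresiduals(), which plots the residuals and their ACF and
# runs a Ljung-Box test with its default settings:
checkresiduals(arima_timeseries_model)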
window_size <- 52
# Smooth the COVID-era spike in initial claims using a 52-week moving average
icnsa$smoothed_value <- zoo::rollmean(icnsa$value, k = window_size, fill = NA)
# Plot the smoothed data
ggplot(icnsa, aes(x = date, y = smoothed_value)) +
geom_line() +
labs(title = "Smoothed Unemployment Numbers Over Time", x = "Date", y = "Smoothed Unemployment Numbers") +
theme_minimal()
#> Warning: Removed 51 rows containing missing values (`geom_line()`).
# Forecast 12 weeks ahead, holding the covid_time regressor at 1; naming the
# xreg column "covid_time" matches the training regressor and avoids a column-name warning
forecasted_values <- forecast(arima_timeseries_model, h = 12, xreg = matrix(1, ncol = 1, nrow = 12, dimnames = list(NULL, "covid_time")))
# Plot the forecasted values with the COVID time variable
plot(forecasted_values, main = "Unemployment Forecast with COVID Time Variable")
print(forecasted_values)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 20854 356464.6 245748.3 467181.0 187138.5643 525790.7
#> 20861 375736.3 190442.2 561030.3 92353.4301 659119.1
#> 20868 389917.4 158698.7 621136.1 36298.8774 743535.9
#> 20875 403413.5 146290.3 660536.8 10177.4320 796649.7
#> 20882 418542.9 144943.9 692141.9 109.3981 836976.4
#> 20889 425074.6 139523.9 710625.4 -11637.4830 861786.8
#> 20896 432783.2 139168.6 726397.7 -16261.5239 881827.9
#> 20903 443136.8 143623.6 742649.9 -14929.0409 901202.6
#> 20910 446342.6 142203.7 750481.4 -18797.6776 911482.8
#> 20917 451449.4 144219.3 758679.4 -18418.4072 921317.1
#> 20924 458879.4 149343.1 768415.8 -14515.5311 932274.4
#> 20931 459977.6 148572.0 771383.2 -16276.1798 936231.4
# Print the structure of forecasted_values
str(forecasted_values)
#> List of 10
#> $ method : chr "Regression with ARIMA(5,0,2) errors"
#> $ model :List of 19
#> ..$ coef : Named num [1:9] 0.315 -0.15 0.823 -0.393 0.124 ...
#> .. ..- attr(*, "names")= chr [1:9] "ar1" "ar2" "ar3" "ar4" ...
#> ..$ sigma2 : num 7.46e+09
#> ..$ var.coef : num [1:9, 1:9] 4.89e-04 -2.24e-04 -1.31e-05 -1.82e-04 5.50e-05 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:9] "ar1" "ar2" "ar3" "ar4" ...
#> .. .. ..$ : chr [1:9] "ar1" "ar2" "ar3" "ar4" ...
#> ..$ mask : logi [1:9] TRUE TRUE TRUE TRUE TRUE TRUE ...
#> ..$ loglik : num -38085
#> ..$ aic : num 76190
#> ..$ arma : int [1:7] 5 2 0 0 0 0 0
#> ..$ residuals: Time-Series [1:2979] from 1 to 20847: -4464 -11769 -54216 -9478 15598 ...
#> ..$ call : language auto.arima(y = generated_time_series[, "value"], xreg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | __truncated__ ...
#> ..$ series : chr "generated_time_series[, \"value\"]"
#> ..$ code : int 0
#> ..$ n.cond : int 0
#> ..$ nobs : int 2979
#> ..$ model :List of 10
#> .. ..$ phi : num [1:5] 0.315 -0.15 0.823 -0.393 0.124
#> .. ..$ theta: num [1:4] 1.027 0.976 0 0
#> .. ..$ Delta: num(0)
#> .. ..$ Z : num [1:5] 1 0 0 0 0
#> .. ..$ a : num [1:5] -126026 -88814 -87650 23834 -11726
#> .. ..$ P : num [1:5, 1:5] 0 0 0 0 0 ...
#> .. ..$ T : num [1:5, 1:5] 0.315 -0.15 0.823 -0.393 0.124 ...
#> .. ..$ V : num [1:5, 1:5] 1 1.027 0.976 0 0 ...
#> .. ..$ h : num 0
#> .. ..$ Pn : num [1:5, 1:5] 1 1.027 0.976 0 0 ...
#> ..$ bic : num 76250
#> ..$ aicc : num 76190
#> ..$ xreg : num [1:2979, 1] 0 0 0 0 0 0 0 0 0 0 ...
#> .. ..- attr(*, "dimnames")=List of 2
#> .. .. ..$ : chr [1:2979] "1967-01-07" "1967-01-14" "1967-01-21" "1967-01-28" ...
#> .. .. ..$ : chr "covid_time"
#> ..$ x : Time-Series [1:2979] from 1 to 20847: 346000 334000 277000 252000 274000 276000 247000 248000 326000 240000 ...
#> ..$ fitted : Time-Series [1:2979] from 1 to 20847: 350464 345769 331216 261478 258402 ...
#> ..- attr(*, "class")= chr [1:3] "forecast_ARIMA" "ARIMA" "Arima"
#> $ level : num [1:2] 80 95
#> $ mean : Time-Series [1:12] from 20854 to 20931: 356465 375736 389917 403414 418543 ...
#> $ lower : Time-Series [1:12, 1:2] from 20854 to 20931: 245748 190442 158699 146290 144944 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : NULL
#> .. ..$ : chr [1:2] "80%" "95%"
#> $ upper : Time-Series [1:12, 1:2] from 20854 to 20931: 467181 561030 621136 660537 692142 ...
#> ..- attr(*, "dimnames")=List of 2
#> .. ..$ : NULL
#> .. ..$ : chr [1:2] "80%" "95%"
#> $ x : Time-Series [1:2979] from 1 to 20847: 346000 334000 277000 252000 274000 276000 247000 248000 326000 240000 ...
#> $ series : chr "generated_time_series[, \"value\"]"
#> $ fitted : Time-Series [1:2979] from 1 to 20847: 350464 345769 331216 261478 258402 ...
#> $ residuals: Time-Series [1:2979] from 1 to 20847: -4464 -11769 -54216 -9478 15598 ...
#> - attr(*, "class")= chr "forecast"
Created on 2024-02-09 with reprex v2.1.0
library(reprex)
library(fredr)
library(dygraphs)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(xts)
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(ggplot2)
library(tseries)
# Set up the FRED API key
fredr_set_key("1d2fb74509b6e2b2a9ba83de71fedf09")
# Fetch a monthly FRED series and inspect its columns
monthly = fredr(series_id = "M0417BDE00BERM370NNBR")
columns <- colnames(monthly)
print(columns)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
plot(monthly)
head(monthly)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1897-07-01 M0417BDE00BERM370NNBR 114. 2024-02-13 2024-02-13
#> 2 1897-08-01 M0417BDE00BERM370NNBR 116. 2024-02-13 2024-02-13
#> 3 1897-09-01 M0417BDE00BERM370NNBR 115. 2024-02-13 2024-02-13
#> 4 1897-10-01 M0417BDE00BERM370NNBR 116. 2024-02-13 2024-02-13
#> 5 1897-11-01 M0417BDE00BERM370NNBR 114. 2024-02-13 2024-02-13
#> 6 1897-12-01 M0417BDE00BERM370NNBR 113. 2024-02-13 2024-02-13
# Dates belong in the xts index, not as a data column
generated_time_series <- xts(monthly$value, order.by = monthly$date)
plot(generated_time_series)
ts_data <- ts(monthly$value, frequency = 12)
# Seasonal Decomposition Plot of data
plot(stl(ts_data, s.window="periodic"), main = "Seasonal Decomposition")
#the decomposition shows both trend and seasonality
#plotting the ACF to examine the autocorrelation structure
acf(ts_data, main = "Autocorrelation Function (ACF)")
#shows a fast-decaying ACF: observations are correlated at short lags, but distant points are not
pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")
#the PACF is well within the confidence interval, so a low-order AR model can be considered
adf_result <- adf.test(monthly$value)
#> Warning in adf.test(monthly$value): p-value greater than printed p-value
#results
cat("ADF Statistic:", adf_result$statistic, "\n")
#> ADF Statistic: 5.602348
cat("p-value:", adf_result$p.value, "\n")
#> p-value: 0.99
cat("Critical Values:", adf_result$critical, "\n")
#> Critical Values:
cat("Test Type:", adf_result$test, "\n")
#> Test Type:
#the p-value is high, so the series is non-stationary and needs differencing
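# Rather than guessing the differencing order d, the forecast package can
# estimate it directly; a minimal sketch (ndiffs() uses a KPSS test by default):
ndiffs(ts_data)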
arima_model1 <- Arima(ts_data, order = c(1, 2, 1))
summary(arima_model1)
#> Series: ts_data
#> ARIMA(1,2,1)
#>
#> Coefficients:
#> ar1 ma1
#> -0.2487 -0.2797
#> s.e. 0.2051 0.1925
#>
#> sigma^2 = 32.89: log likelihood = -701.89
#> AIC=1409.78 AICc=1409.89 BIC=1419.99
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set 0.3400288 5.683833 3.335583 0.1229903 2.108703 0.2990036
#> ACF1
#> Training set -0.001010589
arima_model2 <- Arima(ts_data, order = c(1, 3, 1))
summary(arima_model2)
#> Series: ts_data
#> ARIMA(1,3,1)
#>
#> Coefficients:
#> ar1 ma1
#> -0.4927 -0.9857
#> s.e. 0.0861 0.0213
#>
#> sigma^2 = 33.47: log likelihood = -702.83
#> AIC=1411.66 AICc=1411.77 BIC=1421.86
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE ACF1
#> Training set 0.387566 5.720577 3.435684 0.1831659 2.18323 0.3079767 -0.03727901
arima_model3 <- Arima(ts_data, order = c(2, 3, 2))
summary(arima_model3)
#> Series: ts_data
#> ARIMA(2,3,2)
#>
#> Coefficients:
#> ar1 ar2 ma1 ma2
#> 0.281 0.2052 -1.9667 0.9999
#> s.e. 0.093 0.0953 0.0187 0.0187
#>
#> sigma^2 = 30.01: log likelihood = -692.75
#> AIC=1395.49 AICc=1395.77 BIC=1412.49
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE ACF1
#> Training set 0.3707702 5.39157 3.156113 0.1656218 2.00119 0.2829158 0.007535675
# ARIMA(2,3,2) gives the lowest AICc of the three candidates, so we keep it
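# As a cross-check on the hand-picked orders, auto.arima() can search over
# (p,d,q) automatically and report its AICc-best model; a minimal sketch:
auto_fit <- auto.arima(ts_data)
summary(auto_fit)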
# Forecast values
predicted_values <- forecast(arima_model3, h = 12)
print(predicted_values)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Sep 19 391.7538 384.7030 398.8046 380.9705 402.5371
#> Oct 19 431.7470 420.0547 443.4394 413.8651 449.6289
#> Nov 19 468.1901 451.4773 484.9029 442.6301 493.7501
#> Dec 19 507.1262 485.3855 528.8670 473.8766 540.3758
#> Jan 20 548.4500 521.4638 575.4362 507.1782 589.7218
#> Feb 20 593.3721 560.8204 625.9237 543.5886 643.1555
#> Mar 20 642.2109 603.6272 680.7946 583.2023 701.2195
#> Apr 20 695.3045 650.0914 740.5177 626.1570 764.4520
#> May 20 752.8133 700.2462 805.3804 672.4189 833.2077
#> Jun 20 814.8516 754.0895 875.6138 721.9239 907.7793
#> Jul 20 881.4846 811.5792 951.3900 774.5735 988.3957
#> Aug 20 952.7539 872.6598 1032.8481 830.2605 1075.2473
Created on 2024-02-13 with reprex v2.1.0
# Load required packages
library(reprex)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
library(tseries)
library(tidyverse)
library(ggplot2)
library(corrplot)
#> corrplot 0.92 loaded
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(xts)
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
# Set up the FRED API key to read the initial claims data
fredr_set_key('1d2fb74509b6e2b2a9ba83de71fedf09')
# Get the ICNSA data
# Print all the columns of the initial dataset for visualization
icnsa_data <- fredr(series_id = 'ICNSA')
icnsa_data$Date <- as.Date(icnsa_data$date)
head(icnsa_data, n = 5)
#> # A tibble: 5 × 6
#> date series_id value realtime_start realtime_end Date
#> <date> <chr> <dbl> <date> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-02-22 2024-02-22 1967-01-07
#> 2 1967-01-14 ICNSA 334000 2024-02-22 2024-02-22 1967-01-14
#> 3 1967-01-21 ICNSA 277000 2024-02-22 2024-02-22 1967-01-21
#> 4 1967-01-28 ICNSA 252000 2024-02-22 2024-02-22 1967-01-28
#> 5 1967-02-04 ICNSA 274000 2024-02-22 2024-02-22 1967-02-04
# Display the CCNSA data
ccnsa_data <- fredr(series_id = 'CCNSA')
ccnsa_data$Date <- as.Date(ccnsa_data$date)
head(ccnsa_data, n = 5)
#> # A tibble: 5 × 6
#> date series_id value realtime_start realtime_end Date
#> <date> <chr> <dbl> <date> <date> <date>
#> 1 1967-01-07 CCNSA 1594000 2024-02-22 2024-02-22 1967-01-07
#> 2 1967-01-14 CCNSA 1563000 2024-02-22 2024-02-22 1967-01-14
#> 3 1967-01-21 CCNSA 1551000 2024-02-22 2024-02-22 1967-01-21
#> 4 1967-01-28 CCNSA 1533000 2024-02-22 2024-02-22 1967-01-28
#> 5 1967-02-04 CCNSA 1534000 2024-02-22 2024-02-22 1967-02-04
# creating time series object
icnsa_ts <- ts(icnsa_data$value, frequency = 52)
ccnsa_ts <- ts(ccnsa_data$value, frequency = 52)
# plot ICNSA
plot(icnsa_ts, main = "ICNSA", xlab = "Date", ylab = "Initial Claims")
plot(stl(icnsa_ts, s.window="periodic"), main = "Seasonal Decomposition")
acf(icnsa_ts, main = "ACF for ICNSA")
pacf(icnsa_ts, main = "PACF for ICNSA")
# plot CCNSA
plot(ccnsa_ts, main = "CCNSA", xlab = "Date", ylab = "Continued Claims")
plot(stl(ccnsa_ts, s.window="periodic"), main = "Seasonal Decomposition")
acf(ccnsa_ts, main = "ACF for CCNSA")
pacf(ccnsa_ts, main = "PACF for CCNSA")
# run the ADF test to assess stationarity
adf.test(icnsa_ts, alternative="stationary")
#> Warning in adf.test(icnsa_ts, alternative = "stationary"): p-value smaller than
#> printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: icnsa_ts
#> Dickey-Fuller = -8.8664, Lag order = 14, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(ccnsa_ts, alternative="stationary")
#> Warning in adf.test(ccnsa_ts, alternative = "stationary"): p-value smaller than
#> printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ccnsa_ts
#> Dickey-Fuller = -7.5226, Lag order = 14, p-value = 0.01
#> alternative hypothesis: stationary
merged_data <- merge(icnsa_data, ccnsa_data, by = "Date", suffixes = c("_icn", "_ccn"))
head(merged_data, n =5)
#> Date date_icn series_id_icn value_icn realtime_start_icn
#> 1 1967-01-07 1967-01-07 ICNSA 346000 2024-02-22
#> 2 1967-01-14 1967-01-14 ICNSA 334000 2024-02-22
#> 3 1967-01-21 1967-01-21 ICNSA 277000 2024-02-22
#> 4 1967-01-28 1967-01-28 ICNSA 252000 2024-02-22
#> 5 1967-02-04 1967-02-04 ICNSA 274000 2024-02-22
#> realtime_end_icn date_ccn series_id_ccn value_ccn realtime_start_ccn
#> 1 2024-02-22 1967-01-07 CCNSA 1594000 2024-02-22
#> 2 2024-02-22 1967-01-14 CCNSA 1563000 2024-02-22
#> 3 2024-02-22 1967-01-21 CCNSA 1551000 2024-02-22
#> 4 2024-02-22 1967-01-28 CCNSA 1533000 2024-02-22
#> 5 2024-02-22 1967-02-04 CCNSA 1534000 2024-02-22
#> realtime_end_ccn
#> 1 2024-02-22
#> 2 2024-02-22
#> 3 2024-02-22
#> 4 2024-02-22
#> 5 2024-02-22
#checking for correlation
corr <- cor(merged_data$value_icn, merged_data$value_ccn, use = "complete.obs")
corr
#> [1] 0.714954
# a correlation of 0.71 indicates the two series are strongly related
# cross-correlation function
ccf(merged_data$value_icn, merged_data$value_ccn, lag.max = 20, main = "CCF: ICNSA and CCNSA")
# update the ts object for the merged data
icnsa_ts <- ts(merged_data$value_icn, frequency = 52)
ccnsa_ts <- ts(merged_data$value_ccn, frequency = 52)
# regARIMA model
model <- auto.arima(icnsa_ts, xreg = ccnsa_ts)
summary(model)
#> Series: icnsa_ts
#> Regression with ARIMA(0,1,1)(0,0,1)[52] errors
#>
#> Coefficients:
#> ma1 sma1 drift xreg
#> 0.4973 0.2557 55.5232 -0.0383
#> s.e. 0.0161 0.0161 2953.2570 0.0066
#>
#> sigma^2 = 7.406e+09: log likelihood = -38063.85
#> AIC=76137.71 AICc=76137.73 BIC=76167.7
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -52.27764 85985.98 38993.81 -0.5353816 9.804793 0.4106407
#> ACF1
#> Training set 0.0394591
# Forecast next week's value, using the latest CCNSA value as the regressor
# (the result is named fc to avoid masking the forecast() function)
fc <- forecast(model, xreg = tail(merged_data$value_ccn, 1), h = 1)
forecast_mean <- mean(fc$mean, na.rm = TRUE)
cat("Forecasted Value:", forecast_mean, "\n")
#> Forecasted Value: 211577.3
mean_diff <- mean(diff(icnsa_ts))
mean_diff
#> [1] -37.36434
Created on 2024-02-22 with reprex v2.1.0
# Load required packages
library(reprex)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
library(tseries)
library(tidyverse)
library(ggplot2)
library(corrplot)
#> corrplot 0.92 loaded
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(xts)
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
# Set up the FRED API key to read the initial claims data
fredr_set_key('1d2fb74509b6e2b2a9ba83de71fedf09')
# Get the ICNSA data
# Print all the columns of the initial dataset for visualization
icnsa_data <- fredr(series_id = 'ICNSA')
icnsa_data$date <- as.Date(icnsa_data$date)
column_names_icnsa <- colnames(icnsa_data)
print(column_names_icnsa)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
head(icnsa_data, n = 5)
#> # A tibble: 5 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-02-28 2024-02-28
#> 2 1967-01-14 ICNSA 334000 2024-02-28 2024-02-28
#> 3 1967-01-21 ICNSA 277000 2024-02-28 2024-02-28
#> 4 1967-01-28 ICNSA 252000 2024-02-28 2024-02-28
#> 5 1967-02-04 ICNSA 274000 2024-02-28 2024-02-28
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA over Time", x = "Date", y = "ICNSA Value") +
theme_minimal()
#setting the covid period
covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2022-01-31")
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(covid_start), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(covid_end), linetype = "dashed", color = "red") +
labs(title = "ICNSA Marking Covid Period ",
x = "Year",
y = "Number")
#setting the values during the COVID date range to NA
icnsa_data$value[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end] <- NA
covid_period_data <- icnsa_data[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end, ]
# Print the COVID-period subset to confirm the values are now NA
print(covid_period_data)
#> # A tibble: 109 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2020-01-04 ICNSA NA 2024-02-28 2024-02-28
#> 2 2020-01-11 ICNSA NA 2024-02-28 2024-02-28
#> 3 2020-01-18 ICNSA NA 2024-02-28 2024-02-28
#> 4 2020-01-25 ICNSA NA 2024-02-28 2024-02-28
#> 5 2020-02-01 ICNSA NA 2024-02-28 2024-02-28
#> 6 2020-02-08 ICNSA NA 2024-02-28 2024-02-28
#> 7 2020-02-15 ICNSA NA 2024-02-28 2024-02-28
#> 8 2020-02-22 ICNSA NA 2024-02-28 2024-02-28
#> 9 2020-02-29 ICNSA NA 2024-02-28 2024-02-28
#> 10 2020-03-07 ICNSA NA 2024-02-28 2024-02-28
#> # ℹ 99 more rows
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(covid_start), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(covid_end), linetype = "dashed", color = "red") +
labs(title = "ICNSA Marking Covid Period ",
x = "Year",
y = "Number")
# Apply cubic spline imputation for the COVID period
icnsa_data$value <- na.spline(icnsa_data$value)
# Plot the data with imputed values
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA over Time (Imputed Values for COVID Period)", x = "Date", y = "ICNSA Value") +
theme_minimal()
#filtering covid data out
icnsa_non_covid <- icnsa_data %>%
filter(date < covid_start | date > covid_end)
spar_values <- c(0.1, 0.4, 1)
# Prepare the plot window
par(mfrow=c(3, 1))
for(spar in spar_values) {
# Fit smooth spline with current spar value
fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=spar)
# Predict using the fitted spline for the entire period
predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))
# Plot
plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", spar),
xlab="Date", ylab="ICNSA Values", col="blue")
lines(icnsa_data$date, predicted_values$y, col="red")
legend("topleft", legend=c("Imputed", "Smoothed"), col=c("blue", "red"), lty=1)
}
#choosing spar=0.4
fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=0.4)
predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))
plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", "0.4"),
xlab="Date", ylab="ICNSA Values", col="blue")
lines(icnsa_data$date, predicted_values$y, col="red")
legend("topleft", legend=c("Imputed", "Smoothed"), col=c("blue", "red"), lty=1)
#Use both a multiplicative and an additive Holt-Winters model to forecast the next value
ts_data <- ts(icnsa_data$value, frequency = 52)
hwm_multi <- HoltWinters(ts_data, seasonal = "multiplicative")
forecast_hwm_multi <- forecast(hwm_multi, h = 1)
print(forecast_hwm_multi)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 190326.5 149762.9 230890.1 128289.8 252363.2
forecast_hwm_multi_mean <- mean(forecast_hwm_multi$mean, na.rm = TRUE)
print(forecast_hwm_multi_mean)
#> [1] 190326.5
hwm_additive <- HoltWinters(ts_data, seasonal = "additive")
forecast_hwm_additive <- forecast(hwm_additive, h = 1)
print(forecast_hwm_additive)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 217107.6 176164.8 258050.4 154490.9 279724.3
forecast_hwm_additive_mean <- mean(forecast_hwm_additive$mean, na.rm = TRUE)
print(forecast_hwm_additive_mean)
#> [1] 217107.6
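# To compare the two Holt-Winters fits in-sample, one can look at their sums of
# squared errors (the SSE component of each fitted HoltWinters object); a minimal sketch:
cat("Multiplicative SSE:", hwm_multi$SSE, "\n")
cat("Additive SSE:", hwm_additive$SSE, "\n")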
Created on 2024-02-28 with reprex v2.1.0
# Load required packages
library(reprex)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
library(tseries)
library(tidyverse)
library(ggplot2)
library(corrplot)
#> corrplot 0.92 loaded
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(xts)
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
library(stats)
# Set up the FRED API key to read the initial claims data
fredr_set_key('1d2fb74509b6e2b2a9ba83de71fedf09')
# Get the ICNSA data
# Print all the columns of the initial dataset for visualization
icnsa_data <- fredr(series_id = 'ICNSA')
icnsa_data$date <- as.Date(icnsa_data$date)
column_names_icnsa <- colnames(icnsa_data)
print(column_names_icnsa)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
head(icnsa_data, n = 5)
#> # A tibble: 5 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-03-07 2024-03-07
#> 2 1967-01-14 ICNSA 334000 2024-03-07 2024-03-07
#> 3 1967-01-21 ICNSA 277000 2024-03-07 2024-03-07
#> 4 1967-01-28 ICNSA 252000 2024-03-07 2024-03-07
#> 5 1967-02-04 ICNSA 274000 2024-03-07 2024-03-07
#change point detection
library(changepoint)
#> Successfully loaded changepoint package version 2.2.4
#> See NEWS for details of changes.
ts_data <- icnsa_data$value
cpt_result <- cpt.mean(ts_data)
plot(cpt_result, ts_data)
# Get the indices of change points
change_point_indices <- cpts(cpt_result)
change_point_date <- icnsa_data$date[change_point_indices-1]
print(change_point_date)
#> [1] "2020-03-07"
covid_start <- as.Date(change_point_date)
covid_end <- as.Date("2021-01-02")
# Identify the rows that fall within this COVID period
covid_start_idx <- which(icnsa_data$date == covid_start)
covid_end_idx <- which(icnsa_data$date == covid_end)
covid_idx <- rep(0, nrow(icnsa_data))
covid_idx[covid_start_idx:covid_end_idx] <- 1
covid_idx <- as.logical(covid_idx)
#marking the covid period
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(covid_start), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(covid_end), linetype = "dashed", color = "red") +
labs(title = "ICNSA Marking Covid Period ",
x = "Year",
y = "Number")
#setting the values during the COVID date range to NA
icnsa_data$value[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end] <- NA
covid_period_data <- icnsa_data[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end, ]
# Print the COVID-period subset to confirm the values are now NA
print(covid_period_data)
#> # A tibble: 44 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2020-03-07 ICNSA NA 2024-03-07 2024-03-07
#> 2 2020-03-14 ICNSA NA 2024-03-07 2024-03-07
#> 3 2020-03-21 ICNSA NA 2024-03-07 2024-03-07
#> 4 2020-03-28 ICNSA NA 2024-03-07 2024-03-07
#> 5 2020-04-04 ICNSA NA 2024-03-07 2024-03-07
#> 6 2020-04-11 ICNSA NA 2024-03-07 2024-03-07
#> 7 2020-04-18 ICNSA NA 2024-03-07 2024-03-07
#> 8 2020-04-25 ICNSA NA 2024-03-07 2024-03-07
#> 9 2020-05-02 ICNSA NA 2024-03-07 2024-03-07
#> 10 2020-05-09 ICNSA NA 2024-03-07 2024-03-07
#> # ℹ 34 more rows
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(covid_start), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(covid_end), linetype = "dashed", color = "red") +
labs(title = "ICNSA Marking Covid Period ",
x = "Year",
y = "Number")
# Convert the series to a 'ts' object (weekly data)
ts_data <- ts(icnsa_data$value, frequency = 52)
# Extend the masked window: also treat Jan 2021 through Jan 2022 as missing in the ts object
covid_start <- as.Date("2021-01-02")
covid_end <- as.Date("2022-01-31")
ts_data[icnsa_data$date >= covid_start & icnsa_data$date <= covid_end] <- NA
# Fit a local linear trend (structural time series) model
model_fit <- StructTS(ts_data, type = "trend")
print(model_fit)
#>
#> Call:
#> StructTS(x = ts_data, type = "trend")
#>
#> Variances:
#> level slope epsilon
#> 1.564e+09 0.000e+00 5.446e+08
# Use Kalman smoothing (tsSmooth) to obtain the smoothed state estimates
smoothed_model <- tsSmooth(model_fit)
# Replace the masked COVID-period values with the smoothed level estimates
icnsa_data$value[covid_idx] <- smoothed_model[covid_idx, "level"]
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line(color = "black") +
labs(title = "ICNSA over Time with Imputed Values", x = "Date", y = "ICNSA Value") +
theme_minimal()
# Fit a structural time series model to the full ICNSA series, including the COVID-imputed values
# Convert the imputed data to a 'ts' object (weekly data)
icnsa_imputed_ts <- ts(icnsa_data$value, frequency = 52)
# Fit a basic structural model with trend, seasonal, and error components
ss_imputed_model <- StructTS(icnsa_imputed_ts, type = "BSM")
summary(ss_imputed_model)
#> Length Class Mode
#> coef 4 -none- numeric
#> loglik 1 -none- numeric
#> loglik0 1 -none- numeric
#> data 2982 ts numeric
#> residuals 2982 ts numeric
#> fitted 8946 mts numeric
#> call 3 -none- call
#> series 1 -none- character
#> code 1 -none- numeric
#> model 7 -none- list
#> model0 7 -none- list
#> xtsp 3 -none- numeric
# Getting the smoothed state estimates
smoothed_imputed <- tsSmooth(ss_imputed_model)
residuals_model <- residuals(ss_imputed_model)
# Plot residuals to visualize the error component
plot(residuals_model, main="Residuals from Structural Model", ylab="Residuals", xlab="Time")
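# A Ljung-Box test on the structural-model residuals gives a formal check that
# they are close to white noise; a minimal sketch using one year of weekly lags:
Box.test(residuals_model, lag = 52, type = "Ljung-Box")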
# reading the CCNSA data
ccnsa_data = fredr(series_id = "CCNSA")
column_names_ccnsa <- colnames(ccnsa_data)
print(column_names_ccnsa)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
head(ccnsa_data)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1967-01-07 CCNSA 1594000 2024-03-07 2024-03-07
#> 2 1967-01-14 CCNSA 1563000 2024-03-07 2024-03-07
#> 3 1967-01-21 CCNSA 1551000 2024-03-07 2024-03-07
#> 4 1967-01-28 CCNSA 1533000 2024-03-07 2024-03-07
#> 5 1967-02-04 CCNSA 1534000 2024-03-07 2024-03-07
#> 6 1967-02-11 CCNSA 1557000 2024-03-07 2024-03-07
# Merging ICNSA and CCNSA data
merged_data <- merge(icnsa_data, ccnsa_data, by = "date", all = TRUE)
print(colnames(merged_data))
#> [1] "date" "series_id.x" "value.x" "realtime_start.x"
#> [5] "realtime_end.x" "series_id.y" "value.y" "realtime_start.y"
#> [9] "realtime_end.y"
# Convert CCNSA to a 'ts' object (note the outer merge can leave trailing NAs)
merged_ts <- ts(merged_data$value.y, start = c(1967, 1), frequency = 52)
# Fit an ARIMA model with a covariate
arima_with_covariate <- auto.arima(icnsa_imputed_ts, xreg = merged_ts, seasonal = TRUE)
#use the most recent observed CCNSA value as the regressor, since the future value is not yet available
past_value_ccnsa <- tail(na.omit(merged_data$value.y), 1)
print(past_value_ccnsa)
#> [1] 2098851
# Forecast the next week's value using the model with the covariate
future_week_forecast <- forecast(arima_with_covariate, h = 1, xreg = past_value_ccnsa)
print(future_week_forecast)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.34615 231104.5 176050.1 286158.8 146906.1 315302.8
forecast_mean <- mean(future_week_forecast$mean, na.rm = TRUE)
print(forecast_mean)
#> [1] 231104.5
Created on 2024-03-07 with reprex v2.1.0
library(reprex)
library(fredr)
library(dygraphs)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(xts)
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(ggplot2)
library(tseries)
library(vars)
#> Loading required package: MASS
#> Loading required package: strucchange
#> Loading required package: sandwich
#> Loading required package: urca
#> Loading required package: lmtest
library(BigVAR)
#> Loading required package: lattice
library(readxl)
data_13STI <- read_excel("/Users/umakantapatra/Downloads/SeriesReport-13S-V.xlsx")
data_14STI <- read_excel("/Users/umakantapatra/Downloads/SeriesReport-14S-V.xlsx")
data_15STI <- read_excel("/Users/umakantapatra/Downloads/SeriesReport-15S-V.xlsx")
data_16STI <- read_excel("/Users/umakantapatra/Downloads/SeriesReport-16S-V.xlsx")
# Step 3: Empirical Analysis
#View(data_13STI)
data_13STI$Period = as.Date(paste("01", data_13STI$Period, sep = "-"), format = "%d-%b-%Y")
#View(data_13STI)
plot(data_13STI, type="l")
#> Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
data_14STI$Period = as.Date(paste("01", data_14STI$Period, sep = "-"), format = "%d-%b-%Y")
#View(data_14STI)
plot(data_14STI, type="l")
#> Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
data_15STI$Period = as.Date(paste("01", data_15STI$Period, sep = "-"), format = "%d-%b-%Y")
#View(data_15STI)
plot(data_15STI, type="l")
#> Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
data_16STI$Period = as.Date(paste("01", data_16STI$Period, sep = "-"), format = "%d-%b-%Y")
#View(data_16STI)
plot(data_16STI, type="l")
#> Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion
data_13STI <- subset(data_13STI, Period <= as.Date("2024-02-01"))
data_15STI <- subset(data_15STI, Period <= as.Date("2024-02-01"))
data_14STI <- subset(data_14STI, Period <= as.Date("2024-02-01"))
data_16STI <- subset(data_16STI, Period <= as.Date("2024-02-01"))
par(mfrow = c(2, 2)) # 2 rows and 2 columns
plot(data_13STI, main = "Plot 13S - Textile Mills", type = "l")
plot(data_14STI, main = "Plot 14S - Textile Products", type = "l")
plot(data_15STI, main = "Plot 15S - Apparel", type = "l")
plot(data_16STI, main = "Plot 16S - Leather and Allied Products", type = "l")
# ADF test for stationarity for each time series
data_13STI$Value <- as.numeric(data_13STI$Value)
ts_data_13STI = ts(data_13STI$Value, frequency = 12)
adf.test(ts_data_13STI, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_13STI
#> Dickey-Fuller = -0.93277, Lag order = 7, p-value = 0.9488
#> alternative hypothesis: stationary
ts_data_13STI = diff(ts_data_13STI)
adf.test(ts_data_13STI, alternative = "stationary")
#> Warning in adf.test(ts_data_13STI, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_13STI
#> Dickey-Fuller = -5.1367, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
data_14STI$Value <- as.numeric(data_14STI$Value)
ts_data_14STI = ts(data_14STI$Value, frequency = 12)
adf.test(ts_data_14STI, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_14STI
#> Dickey-Fuller = -2.9436, Lag order = 7, p-value = 0.1787
#> alternative hypothesis: stationary
ts_data_14STI = diff(ts_data_14STI)
adf.test(ts_data_14STI, alternative = "stationary")
#> Warning in adf.test(ts_data_14STI, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_14STI
#> Dickey-Fuller = -4.4991, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
data_15STI$Value <- as.numeric(data_15STI$Value)
ts_data_15STI = ts(data_15STI$Value, frequency = 12)
adf.test(ts_data_15STI, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_15STI
#> Dickey-Fuller = -1.2818, Lag order = 7, p-value = 0.8804
#> alternative hypothesis: stationary
ts_data_15STI = diff(ts_data_15STI)
adf.test(ts_data_15STI, alternative = "stationary")
#> Warning in adf.test(ts_data_15STI, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_15STI
#> Dickey-Fuller = -5.3159, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
data_16STI$Value <- as.numeric(data_16STI$Value)
ts_data_16STI = ts(data_16STI$Value, frequency = 12)
adf.test(ts_data_16STI, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_16STI
#> Dickey-Fuller = -1.4755, Lag order = 7, p-value = 0.7986
#> alternative hypothesis: stationary
ts_data_16STI = diff(ts_data_16STI)
adf.test(ts_data_16STI, alternative = "stationary")
#> Warning in adf.test(ts_data_16STI, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_data_16STI
#> Dickey-Fuller = -5.5215, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
#Performed first-order differencing to make each series stationary
#ACF and PACF
acf(data_13STI$Value)
pacf(data_13STI$Value)
acf(data_14STI$Value)
pacf(data_14STI$Value)
acf(data_15STI$Value)
pacf(data_15STI$Value)
acf(data_16STI$Value)
pacf(data_16STI$Value)
#Correlation Analysis
# Combine all series into one dataframe
all_series <- data.frame(data_13STI$Value, data_14STI$Value, data_15STI$Value, data_16STI$Value)
# Compute correlation matrix
correlation_matrix <- cor(all_series)
# Print correlation matrix
print("Correlation Matrix between Series:")
#> [1] "Correlation Matrix between Series:"
print(correlation_matrix)
#> data_13STI.Value data_14STI.Value data_15STI.Value
#> data_13STI.Value 1.0000000 0.6216084 0.9926421
#> data_14STI.Value 0.6216084 1.0000000 0.6254968
#> data_15STI.Value 0.9926421 0.6254968 1.0000000
#> data_16STI.Value 0.9047105 0.6654211 0.8926975
#> data_16STI.Value
#> data_13STI.Value 0.9047105
#> data_14STI.Value 0.6654211
#> data_15STI.Value 0.8926975
#> data_16STI.Value 1.0000000
#Cross-Correlation Function Analysis
names <- c("13STI", "14STI", "15STI", "16STI")
# Initialize an empty list to store the cross-correlation results
ccf_results <- list()
# Loop through each pair of time series
for (i in 1:(length(names)-1)) {
for (j in (i+1):length(names)) {
# Extract the time series data
series1 <- get(paste0("data_", names[i]))
series2 <- get(paste0("data_", names[j]))
# Compute cross-correlation
ccf_result <- ccf(series1$Value, series2$Value)
# Store the result in the list
ccf_results[[paste(names[i], names[j], sep = "_vs_")]] <- ccf_result
}
}
# Plot the cross-correlation functions
par(mfrow = c(2, 3)) # Adjust the layout as needed
for (k in 1:length(ccf_results)) {
plot(ccf_results[[k]], main = names(ccf_results)[k], mar = c(3, 3, 3, 3)) # Adjust margin settings
}
# Fit VAR(1) and VAR(p) model
# Combine all series into one dataframe
all_series <- data.frame(data_13STI$Value, data_14STI$Value, data_15STI$Value, data_16STI$Value)
# Convert the dataframe to a time series object
ts_all_series <- ts(all_series, frequency = 12)
plot(ts_all_series[, 1], main = "Time Series Plot for Series 13S")
plot(ts_all_series[, 2], main = "Time Series Plot for Series 14S")
plot(ts_all_series[, 3], main = "Time Series Plot for Series 15S")
plot(ts_all_series[, 4], main = "Time Series Plot for Series 16S")
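# Before committing to a lag order, the vars package can compare information
# criteria across candidate p; a minimal sketch searching up to 8 lags:
VARselect(ts_all_series, lag.max = 8, type = "const")$selection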
# Fit VAR(1) model
var_model_1 <- VAR(ts_all_series, p = 1)
# Summary of VAR(1) model
summary(var_model_1)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: data_13STI.Value, data_14STI.Value, data_15STI.Value, data_16STI.Value
#> Deterministic variables: const
#> Sample size: 385
#> Log Likelihood: -7900.344
#> Roots of the characteristic polynomial:
#> 0.9974 0.9924 0.9789 0.9789
#> Call:
#> VAR(y = ts_all_series, p = 1)
#>
#>
#> Estimation results for equation data_13STI.Value:
#> =================================================
#> data_13STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 0.924142 0.012971 71.249 < 2e-16 ***
#> data_14STI.Value.l1 -0.029038 0.006077 -4.778 2.53e-06 ***
#> data_15STI.Value.l1 0.029501 0.005937 4.969 1.02e-06 ***
#> data_16STI.Value.l1 0.091492 0.015245 6.002 4.56e-09 ***
#> const 203.485325 34.441965 5.908 7.70e-09 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 46.48 on 380 degrees of freedom
#> Multiple R-Squared: 0.9992, Adjusted R-squared: 0.9992
#> F-statistic: 1.156e+05 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_14STI.Value:
#> =================================================
#> data_14STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 -0.051328 0.012045 -4.261 2.57e-05 ***
#> data_14STI.Value.l1 0.979558 0.005644 173.568 < 2e-16 ***
#> data_15STI.Value.l1 0.021675 0.005513 3.931 1e-04 ***
#> data_16STI.Value.l1 0.057005 0.014157 4.027 6.83e-05 ***
#> const 143.457187 31.984643 4.485 9.66e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 43.16 on 380 degrees of freedom
#> Multiple R-Squared: 0.9934, Adjusted R-squared: 0.9933
#> F-statistic: 1.427e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_15STI.Value:
#> =================================================
#> data_15STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 -0.09902 0.02199 -4.503 8.92e-06 ***
#> data_14STI.Value.l1 -0.07908 0.01030 -7.675 1.41e-13 ***
#> data_15STI.Value.l1 1.03245 0.01007 102.575 < 2e-16 ***
#> data_16STI.Value.l1 0.21406 0.02585 8.283 2.08e-15 ***
#> const 331.65249 58.39173 5.680 2.68e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 78.8 on 380 degrees of freedom
#> Multiple R-Squared: 0.9995, Adjusted R-squared: 0.9994
#> F-statistic: 1.728e+05 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_16STI.Value:
#> =================================================
#> data_16STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 0.0005947 0.0062391 0.095 0.924
#> data_14STI.Value.l1 -0.0053299 0.0029233 -1.823 0.069 .
#> data_15STI.Value.l1 -0.0016955 0.0028558 -0.594 0.553
#> data_16STI.Value.l1 1.0109152 0.0073330 137.858 <2e-16 ***
#> const 11.0417398 16.5673887 0.666 0.506
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 22.36 on 380 degrees of freedom
#> Multiple R-Squared: 0.9967, Adjusted R-squared: 0.9967
#> F-statistic: 2.887e+04 on 4 and 380 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> data_13STI.Value data_14STI.Value data_15STI.Value
#> data_13STI.Value 2160.3 430.0 1278.6
#> data_14STI.Value 430.0 1863.0 571.3
#> data_15STI.Value 1278.6 571.3 6209.2
#> data_16STI.Value 336.3 223.2 664.4
#> data_16STI.Value
#> data_13STI.Value 336.3
#> data_14STI.Value 223.2
#> data_15STI.Value 664.4
#> data_16STI.Value 499.9
#>
#> Correlation matrix of residuals:
#> data_13STI.Value data_14STI.Value data_15STI.Value
#> data_13STI.Value 1.0000 0.2143 0.3491
#> data_14STI.Value 0.2143 1.0000 0.1680
#> data_15STI.Value 0.3491 0.1680 1.0000
#> data_16STI.Value 0.3236 0.2313 0.3771
#> data_16STI.Value
#> data_13STI.Value 0.3236
#> data_14STI.Value 0.2313
#> data_15STI.Value 0.3771
#> data_16STI.Value 1.0000
# Fit VAR(p) model with p > 1 (here p = 6)
var_model_p <- VAR(ts_all_series, p = 6)
# Summary of VAR(p) model
summary(var_model_p)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: data_13STI.Value, data_14STI.Value, data_15STI.Value, data_16STI.Value
#> Deterministic variables: const
#> Sample size: 380
#> Log Likelihood: -7509.59
#> Roots of the characteristic polynomial:
#> 0.995 0.9682 0.9682 0.8979 0.8979 0.8565 0.8565 0.7569 0.7152 0.7152 0.7084 0.7084 0.6971 0.6971 0.6404 0.6404 0.6342 0.6342 0.5779 0.5411 0.5411 0.4792 0.4792 0.419
#> Call:
#> VAR(y = ts_all_series, p = 6)
#>
#>
#> Estimation results for equation data_13STI.Value:
#> =================================================
#> data_13STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + data_13STI.Value.l2 + data_14STI.Value.l2 + data_15STI.Value.l2 + data_16STI.Value.l2 + data_13STI.Value.l3 + data_14STI.Value.l3 + data_15STI.Value.l3 + data_16STI.Value.l3 + data_13STI.Value.l4 + data_14STI.Value.l4 + data_15STI.Value.l4 + data_16STI.Value.l4 + data_13STI.Value.l5 + data_14STI.Value.l5 + data_15STI.Value.l5 + data_16STI.Value.l5 + data_13STI.Value.l6 + data_14STI.Value.l6 + data_15STI.Value.l6 + data_16STI.Value.l6 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 1.043988 0.053020 19.690 < 2e-16 ***
#> data_14STI.Value.l1 0.098502 0.060789 1.620 0.10604
#> data_15STI.Value.l1 0.062769 0.036362 1.726 0.08518 .
#> data_16STI.Value.l1 0.255755 0.118188 2.164 0.03113 *
#> data_13STI.Value.l2 0.032838 0.076462 0.429 0.66785
#> data_14STI.Value.l2 -0.025540 0.095720 -0.267 0.78976
#> data_15STI.Value.l2 -0.057613 0.058964 -0.977 0.32920
#> data_16STI.Value.l2 -0.004918 0.183263 -0.027 0.97861
#> data_13STI.Value.l3 -0.012372 0.076115 -0.163 0.87097
#> data_14STI.Value.l3 0.010943 0.092014 0.119 0.90540
#> data_15STI.Value.l3 0.095023 0.057977 1.639 0.10210
#> data_16STI.Value.l3 -0.393937 0.179389 -2.196 0.02874 *
#> data_13STI.Value.l4 -0.067336 0.075713 -0.889 0.37441
#> data_14STI.Value.l4 -0.138506 0.092321 -1.500 0.13443
#> data_15STI.Value.l4 -0.027788 0.058203 -0.477 0.63335
#> data_16STI.Value.l4 0.144842 0.179422 0.807 0.42005
#> data_13STI.Value.l5 -0.021986 0.076589 -0.287 0.77423
#> data_14STI.Value.l5 -0.062572 0.092046 -0.680 0.49708
#> data_15STI.Value.l5 -0.142906 0.059908 -2.385 0.01758 *
#> data_16STI.Value.l5 0.060575 0.178990 0.338 0.73524
#> data_13STI.Value.l6 -0.018330 0.050281 -0.365 0.71566
#> data_14STI.Value.l6 0.107421 0.058172 1.847 0.06563 .
#> data_15STI.Value.l6 0.089110 0.038706 2.302 0.02190 *
#> data_16STI.Value.l6 -0.034897 0.113877 -0.306 0.75944
#> const 111.524950 38.418820 2.903 0.00393 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 40.48 on 355 degrees of freedom
#> Multiple R-Squared: 0.9994, Adjusted R-squared: 0.9994
#> F-statistic: 2.509e+04 on 24 and 355 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_14STI.Value:
#> =================================================
#> data_14STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + data_13STI.Value.l2 + data_14STI.Value.l2 + data_15STI.Value.l2 + data_16STI.Value.l2 + data_13STI.Value.l3 + data_14STI.Value.l3 + data_15STI.Value.l3 + data_16STI.Value.l3 + data_13STI.Value.l4 + data_14STI.Value.l4 + data_15STI.Value.l4 + data_16STI.Value.l4 + data_13STI.Value.l5 + data_14STI.Value.l5 + data_15STI.Value.l5 + data_16STI.Value.l5 + data_13STI.Value.l6 + data_14STI.Value.l6 + data_15STI.Value.l6 + data_16STI.Value.l6 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 -0.033813 0.046493 -0.727 0.467540
#> data_14STI.Value.l1 1.283687 0.053306 24.082 < 2e-16 ***
#> data_15STI.Value.l1 0.007999 0.031886 0.251 0.802064
#> data_16STI.Value.l1 0.219926 0.103638 2.122 0.034525 *
#> data_13STI.Value.l2 -0.014401 0.067050 -0.215 0.830056
#> data_14STI.Value.l2 -0.128809 0.083937 -1.535 0.125774
#> data_15STI.Value.l2 0.038333 0.051705 0.741 0.458955
#> data_16STI.Value.l2 -0.082675 0.160703 -0.514 0.607252
#> data_13STI.Value.l3 0.053207 0.066745 0.797 0.425882
#> data_14STI.Value.l3 -0.085521 0.080687 -1.060 0.289905
#> data_15STI.Value.l3 -0.114217 0.050840 -2.247 0.025279 *
#> data_16STI.Value.l3 -0.181017 0.157306 -1.151 0.250616
#> data_13STI.Value.l4 -0.105645 0.066393 -1.591 0.112451
#> data_14STI.Value.l4 0.060648 0.080956 0.749 0.454262
#> data_15STI.Value.l4 0.125179 0.051038 2.453 0.014661 *
#> data_16STI.Value.l4 0.017360 0.157334 0.110 0.912203
#> data_13STI.Value.l5 0.152838 0.067161 2.276 0.023459 *
#> data_14STI.Value.l5 -0.210779 0.080715 -2.611 0.009400 **
#> data_15STI.Value.l5 -0.082923 0.052533 -1.578 0.115344
#> data_16STI.Value.l5 0.042797 0.156956 0.273 0.785265
#> data_13STI.Value.l6 -0.087702 0.044091 -1.989 0.047454 *
#> data_14STI.Value.l6 0.062114 0.051010 1.218 0.224160
#> data_15STI.Value.l6 0.041545 0.033941 1.224 0.221752
#> data_16STI.Value.l6 0.015222 0.099858 0.152 0.878929
#> const 119.317885 33.689353 3.542 0.000451 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 35.5 on 355 degrees of freedom
#> Multiple R-Squared: 0.9958, Adjusted R-squared: 0.9955
#> F-statistic: 3510 on 24 and 355 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_15STI.Value:
#> =================================================
#> data_15STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + data_13STI.Value.l2 + data_14STI.Value.l2 + data_15STI.Value.l2 + data_16STI.Value.l2 + data_13STI.Value.l3 + data_14STI.Value.l3 + data_15STI.Value.l3 + data_16STI.Value.l3 + data_13STI.Value.l4 + data_14STI.Value.l4 + data_15STI.Value.l4 + data_16STI.Value.l4 + data_13STI.Value.l5 + data_14STI.Value.l5 + data_15STI.Value.l5 + data_16STI.Value.l5 + data_13STI.Value.l6 + data_14STI.Value.l6 + data_15STI.Value.l6 + data_16STI.Value.l6 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 -0.003032 0.078040 -0.039 0.969035
#> data_14STI.Value.l1 0.033352 0.089475 0.373 0.709556
#> data_15STI.Value.l1 1.281285 0.053521 23.940 < 2e-16 ***
#> data_16STI.Value.l1 0.288078 0.173959 1.656 0.098604 .
#> data_13STI.Value.l2 -0.019449 0.112544 -0.173 0.862897
#> data_14STI.Value.l2 -0.180493 0.140890 -1.281 0.200996
#> data_15STI.Value.l2 -0.071343 0.086789 -0.822 0.411609
#> data_16STI.Value.l2 0.079626 0.269743 0.295 0.768022
#> data_13STI.Value.l3 -0.004346 0.112032 -0.039 0.969076
#> data_14STI.Value.l3 0.268212 0.135435 1.980 0.048433 *
#> data_15STI.Value.l3 0.084757 0.085336 0.993 0.321282
#> data_16STI.Value.l3 -0.312962 0.264041 -1.185 0.236700
#> data_13STI.Value.l4 0.084344 0.111442 0.757 0.449645
#> data_14STI.Value.l4 -0.100597 0.135886 -0.740 0.459608
#> data_15STI.Value.l4 -0.302176 0.085669 -3.527 0.000475 ***
#> data_16STI.Value.l4 -0.089145 0.264089 -0.338 0.735897
#> data_13STI.Value.l5 0.060070 0.112731 0.533 0.594461
#> data_14STI.Value.l5 -0.062304 0.135482 -0.460 0.645891
#> data_15STI.Value.l5 -0.051144 0.088178 -0.580 0.562274
#> data_16STI.Value.l5 -0.401537 0.263454 -1.524 0.128368
#> data_13STI.Value.l6 -0.088132 0.074008 -1.191 0.234507
#> data_14STI.Value.l6 0.028643 0.085622 0.335 0.738183
#> data_15STI.Value.l6 0.043306 0.056971 0.760 0.447676
#> data_16STI.Value.l6 0.450306 0.167614 2.687 0.007559 **
#> const -34.272653 56.548336 -0.606 0.544851
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 59.58 on 355 degrees of freedom
#> Multiple R-Squared: 0.9997, Adjusted R-squared: 0.9997
#> F-statistic: 4.974e+04 on 24 and 355 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation data_16STI.Value:
#> =================================================
#> data_16STI.Value = data_13STI.Value.l1 + data_14STI.Value.l1 + data_15STI.Value.l1 + data_16STI.Value.l1 + data_13STI.Value.l2 + data_14STI.Value.l2 + data_15STI.Value.l2 + data_16STI.Value.l2 + data_13STI.Value.l3 + data_14STI.Value.l3 + data_15STI.Value.l3 + data_16STI.Value.l3 + data_13STI.Value.l4 + data_14STI.Value.l4 + data_15STI.Value.l4 + data_16STI.Value.l4 + data_13STI.Value.l5 + data_14STI.Value.l5 + data_15STI.Value.l5 + data_16STI.Value.l5 + data_13STI.Value.l6 + data_14STI.Value.l6 + data_15STI.Value.l6 + data_16STI.Value.l6 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> data_13STI.Value.l1 -0.003563 0.023883 -0.149 0.88151
#> data_14STI.Value.l1 0.017717 0.027383 0.647 0.51806
#> data_15STI.Value.l1 0.020262 0.016380 1.237 0.21690
#> data_16STI.Value.l1 1.203170 0.053239 22.600 < 2e-16 ***
#> data_13STI.Value.l2 0.029492 0.034443 0.856 0.39243
#> data_14STI.Value.l2 -0.022466 0.043118 -0.521 0.60267
#> data_15STI.Value.l2 0.016643 0.026561 0.627 0.53132
#> data_16STI.Value.l2 -0.120588 0.082553 -1.461 0.14497
#> data_13STI.Value.l3 0.050559 0.034287 1.475 0.14121
#> data_14STI.Value.l3 0.057987 0.041449 1.399 0.16268
#> data_15STI.Value.l3 -0.006818 0.026116 -0.261 0.79418
#> data_16STI.Value.l3 -0.021617 0.080807 -0.268 0.78923
#> data_13STI.Value.l4 -0.091732 0.034106 -2.690 0.00749 **
#> data_14STI.Value.l4 -0.045034 0.041587 -1.083 0.27959
#> data_15STI.Value.l4 -0.050007 0.026218 -1.907 0.05728 .
#> data_16STI.Value.l4 -0.090067 0.080822 -1.114 0.26587
#> data_13STI.Value.l5 0.035333 0.034500 1.024 0.30647
#> data_14STI.Value.l5 0.001145 0.041463 0.028 0.97798
#> data_15STI.Value.l5 -0.013021 0.026986 -0.483 0.62973
#> data_16STI.Value.l5 0.103259 0.080628 1.281 0.20114
#> data_13STI.Value.l6 -0.003718 0.022649 -0.164 0.86969
#> data_14STI.Value.l6 -0.006254 0.026204 -0.239 0.81151
#> data_15STI.Value.l6 0.026853 0.017436 1.540 0.12442
#> data_16STI.Value.l6 -0.098949 0.051297 -1.929 0.05453 .
#> const -28.927881 17.306106 -1.672 0.09550 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 18.24 on 355 degrees of freedom
#> Multiple R-Squared: 0.9979, Adjusted R-squared: 0.9978
#> F-statistic: 7052 on 24 and 355 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> data_13STI.Value data_14STI.Value data_15STI.Value
#> data_13STI.Value 1638.72 73.95 434.7
#> data_14STI.Value 73.95 1260.09 272.1
#> data_15STI.Value 434.68 272.06 3550.2
#> data_16STI.Value 88.35 54.90 114.3
#> data_16STI.Value
#> data_13STI.Value 88.35
#> data_14STI.Value 54.90
#> data_15STI.Value 114.30
#> data_16STI.Value 332.52
#>
#> Correlation matrix of residuals:
#> data_13STI.Value data_14STI.Value data_15STI.Value
#> data_13STI.Value 1.00000 0.05146 0.1802
#> data_14STI.Value 0.05146 1.00000 0.1286
#> data_15STI.Value 0.18022 0.12863 1.0000
#> data_16STI.Value 0.11968 0.08482 0.1052
#> data_16STI.Value
#> data_13STI.Value 0.11968
#> data_14STI.Value 0.08482
#> data_15STI.Value 0.10520
#> data_16STI.Value 1.00000
# Produce a one-month-ahead forecast
forecast_var_p <- predict(var_model_p, n.ahead = 1)
# Print the forecasted values
print(forecast_var_p)
#> $data_13STI.Value
#> fcst lower upper CI
#> data_13STI.Value.fcst 3515.507 3436.166 3594.849 79.34144
#>
#> $data_14STI.Value
#> fcst lower upper CI
#> data_14STI.Value.fcst 3613.969 3544.395 3683.543 69.57428
#>
#> $data_15STI.Value
#> fcst lower upper CI
#> data_15STI.Value.fcst 2194.372 2077.59 2311.154 116.782
#>
#> $data_16STI.Value
#> fcst lower upper CI
#> data_16STI.Value.fcst 1235.018 1199.278 1270.758 35.74007
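# The point forecasts and their intervals can also be visualized directly;
# a minimal sketch using the vars package's fanchart() on a longer horizon (e.g. 12 months):
forecast_var_12 <- predict(var_model_p, n.ahead = 12)
fanchart(forecast_var_12)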
# Granger causality test
granger_results <- causality(var_model_p)
#> Warning in causality(var_model_p):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (data_13STI.Value) as cause variable.
print(granger_results)
#> $Granger
#>
#> Granger causality H0: data_13STI.Value do not Granger-cause
#> data_14STI.Value data_15STI.Value data_16STI.Value
#>
#> data: VAR object var_model_p
#> F-Test = 2.0166, df1 = 18, df2 = 1420, p-value = 0.006966
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: data_13STI.Value and
#> data_14STI.Value data_15STI.Value data_16STI.Value
#>
#> data: VAR object var_model_p
#> Chi-squared = 15.732, df = 3, p-value = 0.001287
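# The warning above notes that 'cause' was not specified, so the first variable
# was used. To test a specific direction explicitly, pass the cause variable by
# name; a minimal sketch for one of the other series:
causality(var_model_p, cause = "data_14STI.Value")$Granger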
#Fit a sparse VAR with BigVAR
all_series_matrix <- as.matrix(all_series)
# Fit the sparse VAR model with LASSO regularization, lambda = 0.1, and OwnOther-based sparsity
sparse_var_model <- BigVAR.fit(Y = all_series_matrix, p = 6, lambda = 0.1, struct = "OwnOther")
# Print summary of the sparse VAR model
print(summary(sparse_var_model))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -61.94693 -0.05029 -0.00913 1.42869 0.03857 116.98556
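# Rather than fixing lambda = 0.1, BigVAR can choose the penalty by rolling
# cross-validation via constructModel() and cv.BigVAR(); a minimal sketch,
# where the penalty-grid setting gran = c(50, 10) is an illustrative choice:
big_model <- constructModel(all_series_matrix, p = 6, struct = "OwnOther", gran = c(50, 10))
cv_results <- cv.BigVAR(big_model)
predict(cv_results, n.ahead = 1)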
Created on 2024-04-10 with reprex v2.1.0