jlivsey / UB-sping24-time-series

2 stars 1 forks source link

Abhishek Bedarkar #17

Open abhishek-bedarkar opened 7 months ago

abhishek-bedarkar commented 7 months ago
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}

if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}

if (!requireNamespace("reprex", quietly = TRUE)) {
  install.packages("reprex")
}

if (!requireNamespace("fredr", quietly = TRUE)) {
  install.packages("fredr")
}

# Load required packages
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
library(readr)
#> Warning: package 'readr' was built under R version 4.3.2
library(lubridate)
#> Warning: package 'lubridate' was built under R version 4.3.2
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.2
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2

fredr_set_key('33b45f8519b2a909fca701748cf47495')
uninsured_claims <- fredr(series_id = 'ICNSA')

print(head(uninsured_claims, n = 5))
#> # A tibble: 5 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-07     2024-02-07  
#> 2 1967-01-14 ICNSA     334000 2024-02-07     2024-02-07  
#> 3 1967-01-21 ICNSA     277000 2024-02-07     2024-02-07  
#> 4 1967-01-28 ICNSA     252000 2024-02-07     2024-02-07  
#> 5 1967-02-04 ICNSA     274000 2024-02-07     2024-02-07
# transform DATE column to Date format
uninsured_claims$DATE <- as.Date(uninsured_claims$date, format="%m/%d/%Y")

# Create a time series object. Here I have kept frequency 52 to render data yearly
ts_data <- ts(uninsured_claims$value, frequency = 52)

# Here I have rendered Seasonal Decomposition Plot to check existence of any underlying patterns.
plot(stl(ts_data, s.window="periodic"), main = "Seasonal Decomposition of Uninsured Claims Data")

# From the above plot we can see that the trend is increasing and decreasing in the data over time with a spike in covid year. 
# The seasonality shows cyclical pattern in the data.

# Here I have rendered ACF Plot to analyse correlation between a time series and lagged versions of itself
acf(ts_data, main = "Autocorrelation Function (ACF)")

#From above plot we can see that there is some positive autocorrelation at the first few lags.

# PACF Plot to analyze time series and lagged versions of itself after accounting for the correlations at all shorter lags
pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")

# From above plot we can confirm that there are no significant spikes at any of the lags

# Plot of uninsured claims data
plot(ts_data, main = "Uninsured Claims Data", xlab = "Date", ylab = "Uninsured Claims")

# Plot shows slowly increasing uninsured claim value with sudden splike in covid year 

# Spike in covid year is justifiable because due to pandemic many people lost their job. I think such
# year can I have dominating effect on model. So let's try to forecast values by both including and excluding Covid
# years

# Here I have used auto arima model to fit a time series including COVID year
model_with_covid <- auto.arima(ts_data)

# predicting unemployment
forecast_values_with_covid <- forecast(model_with_covid, h = 1)
forecast_values_with_covid <- forecast_values_with_covid$mean

# Display the forecasted unemployment value including COVID year
cat("Forecasted Unemployment  (including COVID year):", forecast_values_with_covid, "\n")
#> Forecasted Unemployment  (including COVID year): 276396.2

# Now lets filter out the COVID year
uninsured_claims_without_covid <- uninsured_claims[year(uninsured_claims$date) != 2020, ]

# Create a time series object without COVID year
ts_data_without_covid <- ts(uninsured_claims_without_covid$value, frequency = 52)

plot(ts_data_without_covid, main = "Uninsured Claims Data excluding covid year", xlab = "Date", ylab = "Uninsured Claims")


# Let's try to fit same time series model but excluding COVID year
model_without_covid <- auto.arima(ts_data_without_covid)

# Forecast unemployment value
forecast_values_without_covid <- forecast(model_without_covid, h = 1)
forecasted_unemployment_without_covid <- forecast_values_without_covid$mean

cat("Forecasted Unemployment (excluding COVID year):", forecasted_unemployment_without_covid, "\n")
#> Forecasted Unemployment (excluding COVID year): 253814.9

Created on 2024-02-07 with reprex v2.1.0

abhishek-bedarkar commented 7 months ago
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}

if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}

if (!requireNamespace("reprex", quietly = TRUE)) {
  install.packages("reprex")
}

if (!requireNamespace("fredr", quietly = TRUE)) {
  install.packages("fredr")
}

# Load required packages
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
library(readr)
#> Warning: package 'readr' was built under R version 4.3.2
library(lubridate)
#> Warning: package 'lubridate' was built under R version 4.3.2
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.2
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(tseries)

# Load data
fredr_set_key('33b45f8519b2a909fca701748cf47495')
data <- fredr(series_id = 'MSRSFL445') 

# Print first 5 rows
print(head(data, 5))
#> # A tibble: 5 × 5
#>   date       series_id value realtime_start realtime_end
#>   <date>     <chr>     <dbl> <date>         <date>      
#> 1 2019-01-01 MSRSFL445   4.2 2024-02-12     2024-02-12  
#> 2 2019-02-01 MSRSFL445   2.8 2024-02-12     2024-02-12  
#> 3 2019-03-01 MSRSFL445   0.9 2024-02-12     2024-02-12  
#> 4 2019-04-01 MSRSFL445   4.7 2024-02-12     2024-02-12  
#> 5 2019-05-01 MSRSFL445   4.7 2024-02-12     2024-02-12

# Load time series data with a frequency of 12 since it's monthly data
ts_data <- ts(data$value, start = c(2019, 1), frequency = 12)  

# Decompose the time series to visualize trends and seasonality
plot(decompose(ts_data))


# From the above plot we can observe that there is some seasonality in the data

# Here I have performed the Augmented Dickey-Fuller Test to check the presence of stationarity in data
adf_value <- adf.test(ts_data)
print(adf_value)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  ts_data
#> Dickey-Fuller = -2.5289, Lag order = 3, p-value = 0.3612
#> alternative hypothesis: stationary

# Since the p-value is well above 0.05 we cannot reject the alternate hypothesis, current series is
# stationary we need to perform differencing or seasonal differencing. 

# Here I have rendered ACF Plot to analyze the correlation between a time series and lagged versions of itself
acf(ts_data, main = "Autocorrelation Function (ACF)")


# From the above plot we can see two important observations:
# 1. ACF value significantly drops at lag 3 and 4 serving as our potential p-values.s
# 2. Gradual decay in ACF value suggests the need for differencing.

# PACF Plot to analyze time series and lagged versions of itself after accounting for the correlations at all shorter lags
pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")

# From the above plot we can confirm that at lag 3 PACF value is well within boundaries. 

# Here I've experimented with ARIMA model p=3, d=1 and q=2
arima_model1 <- Arima(ts_data, order = c(3, 1, 2))
summary(arima_model1)
#> Series: ts_data 
#> ARIMA(3,1,2) 
#> 
#> Coefficients:
#>          ar1     ar2      ar3     ma1      ma2
#>       0.3639  0.4704  -0.1671  -0.726  -0.2740
#> s.e.  0.4149  0.2474   0.1676   0.420   0.4145
#> 
#> sigma^2 = 27.38:  log likelihood = -173.81
#> AIC=359.62   AICc=361.3   BIC=371.88
#> 
#> Training set error measures:
#>                       ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.06214262 4.954806 3.184096 -19.46737 76.46925 0.3175811
#>                      ACF1
#> Training set -0.005173568

# Here I've experimented with ARIMA model p=4, d=1 and q=2
arima_model2 <- Arima(ts_data, order = c(4, 2, 2))
summary(arima_model2)
#> Series: ts_data 
#> ARIMA(4,2,2) 
#> 
#> Coefficients:
#>           ar1      ar2      ar3      ar4     ma1      ma2
#>       -1.1596  -0.1231  -0.1018  -0.2221  0.0000  -1.0000
#> s.e.   0.1290   0.2048   0.2030   0.1281  0.0878   0.0878
#> 
#> sigma^2 = 28.67:  log likelihood = -173.6
#> AIC=361.2   AICc=363.53   BIC=375.37
#> 
#> Training set error measures:
#>                      ME   RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.2372466 4.9717 3.424514 -20.79488 77.61675 0.3415603
#>                     ACF1
#> Training set -0.01263695

# AIC values for both models are almost the same, however AICC shows little difference.
# Arima model 1 performed better than Arima model 2.

# Let's try for different values of p, d, and q
p_values <- 0:4
d_values <- 0:2
q_values <- 0:3

best_aic <- Inf
best_order <- c(0, 0, 0)

# Grid search
for (p in p_values) {
  for (d in d_values) {
    for (q in q_values) {
      order <- c(p, d, q)
      tryCatch({
        arima_model <- Arima(ts_data, order = order)
        current_aic <- AIC(arima_model)
        if (current_aic < best_aic) {
          best_aic <- current_aic
          best_order <- order
        }
      }, error = function(e) {})
    }
  }
}

# Best order by minimizing AIC
print(best_order)
#> [1] 3 1 3

# Best AIC value 
print(best_aic)
#> [1] 355.7573

# ARIMA(3,1,3) is the best order of Arima which results in minimum AIC value.

# Now let's fit the best model
best_arima_model <- Arima(ts_data, order = best_order)

# Summary of model
print(summary(best_arima_model))
#> Series: ts_data 
#> ARIMA(3,1,3) 
#> 
#> Coefficients:
#>           ar1     ar2     ar3     ma1      ma2      ma3
#>       -0.9512  0.4501  0.6899  0.7457  -0.7458  -1.0000
#> s.e.   0.1172  0.1643  0.1178  0.1095   0.1164   0.1111
#> 
#> sigma^2 = 23.65:  log likelihood = -170.88
#> AIC=355.76   AICc=358.04   BIC=370.06
#> 
#> Training set error measures:
#>                       ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.02503214 4.559807 2.911838 -16.43528 69.44629 0.2904261
#>                      ACF1
#> Training set -0.006409612

# Here I've plotted residuals for a given span
checkresiduals(best_arima_model)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from ARIMA(3,1,3)
#> Q* = 17.99, df = 6, p-value = 0.006258
#> 
#> Model df: 6.   Total lags used: 12
# Residual seems maximum in the year 2020, covid could be the reason for the residual spike.

# Accuracy of model
print(accuracy(best_arima_model))
#>                       ME     RMSE      MAE       MPE     MAPE      MASE
#> Training set -0.02503214 4.559807 2.911838 -16.43528 69.44629 0.2904261
#>                      ACF1
#> Training set -0.006409612

# Forecast values 
predicted_values <- forecast(best_arima_model, h = 12)
print(predicted_values)
#>          Point Forecast     Lo 80     Hi 80      Lo 95    Hi 95
#> Nov 2023      0.7583145 -5.617365  7.133994  -8.992446 10.50908
#> Dec 2023      1.4051606 -6.691383  9.501704 -10.977434 13.78776
#> Jan 2024      3.4673464 -5.827340 12.762033 -10.747650 17.68234
#> Feb 2024      3.1478541 -6.515845 12.811554 -11.631499 17.92721
#> Mar 2024      4.8261503 -5.390496 15.042797 -10.798862 20.45116
#> Apr 2024      4.5085572 -5.819184 14.836298 -11.286359 20.30347
#> May 2024      5.3456142 -5.211665 15.902893 -10.800351 21.49158
#> Jun 2024      5.5642527 -5.059929 16.188434 -10.684030 21.81254
#> Jul 2024      5.5139176 -5.174230 16.202065 -10.832193 21.86003
#> Aug 2024      6.2376666 -4.518492 16.993825 -10.212459 22.68779
#> Sep 2024      5.6773928 -5.084780 16.439566 -10.781930 22.13672
#> Oct 2024      6.5013604 -4.324615 17.327336 -10.055540 23.05826

Created on 2024-02-12 with reprex v2.1.0

abhishek-bedarkar commented 7 months ago
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}

if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}

if (!requireNamespace("reprex", quietly = TRUE)) {
  install.packages("reprex")
}

if (!requireNamespace("fredr", quietly = TRUE)) {
  install.packages("fredr")
}

if (!requireNamespace("zoo", quietly = TRUE)) {
  install.packages("zoo")
}

# Load required packages
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
library(readr)
#> Warning: package 'readr' was built under R version 4.3.2
library(lubridate)
#> Warning: package 'lubridate' was built under R version 4.3.2
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.2
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric

fredr_set_key('33b45f8519b2a909fca701748cf47495')

# Load Initial Claims data
icnsa_data <- fredr(series_id = 'ICNSA')

# Convert to date type
icnsa_data$date <- as.Date(icnsa_data$date)

# Removed covid year as done in previous homework to reduce dominance of one year
icnsa_data <- icnsa_data[year(icnsa_data$date) != 2020, ]

# Time Series Plot for ICNSA
plot(icnsa_data$date, icnsa_data$value, type = "l", col = "blue", ylab = "Initial Claims (ICNSA)", xlab = "Date")


# Seasonal Decomposition for ICNSA
decomposed_icnsa <- decompose(ts(icnsa_data$value, frequency = 52), "multiplicative")
plot(decomposed_icnsa)


# Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) for ICNSA
acf(ts(icnsa_data$value, frequency = 52), lag.max = 52)

pacf(ts(icnsa_data$value, frequency = 52), lag.max = 52)


# Load Insured Unemployment Rate (IURNSA) data with weekly frequency
iurnsa_data <- fredr(series_id = 'IURNSA')
iurnsa_data$date <- as.Date(iurnsa_data$date)
iurnsa_data <- iurnsa_data[year(iurnsa_data$date) != 2020, ]

# Time Series Plot for IURNSA
plot(iurnsa_data$date, iurnsa_data$value, type = "l", col = "green", ylab = "Insured Unemployment Rate (IURNSA)", xlab = "Date")


# Seasonal Decomposition for IURNSA
decomposed_iurnsa <- decompose(ts(iurnsa_data$value, frequency = 52), "multiplicative")
plot(decomposed_iurnsa)


# Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) for IURNSA
iurnsa_ts <- ts(na.omit(iurnsa_data$value), frequency = 52)
acf(ts(iurnsa_ts, frequency = 52), lag.max = 52)

pacf(ts(iurnsa_ts, frequency = 52), lag.max = 52)


# To match the timeseries length I have decided remove data before 1971 as API dont have data before that.
icnsa_data <- icnsa_data[icnsa_data$date >= as.Date("1971-01-01"), ]
icnsa_data <- icnsa_data[-nrow(icnsa_data), ]
iurnsa_data <- iurnsa_data[iurnsa_data$date >= as.Date("1971-01-01"), ]
merged_data <- data.frame(Date = icnsa_data$date, ICNSA = icnsa_data$value, IURNSA = iurnsa_data$value)

# Summary statistics of merged dataset
summary(merged_data)
#>       Date                ICNSA             IURNSA     
#>  Min.   :1971-01-02   Min.   : 152144   Min.   :0.800  
#>  1st Qu.:1984-01-10   1st Qu.: 274821   1st Qu.:1.900  
#>  Median :1997-01-18   Median : 332900   Median :2.500  
#>  Mean   :1997-02-08   Mean   : 356650   Mean   :2.676  
#>  3rd Qu.:2010-01-26   3rd Qu.: 412000   3rd Qu.:3.200  
#>  Max.   :2024-02-03   Max.   :1082696   Max.   :7.900

# Calculate and print correlation between ICNSA and IURNSA
correlation <- cor(merged_data$ICNSA, merged_data$IURNSA, use = "complete.obs")
print(correlation)
#> [1] 0.6339136

# Fit an auto.arima model with IURNSA as an external regressor for ICNSA
auto_model <- auto.arima(merged_data$ICNSA, xreg = merged_data$IURNSA)

# Summary of the auto.arima() model
summary(auto_model)
#> Series: merged_data$ICNSA 
#> Regression with ARIMA(0,1,1) errors 
#> 
#> Coefficients:
#>           ma1       xreg
#>       -0.3907  89216.834
#> s.e.   0.0208   6853.545
#> 
#> sigma^2 = 2.688e+09:  log likelihood = -33362.46
#> AIC=66730.92   AICc=66730.93   BIC=66748.64
#> 
#> Training set error measures:
#>                    ME     RMSE      MAE        MPE     MAPE     MASE
#> Training set 83.27416 51817.47 34656.13 -0.6499279 9.118609 0.989804
#>                      ACF1
#> Training set -0.003496045

# Create a new vector for last week value
new_values_numeric <- tail(merged_data$IURNSA, 1)

# Make a forecast for the next week with the provided values
forecast_values <- forecast(auto_model, xreg = new_values_numeric)

# Display the forecast summary
cat("Forecasted Value:", forecast_values$mean, "\n")
#> Forecasted Value: 245856.7
abhishek-bedarkar commented 7 months ago

fredr_set_key('33b45f8519b2a909fca701748cf47495')

load ICNSA data

icnsa_data <- fredr(series_id = "ICNSA")

Converting date to Date format

icnsa_data$date <- as.Date(icnsa_data$date)

Plotting original data

plot(icnsa_data$date, icnsa_data$value, type = "l", main = "Claims Original", xlab = "Year", ylab = "Number")


![](https://i.imgur.com/g4LqcVU.png)<!-- -->

``` r

# From the above plot we can select below covid start and end date.
covid_start_date <- as.Date("2020-03-01")
covid_end_date <- as.Date("2021-06-30")

# Plotting Covid period data with vertical lines to visulize the covid region
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims Marking the Covid period",
     xlab = "Year",
     ylab = "Number")

abline(v = as.numeric(covid_start_date), col = "blue", lty = 2)
abline(v = as.numeric(covid_end_date), col = "blue", lty = 2)


# Filtering non-Covid data
non_covid_period <- icnsa_data[icnsa_data$date < covid_start_date | icnsa_data$date > covid_end_date, ]

covid_period <- icnsa_data[icnsa_data$date >= covid_start_date & icnsa_data$date <= covid_end_date, ]

# Implementing cubic spline with different values of spar
lambda_values <- seq(0.1, 1.0, by = 0.1)
for (lambda in lambda_values) {
  spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = lambda)

  # Imputing new values using spline fit
  imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y

  # Updating Covid period data with imputed values
  covid_period$value <- imputed_values

  # Combining non-Covid and imputed values
  updated_icnsa_data <- rbind(non_covid_period, covid_period)
  updated_icnsa_data <- updated_icnsa_data %>% arrange(date)

  # Plotting updated data
  plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
       main = paste("Comparison of Time Series (spar =", lambda, ")"), xlab = "Year", ylab = "Number")

  lines(icnsa_data$date, icnsa_data$value, col = "blue", lwd = 2)
  legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)
}


# From the above plot we can see that trade-off between goodness of fit and smoothness of the spline is best at spar = 0.2
spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = 0.2)

# Imputing new values using spline fit
imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y

# Updating Covid period data with imputed values
covid_period$value <- imputed_values

# Combining non-Covid and imputed values
updated_icnsa_data <- rbind(non_covid_period, covid_period)
updated_icnsa_data <- updated_icnsa_data %>% arrange(date)

# Plotting updated data
plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
     main = "Updated (penalty - 0.2) Vs Original", xlab = "Year", ylab = "Number")

# Add the original time series using lines
lines(icnsa_data$date, icnsa_data$value, col = "blue", lwd = 2)

# Add legend
legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)


# Create old and new series with frequecy as 52
ts_data_old <- ts(icnsa_data$value, frequency = 52)
ts_data <- ts(updated_icnsa_data$value, frequency = 52)

# Plot both time series for change comparision
plot(ts_data_old, col = "blue", main = "Comparison of Time Series", ylab = "Value")
lines(ts_data, col = "red")
legend("topright", legend = c("Original", "Updated"), col = c("blue", "red"), lty = 1, lwd = 2)


# Implementation of Holt-Winters Multiplicative Model
hwm_multi <- HoltWinters(ts_data, seasonal = "multiplicative")
forecasted_value <- forecast(hwm_multi, h = 1)
point_forecast <- forecasted_value$mean
cat("Forecasted Unemployment using Holt-Winters Multiplicative Model :", point_forecast, "\n")
#> Forecasted Unemployment using Holt-Winters Multiplicative Model : 192700

# Implementation of Holt-Winters Additive Model
hwa_add <- HoltWinters(ts_data, seasonal = "additive")
forecasted_value <- forecast(hwa_add, h = 1)
point_forecast <- forecasted_value$mean
cat("Forecasted Unemployment using Holt-Winters additive Model :", point_forecast, "\n")
#> Forecasted Unemployment using Holt-Winters additive Model : 206691.2

Created on 2024-02-28 with reprex v2.1.0

abhishek-bedarkar commented 6 months ago
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.2
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(readr)
#> Warning: package 'readr' was built under R version 4.3.2
library(lubridate)
#> Warning: package 'lubridate' was built under R version 4.3.2
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.2
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.2
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.3.2
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.3.3
library(imputeTS)
#> Warning: package 'imputeTS' was built under R version 4.3.3
#> 
#> Attaching package: 'imputeTS'
#> The following object is masked from 'package:zoo':
#> 
#>     na.locf
library(KFAS)
#> Warning: package 'KFAS' was built under R version 4.3.3
#> Please cite KFAS in publications by using: 
#> 
#>   Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(MARSS)
#> Warning: package 'MARSS' was built under R version 4.3.3

fredr_set_key('33b45f8519b2a909fca701748cf47495')

# load ICNSA data
icnsa_data <- fredr(series_id = "ICNSA")

# Converting date to Date format
icnsa_data$date <- as.Date(icnsa_data$date)

# Plotting original data
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims Original",
     xlab = "Year",
     ylab = "Number")


# From the above plot we can select below covid start and end date.
covid_start_date <- as.Date("2020-03-01")
covid_end_date <- as.Date("2021-06-30")

# Plotting Covid period data with vertical lines to visulize the covid region
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims Marking the Covid period",
     xlab = "Year",
     ylab = "Number")

abline(v = as.numeric(covid_start_date), col = "blue", lty = 2)
abline(v = as.numeric(covid_end_date), col = "blue", lty = 2)


# Filtering non-Covid data
non_covid_period <- icnsa_data[icnsa_data$date < covid_start_date | icnsa_data$date > covid_end_date, ]

covid_period <- icnsa_data[icnsa_data$date >= covid_start_date & icnsa_data$date <= covid_end_date, ]

# Implementing cubic spline with different values of spar
lambda_values <- seq(0.1, 1.0, by = 0.1)
for (lambda in lambda_values) {
  spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = lambda)

  # Imputing new values using spline fit
  imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y

  # Updating Covid period data with imputed values
  covid_period$value <- imputed_values

  # Combining non-Covid and imputed values
  updated_icnsa_data <- rbind(non_covid_period, covid_period)
  updated_icnsa_data <- updated_icnsa_data %>% arrange(date)

  # Plotting updated data
  plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
       main = paste("Comparison of Time Series (spar =", lambda, ")"), xlab = "Year", ylab = "Number")

  lines(icnsa_data$date, icnsa_data$value, col = "blue", lwd = 2)
  legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)
}


# From the above plot we can see that trade-off between goodness of fit and smoothness of the spline is best at spar = 0.2
spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = 0.2)

# Imputing new values using spline fit
imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y

# Updating Covid period data with imputed values
covid_period$value <- imputed_values

# Combining non-Covid and imputed values
updated_icnsa_data <- rbind(non_covid_period, covid_period)
updated_icnsa_data <- updated_icnsa_data %>% arrange(date)

# Plotting updated data
plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
     main = "Updated (penalty - 0.2) Vs Original", xlab = "Year", ylab = "Number")

# Add the original time series using lines
lines(icnsa_data$date, icnsa_data$value, col = "blue", lwd = 2)

# Add legend
legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)


# Create old and new series with frequecy as 52
ts_data_old <- ts(icnsa_data$value, frequency = 52)
ts_data_spline <- ts(updated_icnsa_data$value, frequency = 52)

#### state-space missing value methodology ##

icnsa_data <- fredr(series_id = "ICNSA")

# Convert date to Date format
icnsa_data$date <- as.Date(icnsa_data$date)

# Plot original data
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims Original",
     xlab = "Year",
     ylab = "Number")


# Plot Covid period data
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims Marking the Covid period",
     xlab = "Year",
     ylab = "Number")
abline(v = as.numeric(covid_start_date), col = "blue", lty = 2)
abline(v = as.numeric(covid_end_date), col = "blue", lty = 2)


# Set values between Covid start and end date to NA
icnsa_data$value[icnsa_data$date >= covid_start_date & icnsa_data$date <= covid_end_date] <- NA

# Plot data with NA values
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims with NA Values (Covid Period)",
     xlab = "Year",
     ylab = "Number")


# Impute values for the Covid period using state-space methodology
imputed_values_na_kalman <- na_kalman(icnsa_data$value, smooth = TRUE)
icnsa_data$value <- imputed_values_na_kalman

plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims with Imputed Values (na_kalman)",
     xlab = "Year",
     ylab = "Number")


# Create a time series object
ts_data_kalman <- ts(icnsa_data$value, frequency = 52)

# Comparison of original series with imputed series
plot(ts_data_kalman, col = "blue", type = "l", lty = 1, ylab = "Value", main = "Time Series Comparison")
lines(ts_data_old, col = "black", type = "l", lty = 2)
lines(ts_data_spline, col = "green", type = "l", lty = 3)

legend("topright", legend = c("Kalman", "Original", "Splines"), col = c("blue", "black", "green"), lty = 1:3)


par(mfrow=c(1,2))

# Plot ACF
acf_result <- acf(ts_data_kalman, lag.max = 30, main = "ACF for Time Series Data")

# Plot PACF
pacf_result <- pacf(ts_data_kalman, lag.max = 30, main = "PACF for Time Series Data")


par(mfrow=c(1,1))

# Seasonal decomposition to check tre
decomposition <- stl(ts_data_kalman, s.window = "periodic")
plot(decomposition)


# Plot data with imputed values
plot(icnsa_data$date, icnsa_data$value, type = "l", 
     main = "Claims with Imputed Values (na_kalman)",
     xlab = "Year",
     ylab = "Number")


# Fitting a structural time series model with both trend and seasonal components
ssm_model <- StructTS(ts_data_kalman, type = "BSM")

summary(ssm_model)
#>           Length Class  Mode     
#> coef         4   -none- numeric  
#> loglik       1   -none- numeric  
#> loglik0      1   -none- numeric  
#> data      2982   ts     numeric  
#> residuals 2982   ts     numeric  
#> fitted    8946   mts    numeric  
#> call         3   -none- call     
#> series       1   -none- character
#> code         1   -none- numeric  
#> model        7   -none- list     
#> model0       7   -none- list     
#> xtsp         3   -none- numeric

# Use tsSmooth to obtain the smoothed state estimates
smoothed_data <- tsSmooth(ssm_model)

residuals_ssm <- residuals(ssm_model)

# Plot residuals to visualize the error component
plot(residuals_ssm, main="Residuals", ylab="Residuals", xlab="Time")


# Load IURNSA data
iurnsa_data <- fredr(series_id = "IURNSA")
iurnsa_data$date <- as.Date(iurnsa_data$date)

# Check the structure and values of iurnsa_data to ensure compatibility with ICNSA data
icnsa_data <- icnsa_data[icnsa_data$date >= as.Date("1971-01-01"), ]

# Remove last row of icnsa_data
icnsa_data <- icnsa_data[-nrow(icnsa_data), ]

merged_data <- merge(icnsa_data, iurnsa_data, by = "date", all.x = TRUE)

arima_with_covariate <- auto.arima(merged_data$value.x, xreg = merged_data$value.y, seasonal = TRUE)

print(summary(arima_with_covariate))
#> Series: merged_data$value.x 
#> Regression with ARIMA(2,1,1) errors 
#> 
#> Coefficients:
#>          ar1     ar2      ma1       xreg
#>       0.6945  0.1520  -0.9862  21484.074
#> s.e.  0.0209  0.0206   0.0041   4004.552
#> 
#> sigma^2 = 2.456e+09:  log likelihood = -33899.75
#> AIC=67809.5   AICc=67809.52   BIC=67839.13
#> 
#> Training set error measures:
#>                     ME     RMSE     MAE       MPE    MAPE      MASE
#> Training set -210.9891 49514.24 33218.4 -1.422244 8.87351 0.9845949
#>                      ACF1
#> Training set -0.004072133

# Forecast the next week's value using the model with the covariate
forecasted_value <- forecast(arima_with_covariate, h = 1, xreg = tail(merged_data$value.y, 2))

print(forecasted_value)
#>      Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
#> 2774       207688.5 144176.1 271200.8 110554.74 304822.2
#> 2775       209741.9 131911.7 287572.1  90710.82 328773.0

forecast_point_value <- mean(forecasted_value$mean, na.rm = TRUE)

print(forecast_point_value)
#> [1] 208715.2
abhishek-bedarkar commented 5 months ago

Here I have loaded all csv files

For simplicity I'm keeping name of series numerical

series1 <-read.csv("./series_1.csv") # Computer and Electronic Products: U.S. Total series2 <-read.csv("./series_2.csv") # Electronic computing manufacturing series3 <-read.csv("./series_3.csv") # Communications Equipment Manufacturing series4 <-read.csv("./series_4.csv") # Electrical Equipment Manufacturing: U.S. Total

Data exploration

cat("First 5 rows of series1:\n")

> First 5 rows of series1:

print(head(series1, 5))

> Period Value

> 1 Jan-92 86541

> 2 Feb-92 86726

> 3 Mar-92 86635

> 4 Apr-92 85228

> 5 May-92 85353

cat("\n")

cat("First 5 rows of series2:\n")

> First 5 rows of series2:

print(head(series1, 5))

> Period Value

> 1 Jan-92 86541

> 2 Feb-92 86726

> 3 Mar-92 86635

> 4 Apr-92 85228

> 5 May-92 85353

cat("\n")

cat("First 5 rows of series3:\n")

> First 5 rows of series3:

print(head(series1, 5))

> Period Value

> 1 Jan-92 86541

> 2 Feb-92 86726

> 3 Mar-92 86635

> 4 Apr-92 85228

> 5 May-92 85353

cat("\n")

cat("First 5 rows of series4:\n")

> First 5 rows of series4:

print(head(series1, 5))

> Period Value

> 1 Jan-92 86541

> 2 Feb-92 86726

> 3 Mar-92 86635

> 4 Apr-92 85228

> 5 May-92 85353

cat("\n")

From above result we can see that period column for each series is not in date format

Function to convert Period from string to valid date format

cast_to_date <- function(data) { data$Period <- dmy(paste("01-", data$Period)) return(data) }

Convert Period to valid date format for all series

series1 <- cast_to_date(series1)

cat("First 5 rows of series1:\n")

> First 5 rows of series1:

print(head(series1, 5))

> Period Value

> 1 1992-01-01 86541

> 2 1992-02-01 86726

> 3 1992-03-01 86635

> 4 1992-04-01 85228

> 5 1992-05-01 85353

cat("\n")

perform for rest of the series

series2 <- cast_to_date(series2) series3 <- cast_to_date(series3) series4 <- cast_to_date(series4)

check for missing values and removing them

missing_values_count <- function(series) { missing_count <- sum(is.na(series$Value)) cat("Missing values count in the series:", missing_count, "\n") return(missing_count) }

missing_values_count(series1)

> Missing values count in the series: 10

> [1] 10

missing_values_count(series2)

> Missing values count in the series: 10

> [1] 10

missing_values_count(series3)

> Missing values count in the series: 10

> [1] 10

missing_values_count(series4)

> Missing values count in the series: 10

> [1] 10

Remove missing values from each series

remove_missing_values <- function(series) { series <- na.omit(series) return(series) }

print("After updating missing values")

> [1] "After updating missing values"

series1 <- remove_missing_values(series1) series2 <- remove_missing_values(series2) series3 <- remove_missing_values(series3) series4 <- remove_missing_values(series4)

Data visulization

Plot each series

ggplot() + geom_line(data = series1, aes(x = Period, y = Value, color = "Series 1"), linewidth = 1) + geom_line(data = series2, aes(x = Period, y = Value, color = "Series 2"), linewidth = 1) + geom_line(data = series3, aes(x = Period, y = Value, color = "Series 3"), linewidth = 1) + geom_line(data = series4, aes(x = Period, y = Value, color = "Series 4"), linewidth = 1) + labs(x = "Period", y = "Value", color = "Series") + scale_color_manual(values = c("Series 1" = "orange", "Series 2" = "red", "Series 3" = "blue", "Series 4" = "green"), labels = c("Series 1", "Series 2", "Series 3", "Series 4")) + theme_minimal()


![](https://i.imgur.com/ImsL2iX.png)<!-- -->

``` r

# Stationariy check function 
check_stationarity <- function(series) {

  # Perform adf test and print relevant result
  adf_test <- adf.test(series$Value)
  cat("ADF Test Results for", deparse(substitute(series)), ":\n")
  cat("Test Statistic:", adf_test$statistic, "\n")
  cat("P-value:", adf_test$p.value, "\n")
  cat("Critical Values:", adf_test$critical, "\n")
  cat("Result: ")

  if (adf_test$p.value < 0.05) {
    cat("Series is stationary because p value is less than 0.05\n")
  } else {
    cat("Series is non-stationary\n")
  }
  cat("\n")

}

check_stationarity(series1)
#> ADF Test Results for series1 :
#> Test Statistic: -3.707706 
#> P-value: 0.02384469 
#> Critical Values: 
#> Result: Series is stationary because p value is less than 0.05
check_stationarity(series2)
#> ADF Test Results for series2 :
#> Test Statistic: -2.399282 
#> P-value: 0.4085477 
#> Critical Values: 
#> Result: Series is non-stationary
check_stationarity(series3)
#> ADF Test Results for series3 :
#> Test Statistic: -3.060519 
#> P-value: 0.1293384 
#> Critical Values: 
#> Result: Series is non-stationary
check_stationarity(series4)
#> ADF Test Results for series4 :
#> Test Statistic: -1.968479 
#> P-value: 0.5904555 
#> Critical Values: 
#> Result: Series is non-stationary

# From above result we can see that series 2, 3 and 4 are not stationary, so I've applied differencing 
# to make it stationary

# Function to difference the series
difference_series <- function(series) {
  series_diff <- diff(series$Value)
  return(data.frame(Period = series[-1, "Period"], Value = series_diff))
}

# Difference each non-stationary series
series2_diff <- difference_series(series2)
series3_diff <- difference_series(series3)
series4_diff <- difference_series(series4)

# Check stationarity after differencing
check_stationarity(series2_diff)
#> Warning in adf.test(series$Value): p-value smaller than printed p-value
#> ADF Test Results for series2_diff :
#> Test Statistic: -5.564854 
#> P-value: 0.01 
#> Critical Values: 
#> Result: Series is stationary because p value is less than 0.05
check_stationarity(series3_diff)
#> Warning in adf.test(series$Value): p-value smaller than printed p-value
#> ADF Test Results for series3_diff :
#> Test Statistic: -3.998121 
#> P-value: 0.01 
#> Critical Values: 
#> Result: Series is stationary because p value is less than 0.05
check_stationarity(series4_diff)
#> ADF Test Results for series4_diff :
#> Test Statistic: -3.655165 
#> P-value: 0.02783413 
#> Critical Values: 
#> Result: Series is stationary because p value is less than 0.05

# Now all series are stationary so we can check ACF and PACF to understand lag relations

# Function for ACF and PACF plot
calculate_autocorrelation <- function(series) {
  par(mfrow = c(2, 1))
  acf_result <- acf(series$Value, main = paste("ACF for", deparse(substitute(series))))
  pacf_result <- pacf(series$Value, main = paste("PACF for", deparse(substitute(series))))
}

# Calculate and plot ACF and PACF for series 1, 2, 3, and 4
calculate_autocorrelation(series1)

calculate_autocorrelation(series2_diff)

calculate_autocorrelation(series3_diff)

calculate_autocorrelation(series4_diff)


par(mfrow = c(3, 1))
# Function to assess cross-correlation
assess_cross_correlation <- function(series1, other_series, series_name) {
  cross_correlation <- ccf(series1$Value, other_series$Value, main = paste("Cross-Correlation between series 1 and", series_name))
  return(cross_correlation)
}

# Assess cross-correlation between series 1 and series 2
cross_correlation_2 <- assess_cross_correlation(series1, series2_diff, "series 2")

# Assess cross-correlation between series 1 and series 3
cross_correlation_3 <- assess_cross_correlation(series1, series3_diff, "series 3")

# Assess cross-correlation between series 1 and series 4
cross_correlation_4 <- assess_cross_correlation(series1, series4_diff, "series 4")


# Remove last element to match the series size 
series1 <- series1[-1, ]

# Merger all series to create a dataframe
data <- data.frame(series1 = series1$Value, series2 = series2_diff$Value, series3 = series3_diff$Value, series4 = series4_diff$Value)

# Fit VAR(1) model
var_model <- VAR(data, p = 1)

# Summary of the model
summary(var_model)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: series1, series2, series3, series4 
#> Deterministic variables: const 
#> Sample size: 384 
#> Log Likelihood: -11178.718 
#> Roots of the characteristic polynomial:
#> 0.9965 0.456 0.3892 0.1909
#> Call:
#> VAR(y = data, p = 1)
#> 
#> 
#> Estimation results for equation series1: 
#> ======================================== 
#> series1 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1   0.999019   0.003096 322.630  < 2e-16 ***
#> series2.l1  -0.147802   0.193451  -0.764   0.4453    
#> series3.l1   0.535634   0.082285   6.510 2.39e-10 ***
#> series4.l1   0.443841   0.244082   1.818   0.0698 .  
#> const      204.468562 344.824165   0.593   0.5536    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 936.6 on 379 degrees of freedom
#> Multiple R-Squared: 0.9965,  Adjusted R-squared: 0.9965 
#> F-statistic: 2.696e+04 on 4 and 379 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series2: 
#> ======================================== 
#> series2 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1 -1.271e-03  8.072e-04  -1.574   0.1162    
#> series2.l1 -1.894e-01  5.043e-02  -3.755   0.0002 ***
#> series3.l1  1.669e-02  2.145e-02   0.778   0.4369    
#> series4.l1 -2.000e-02  6.363e-02  -0.314   0.7534    
#> const       1.243e+02  8.989e+01   1.383   0.1675    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 244.1 on 379 degrees of freedom
#> Multiple R-Squared: 0.04237, Adjusted R-squared: 0.03226 
#> F-statistic: 4.192 on 4 and 379 DF,  p-value: 0.002469 
#> 
#> 
#> Estimation results for equation series3: 
#> ======================================== 
#> series3 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1  -0.003407   0.001778  -1.916   0.0561 .  
#> series2.l1   0.100896   0.111092   0.908   0.3643    
#> series3.l1   0.395503   0.047253   8.370 1.13e-15 ***
#> series4.l1  -0.122140   0.140168  -0.871   0.3841    
#> const      395.020514 198.020896   1.995   0.0468 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 537.8 on 379 degrees of freedom
#> Multiple R-Squared: 0.175,   Adjusted R-squared: 0.1663 
#> F-statistic:  20.1 on 4 and 379 DF,  p-value: 5.009e-15 
#> 
#> 
#> Estimation results for equation series4: 
#> ======================================== 
#> series4 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1  7.715e-04  5.881e-04   1.312   0.1903    
#> series2.l1  6.226e-02  3.674e-02   1.695   0.0909 .  
#> series3.l1 -6.680e-03  1.563e-02  -0.427   0.6693    
#> series4.l1  4.456e-01  4.635e-02   9.613   <2e-16 ***
#> const      -6.294e+01  6.549e+01  -0.961   0.3371    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 177.9 on 379 degrees of freedom
#> Multiple R-Squared: 0.2145,  Adjusted R-squared: 0.2062 
#> F-statistic: 25.88 on 4 and 379 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         series1 series2 series3 series4
#> series1  877171   53638  327344   42418
#> series2   53638   59608    6173    1827
#> series3  327344    6173  289275   14378
#> series4   42418    1827   14378   31636
#> 
#> Correlation matrix of residuals:
#>         series1 series2 series3 series4
#> series1  1.0000 0.23457 0.64984 0.25463
#> series2  0.2346 1.00000 0.04701 0.04206
#> series3  0.6498 0.04701 1.00000 0.15030
#> series4  0.2546 0.04206 0.15030 1.00000

# Fit VAR(p) model p = 16, I've selected lag value as 16 because from CCF plot we can see that at lag 16 correlation value is high.
var_model_p <- VAR(data, p = 16)

# Summary of the model
summary(var_model_p)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: series1, series2, series3, series4 
#> Deterministic variables: const 
#> Sample size: 369 
#> Log Likelihood: -10384.284 
#> Roots of the characteristic polynomial:
#> 0.9771 0.9671 0.9671 0.9542 0.9542 0.9357 0.9357 0.9342 0.9342 0.9269 0.9269 0.924 0.924 0.9233 0.9233 0.9217 0.9217 0.9214 0.9214 0.9194 0.9194 0.9182 0.9182 0.9153 0.9153 0.9085 0.9085 0.9053 0.9053 0.9035 0.9035 0.9018 0.9018 0.9016 0.9016 0.8985 0.8985 0.8949 0.8949 0.8896 0.8896 0.8752 0.8752 0.864 0.864 0.8537 0.8537 0.8518 0.8518  0.84 0.8323 0.8323 0.8241 0.8142 0.8142 0.8074 0.8074 0.8043 0.8043 0.7631 0.7631 0.5345 0.5345 0.2314
#> Call:
#> VAR(y = data, p = 16)
#> 
#> 
#> Estimation results for equation series1: 
#> ======================================== 
#> series1 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + series1.l2 + series2.l2 + series3.l2 + series4.l2 + series1.l3 + series2.l3 + series3.l3 + series4.l3 + series1.l4 + series2.l4 + series3.l4 + series4.l4 + series1.l5 + series2.l5 + series3.l5 + series4.l5 + series1.l6 + series2.l6 + series3.l6 + series4.l6 + series1.l7 + series2.l7 + series3.l7 + series4.l7 + series1.l8 + series2.l8 + series3.l8 + series4.l8 + series1.l9 + series2.l9 + series3.l9 + series4.l9 + series1.l10 + series2.l10 + series3.l10 + series4.l10 + series1.l11 + series2.l11 + series3.l11 + series4.l11 + series1.l12 + series2.l12 + series3.l12 + series4.l12 + series1.l13 + series2.l13 + series3.l13 + series4.l13 + series1.l14 + series2.l14 + series3.l14 + series4.l14 + series1.l15 + series2.l15 + series3.l15 + series4.l15 + series1.l16 + series2.l16 + series3.l16 + series4.l16 + const 
#> 
#>               Estimate Std. Error t value Pr(>|t|)    
#> series1.l1    1.347126   0.079693  16.904  < 2e-16 ***
#> series2.l1   -0.282056   0.220085  -1.282 0.200967    
#> series3.l1   -0.014002   0.117314  -0.119 0.905075    
#> series4.l1   -0.355950   0.265425  -1.341 0.180902    
#> series1.l2   -0.244125   0.132204  -1.847 0.065781 .  
#> series2.l2   -0.285109   0.220775  -1.291 0.197546    
#> series3.l2   -0.202474   0.119450  -1.695 0.091091 .  
#> series4.l2    0.817630   0.269885   3.030 0.002659 ** 
#> series1.l3    0.209564   0.132666   1.580 0.115230    
#> series2.l3    0.187157   0.220977   0.847 0.397688    
#> series3.l3   -0.119574   0.121369  -0.985 0.325303    
#> series4.l3   -0.658136   0.274169  -2.400 0.016975 *  
#> series1.l4   -0.371605   0.131474  -2.826 0.005019 ** 
#> series2.l4    0.572091   0.225999   2.531 0.011865 *  
#> series3.l4    0.148473   0.125118   1.187 0.236287    
#> series4.l4    0.263576   0.279015   0.945 0.345579    
#> series1.l5    0.081162   0.133001   0.610 0.542159    
#> series2.l5   -0.133740   0.228793  -0.585 0.559287    
#> series3.l5    0.016082   0.123910   0.130 0.896817    
#> series4.l5   -0.024395   0.281310  -0.087 0.930951    
#> series1.l6   -0.062616   0.131392  -0.477 0.634022    
#> series2.l6    0.448251   0.228666   1.960 0.050875 .  
#> series3.l6    0.191085   0.123714   1.545 0.123492    
#> series4.l6   -0.104778   0.281143  -0.373 0.709641    
#> series1.l7    0.065090   0.129913   0.501 0.616712    
#> series2.l7   -0.038677   0.226901  -0.170 0.864765    
#> series3.l7   -0.061529   0.124994  -0.492 0.622895    
#> series4.l7    0.241162   0.280508   0.860 0.390613    
#> series1.l8    0.043280   0.130271   0.332 0.739943    
#> series2.l8    0.061298   0.228413   0.268 0.788602    
#> series3.l8   -0.094966   0.125861  -0.755 0.451113    
#> series4.l8   -0.208170   0.280564  -0.742 0.458678    
#> series1.l9    0.122229   0.130347   0.938 0.349133    
#> series2.l9    0.332816   0.226661   1.468 0.143045    
#> series3.l9   -0.117420   0.126073  -0.931 0.352404    
#> series4.l9   -0.051593   0.280908  -0.184 0.854399    
#> series1.l10  -0.176181   0.130538  -1.350 0.178133    
#> series2.l10  -0.188755   0.225580  -0.837 0.403386    
#> series3.l10  -0.097529   0.124231  -0.785 0.433029    
#> series4.l10   0.361578   0.281533   1.284 0.200009    
#> series1.l11  -0.181705   0.129861  -1.399 0.162764    
#> series2.l11   0.671601   0.228289   2.942 0.003513 ** 
#> series3.l11   0.244844   0.123068   1.990 0.047541 *  
#> series4.l11  -0.237751   0.280893  -0.846 0.397990    
#> series1.l12  -0.151625   0.129893  -1.167 0.244002    
#> series2.l12   0.350992   0.229580   1.529 0.127343    
#> series3.l12   0.256012   0.123565   2.072 0.039119 *  
#> series4.l12   0.349664   0.283440   1.234 0.218289    
#> series1.l13   0.454069   0.127852   3.552 0.000444 ***
#> series2.l13  -0.004417   0.229983  -0.019 0.984690    
#> series3.l13  -0.156873   0.121881  -1.287 0.199039    
#> series4.l13  -0.431383   0.283011  -1.524 0.128482    
#> series1.l14  -0.173183   0.128964  -1.343 0.180311    
#> series2.l14  -0.131728   0.226793  -0.581 0.561785    
#> series3.l14  -0.047029   0.118491  -0.397 0.691719    
#> series4.l14   0.087536   0.279364   0.313 0.754238    
#> series1.l15   0.223119   0.127348   1.752 0.080774 .  
#> series2.l15  -0.251859   0.221436  -1.137 0.256271    
#> series3.l15  -0.312415   0.117633  -2.656 0.008328 ** 
#> series4.l15  -0.209736   0.279581  -0.750 0.453727    
#> series1.l16  -0.190842   0.079267  -2.408 0.016654 *  
#> series2.l16  -0.176121   0.215688  -0.817 0.414824    
#> series3.l16  -0.018775   0.083627  -0.225 0.822515    
#> series4.l16   0.214461   0.284571   0.754 0.451656    
#> const       737.088285 333.091635   2.213 0.027649 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 748.1 on 304 degrees of freedom
#> Multiple R-Squared: 0.998,   Adjusted R-squared: 0.9975 
#> F-statistic:  2337 on 64 and 304 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series2: 
#> ======================================== 
#> series2 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + series1.l2 + series2.l2 + series3.l2 + series4.l2 + series1.l3 + series2.l3 + series3.l3 + series4.l3 + series1.l4 + series2.l4 + series3.l4 + series4.l4 + series1.l5 + series2.l5 + series3.l5 + series4.l5 + series1.l6 + series2.l6 + series3.l6 + series4.l6 + series1.l7 + series2.l7 + series3.l7 + series4.l7 + series1.l8 + series2.l8 + series3.l8 + series4.l8 + series1.l9 + series2.l9 + series3.l9 + series4.l9 + series1.l10 + series2.l10 + series3.l10 + series4.l10 + series1.l11 + series2.l11 + series3.l11 + series4.l11 + series1.l12 + series2.l12 + series3.l12 + series4.l12 + series1.l13 + series2.l13 + series3.l13 + series4.l13 + series1.l14 + series2.l14 + series3.l14 + series4.l14 + series1.l15 + series2.l15 + series3.l15 + series4.l15 + series1.l16 + series2.l16 + series3.l16 + series4.l16 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1  -0.002794   0.021076  -0.133 0.894611    
#> series2.l1  -0.086508   0.058205  -1.486 0.138248    
#> series3.l1   0.009529   0.031026   0.307 0.758952    
#> series4.l1   0.040684   0.070196   0.580 0.562631    
#> series1.l2  -0.015683   0.034964  -0.449 0.654079    
#> series2.l2  -0.134555   0.058388  -2.305 0.021868 *  
#> series3.l2   0.073256   0.031591   2.319 0.021063 *  
#> series4.l2  -0.002168   0.071376  -0.030 0.975793    
#> series1.l3   0.009357   0.035086   0.267 0.789881    
#> series2.l3   0.263681   0.058441   4.512 9.19e-06 ***
#> series3.l3  -0.051259   0.032098  -1.597 0.111317    
#> series4.l3   0.048257   0.072509   0.666 0.506212    
#> series1.l4  -0.022162   0.034771  -0.637 0.524368    
#> series2.l4  -0.110756   0.059769  -1.853 0.064844 .  
#> series3.l4   0.052746   0.033090   1.594 0.111966    
#> series4.l4   0.112751   0.073790   1.528 0.127557    
#> series1.l5   0.021671   0.035174   0.616 0.538294    
#> series2.l5   0.075345   0.060508   1.245 0.214015    
#> series3.l5  -0.008860   0.032770  -0.270 0.787060    
#> series4.l5  -0.009387   0.074397  -0.126 0.899676    
#> series1.l6   0.011808   0.034749   0.340 0.734241    
#> series2.l6   0.103090   0.060475   1.705 0.089275 .  
#> series3.l6   0.016568   0.032718   0.506 0.612966    
#> series4.l6   0.027012   0.074353   0.363 0.716643    
#> series1.l7   0.039701   0.034358   1.156 0.248791    
#> series2.l7   0.169688   0.060008   2.828 0.004999 ** 
#> series3.l7  -0.050022   0.033057  -1.513 0.131270    
#> series4.l7  -0.082679   0.074185  -1.114 0.265947    
#> series1.l8  -0.019712   0.034452  -0.572 0.567646    
#> series2.l8  -0.074325   0.060408  -1.230 0.219502    
#> series3.l8  -0.009404   0.033286  -0.283 0.777739    
#> series4.l8  -0.217688   0.074200  -2.934 0.003603 ** 
#> series1.l9  -0.025523   0.034473  -0.740 0.459630    
#> series2.l9   0.102190   0.059944   1.705 0.089265 .  
#> series3.l9   0.016410   0.033342   0.492 0.622952    
#> series4.l9   0.045507   0.074291   0.613 0.540629    
#> series1.l10 -0.003247   0.034523  -0.094 0.925129    
#> series2.l10 -0.209089   0.059659  -3.505 0.000526 ***
#> series3.l10  0.037416   0.032855   1.139 0.255676    
#> series4.l10 -0.095637   0.074456  -1.284 0.199955    
#> series1.l11  0.004907   0.034344   0.143 0.886487    
#> series2.l11 -0.008893   0.060375  -0.147 0.882999    
#> series3.l11 -0.013242   0.032547  -0.407 0.684392    
#> series4.l11 -0.042628   0.074287  -0.574 0.566507    
#> series1.l12  0.014424   0.034352   0.420 0.674859    
#> series2.l12  0.054481   0.060716   0.897 0.370267    
#> series3.l12 -0.006039   0.032679  -0.185 0.853520    
#> series4.l12  0.132066   0.074961   1.762 0.079109 .  
#> series1.l13  0.014794   0.033813   0.438 0.662034    
#> series2.l13 -0.032297   0.060823  -0.531 0.595812    
#> series3.l13 -0.065892   0.032234  -2.044 0.041795 *  
#> series4.l13  0.065324   0.074847   0.873 0.383478    
#> series1.l14 -0.070485   0.034107  -2.067 0.039617 *  
#> series2.l14 -0.147781   0.059979  -2.464 0.014299 *  
#> series3.l14  0.045578   0.031337   1.454 0.146858    
#> series4.l14  0.014325   0.073883   0.194 0.846393    
#> series1.l15 -0.018525   0.033679  -0.550 0.582697    
#> series2.l15  0.020108   0.058563   0.343 0.731565    
#> series3.l15  0.033056   0.031110   1.063 0.288836    
#> series4.l15  0.065115   0.073940   0.881 0.379204    
#> series1.l16  0.060572   0.020964   2.889 0.004137 ** 
#> series2.l16 -0.132334   0.057043  -2.320 0.021008 *  
#> series3.l16  0.034943   0.022117   1.580 0.115154    
#> series4.l16  0.112462   0.075260   1.494 0.136131    
#> const       81.381303  88.091932   0.924 0.356311    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 197.8 on 304 degrees of freedom
#> Multiple R-Squared: 0.4852,  Adjusted R-squared: 0.3768 
#> F-statistic: 4.477 on 64 and 304 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series3: 
#> ======================================== 
#> series3 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + series1.l2 + series2.l2 + series3.l2 + series4.l2 + series1.l3 + series2.l3 + series3.l3 + series4.l3 + series1.l4 + series2.l4 + series3.l4 + series4.l4 + series1.l5 + series2.l5 + series3.l5 + series4.l5 + series1.l6 + series2.l6 + series3.l6 + series4.l6 + series1.l7 + series2.l7 + series3.l7 + series4.l7 + series1.l8 + series2.l8 + series3.l8 + series4.l8 + series1.l9 + series2.l9 + series3.l9 + series4.l9 + series1.l10 + series2.l10 + series3.l10 + series4.l10 + series1.l11 + series2.l11 + series3.l11 + series4.l11 + series1.l12 + series2.l12 + series3.l12 + series4.l12 + series1.l13 + series2.l13 + series3.l13 + series4.l13 + series1.l14 + series2.l14 + series3.l14 + series4.l14 + series1.l15 + series2.l15 + series3.l15 + series4.l15 + series1.l16 + series2.l16 + series3.l16 + series4.l16 + const 
#> 
#>               Estimate Std. Error t value Pr(>|t|)    
#> series1.l1    0.015121   0.052536   0.288  0.77368    
#> series2.l1    0.085729   0.145085   0.591  0.55504    
#> series3.l1    0.226910   0.077336   2.934  0.00360 ** 
#> series4.l1   -0.232784   0.174974  -1.330  0.18439    
#> series1.l2   -0.009042   0.087152  -0.104  0.91744    
#> series2.l2    0.123273   0.145540   0.847  0.39766    
#> series3.l2   -0.034518   0.078745  -0.438  0.66144    
#> series4.l2    0.395345   0.177915   2.222  0.02701 *  
#> series1.l3    0.028334   0.087457   0.324  0.74618    
#> series2.l3    0.002355   0.145673   0.016  0.98711    
#> series3.l3    0.197209   0.080009   2.465  0.01426 *  
#> series4.l3   -0.115636   0.180739  -0.640  0.52279    
#> series1.l4   -0.046482   0.086671  -0.536  0.59214    
#> series2.l4    0.517404   0.148984   3.473  0.00059 ***
#> series3.l4    0.044390   0.082481   0.538  0.59084    
#> series4.l4   -0.142364   0.183934  -0.774  0.43954    
#> series1.l5   -0.009533   0.087677  -0.109  0.91349    
#> series2.l5   -0.214224   0.150826  -1.420  0.15653    
#> series3.l5    0.054412   0.081684   0.666  0.50583    
#> series4.l5   -0.041928   0.185447  -0.226  0.82128    
#> series1.l6   -0.014116   0.086617  -0.163  0.87065    
#> series2.l6    0.302194   0.150742   2.005  0.04588 *  
#> series3.l6    0.226777   0.081555   2.781  0.00576 ** 
#> series4.l6    0.047618   0.185336   0.257  0.79741    
#> series1.l7    0.063577   0.085642   0.742  0.45844    
#> series2.l7   -0.275169   0.149579  -1.840  0.06680 .  
#> series3.l7    0.033432   0.082399   0.406  0.68522    
#> series4.l7   -0.234194   0.184917  -1.266  0.20631    
#> series1.l8   -0.002814   0.085878  -0.033  0.97388    
#> series2.l8    0.180068   0.150575   1.196  0.23268    
#> series3.l8   -0.093130   0.082970  -1.122  0.26256    
#> series4.l8    0.036454   0.184955   0.197  0.84389    
#> series1.l9    0.040042   0.085928   0.466  0.64156    
#> series2.l9    0.141831   0.149420   0.949  0.34327    
#> series3.l9   -0.047127   0.083111  -0.567  0.57111    
#> series4.l9   -0.113344   0.185181  -0.612  0.54095    
#> series1.l10  -0.060655   0.086054  -0.705  0.48144    
#> series2.l10   0.050865   0.148708   0.342  0.73255    
#> series3.l10  -0.034594   0.081896  -0.422  0.67302    
#> series4.l10   0.178489   0.185594   0.962  0.33696    
#> series1.l11  -0.064747   0.085608  -0.756  0.45004    
#> series2.l11   0.369178   0.150494   2.453  0.01472 *  
#> series3.l11   0.070080   0.081129   0.864  0.38838    
#> series4.l11  -0.003655   0.185172  -0.020  0.98427    
#> series1.l12  -0.066109   0.085629  -0.772  0.44069    
#> series2.l12   0.383118   0.151345   2.531  0.01186 *  
#> series3.l12   0.005202   0.081457   0.064  0.94912    
#> series4.l12   0.201544   0.186851   1.079  0.28161    
#> series1.l13   0.184501   0.084283   2.189  0.02935 *  
#> series2.l13  -0.036493   0.151610  -0.241  0.80995    
#> series3.l13  -0.080154   0.080347  -0.998  0.31927    
#> series4.l13  -0.126162   0.186568  -0.676  0.49941    
#> series1.l14  -0.054497   0.085016  -0.641  0.52199    
#> series2.l14   0.008907   0.149508   0.060  0.95253    
#> series3.l14  -0.054322   0.078112  -0.695  0.48732    
#> series4.l14  -0.014207   0.184163  -0.077  0.93856    
#> series1.l15   0.081662   0.083951   0.973  0.33146    
#> series2.l15  -0.262274   0.145976  -1.797  0.07338 .  
#> series3.l15  -0.061353   0.077547  -0.791  0.42946    
#> series4.l15   0.018009   0.184307   0.098  0.92223    
#> series1.l16  -0.088179   0.052255  -1.687  0.09254 .  
#> series2.l16   0.014854   0.142187   0.104  0.91687    
#> series3.l16   0.009669   0.055129   0.175  0.86089    
#> series4.l16  -0.118152   0.187596  -0.630  0.52928    
#> const       353.562171 219.582137   1.610  0.10840    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 493.1 on 304 degrees of freedom
#> Multiple R-Squared: 0.4399,  Adjusted R-squared: 0.322 
#> F-statistic:  3.73 on 64 and 304 DF,  p-value: 7.251e-15 
#> 
#> 
#> Estimation results for equation series4: 
#> ======================================== 
#> series4 = series1.l1 + series2.l1 + series3.l1 + series4.l1 + series1.l2 + series2.l2 + series3.l2 + series4.l2 + series1.l3 + series2.l3 + series3.l3 + series4.l3 + series1.l4 + series2.l4 + series3.l4 + series4.l4 + series1.l5 + series2.l5 + series3.l5 + series4.l5 + series1.l6 + series2.l6 + series3.l6 + series4.l6 + series1.l7 + series2.l7 + series3.l7 + series4.l7 + series1.l8 + series2.l8 + series3.l8 + series4.l8 + series1.l9 + series2.l9 + series3.l9 + series4.l9 + series1.l10 + series2.l10 + series3.l10 + series4.l10 + series1.l11 + series2.l11 + series3.l11 + series4.l11 + series1.l12 + series2.l12 + series3.l12 + series4.l12 + series1.l13 + series2.l13 + series3.l13 + series4.l13 + series1.l14 + series2.l14 + series3.l14 + series4.l14 + series1.l15 + series2.l15 + series3.l15 + series4.l15 + series1.l16 + series2.l16 + series3.l16 + series4.l16 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> series1.l1   0.021166   0.017680   1.197 0.232173    
#> series2.l1   0.078381   0.048826   1.605 0.109462    
#> series3.l1  -0.014708   0.026026  -0.565 0.572404    
#> series4.l1   0.190283   0.058884   3.231 0.001367 ** 
#> series1.l2  -0.035644   0.029329  -1.215 0.225197    
#> series2.l2   0.018530   0.048979   0.378 0.705449    
#> series3.l2  -0.017160   0.026500  -0.648 0.517754    
#> series4.l2   0.119794   0.059874   2.001 0.046305 *  
#> series1.l3   0.037931   0.029432   1.289 0.198457    
#> series2.l3   0.037392   0.049023   0.763 0.446214    
#> series3.l3  -0.036504   0.026926  -1.356 0.176189    
#> series4.l3   0.176361   0.060824   2.900 0.004010 ** 
#> series1.l4  -0.017782   0.029167  -0.610 0.542541    
#> series2.l4  -0.046079   0.050138  -0.919 0.358796    
#> series3.l4  -0.021899   0.027757  -0.789 0.430754    
#> series4.l4   0.231606   0.061899   3.742 0.000219 ***
#> series1.l5  -0.021389   0.029506  -0.725 0.469081    
#> series2.l5  -0.040386   0.050757  -0.796 0.426848    
#> series3.l5   0.035782   0.027489   1.302 0.194018    
#> series4.l5  -0.001763   0.062408  -0.028 0.977485    
#> series1.l6   0.033504   0.029149   1.149 0.251292    
#> series2.l6  -0.015972   0.050729  -0.315 0.753087    
#> series3.l6   0.024875   0.027446   0.906 0.365485    
#> series4.l6   0.034454   0.062371   0.552 0.581082    
#> series1.l7  -0.062771   0.028821  -2.178 0.030178 *  
#> series2.l7  -0.027685   0.050338  -0.550 0.582733    
#> series3.l7   0.033375   0.027730   1.204 0.229696    
#> series4.l7   0.045884   0.062230   0.737 0.461493    
#> series1.l8   0.029286   0.028901   1.013 0.311699    
#> series2.l8   0.024115   0.050673   0.476 0.634490    
#> series3.l8   0.012998   0.027922   0.466 0.641902    
#> series4.l8   0.007026   0.062243   0.113 0.910204    
#> series1.l9   0.037992   0.028917   1.314 0.189894    
#> series2.l9  -0.062099   0.050285  -1.235 0.217804    
#> series3.l9  -0.032241   0.027969  -1.153 0.249920    
#> series4.l9   0.051557   0.062319   0.827 0.408713    
#> series1.l10 -0.023825   0.028960  -0.823 0.411318    
#> series2.l10  0.002025   0.050045   0.040 0.967750    
#> series3.l10 -0.017385   0.027560  -0.631 0.528639    
#> series4.l10  0.067652   0.062458   1.083 0.279596    
#> series1.l11  0.011052   0.028810   0.384 0.701527    
#> series2.l11  0.066795   0.050646   1.319 0.188208    
#> series3.l11 -0.009197   0.027302  -0.337 0.736457    
#> series4.l11 -0.029636   0.062316  -0.476 0.634717    
#> series1.l12  0.015231   0.028817   0.529 0.597494    
#> series2.l12 -0.036144   0.050932  -0.710 0.478467    
#> series3.l12 -0.033956   0.027413  -1.239 0.216418    
#> series4.l12 -0.202854   0.062881  -3.226 0.001392 ** 
#> series1.l13 -0.026181   0.028364  -0.923 0.356717    
#> series2.l13  0.099355   0.051022   1.947 0.052418 .  
#> series3.l13  0.012406   0.027039   0.459 0.646705    
#> series4.l13 -0.087160   0.062786  -1.388 0.166086    
#> series1.l14  0.008832   0.028611   0.309 0.757757    
#> series2.l14  0.007884   0.050314   0.157 0.875585    
#> series3.l14 -0.004335   0.026287  -0.165 0.869116    
#> series4.l14 -0.063085   0.061977  -1.018 0.309539    
#> series1.l15  0.001993   0.028252   0.071 0.943806    
#> series2.l15  0.045534   0.049125   0.927 0.354712    
#> series3.l15 -0.024430   0.026097  -0.936 0.349952    
#> series4.l15  0.113780   0.062025   1.834 0.067567 .  
#> series1.l16 -0.009874   0.017585  -0.561 0.574881    
#> series2.l16  0.014568   0.047850   0.304 0.760993    
#> series3.l16 -0.005313   0.018553  -0.286 0.774804    
#> series4.l16  0.032891   0.063132   0.521 0.602751    
#> const       62.811298  73.896101   0.850 0.395997    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 166 on 304 degrees of freedom
#> Multiple R-Squared: 0.4476,  Adjusted R-squared: 0.3313 
#> F-statistic: 3.848 on 64 and 304 DF,  p-value: 1.502e-15 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         series1 series2 series3 series4
#> series1  559587   37584  244662   27782
#> series2   37584   39139    4849    3346
#> series3  244662    4849  243183   13222
#> series4   27782    3346   13222   27541
#> 
#> Correlation matrix of residuals:
#>         series1 series2 series3 series4
#> series1  1.0000 0.25396 0.66323  0.2238
#> series2  0.2540 1.00000 0.04971  0.1019
#> series3  0.6632 0.04971 1.00000  0.1616
#> series4  0.2238 0.10191 0.16156  1.0000

forecast_1 <- predict(var_model, n.ahead = 1)

# Generate one-month ahead forecast for VAR(p) model
forecast_p <- predict(var_model_p, n.ahead = 1)

# Print forecast results
print("Forecast for  series 1 using VAR(1) model:")
#> [1] "Forecast for  series 1 using VAR(1) model:"
print(forecast_1$fcst$series1)
#>                  fcst    lower    upper       CI
#> series1.fcst 135087.3 133251.6 136922.9 1835.652

print("Forecast for series 1 using VAR(p) model:")
#> [1] "Forecast for series 1 using VAR(p) model:"
print(forecast_p$fcst$series1)
#>                  fcst    lower    upper       CI
#> series1.fcst 134801.4 133335.2 136267.6 1466.161

# Granger causality test
granger_results <- causality(var_model_p)
#> Warning in causality(var_model_p): 
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (series1) as cause variable.
print(granger_results)
#> $Granger
#> 
#>  Granger causality H0: series1 do not Granger-cause series2 series3
#>  series4
#> 
#> data:  VAR object var_model_p
#> F-Test = 1.414, df1 = 48, df2 = 1216, p-value = 0.03445
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: series1 and series2 series3
#>  series4
#> 
#> data:  VAR object var_model_p
#> Chi-squared = 122.73, df = 3, p-value < 2.2e-16

# Create data matrix for BigVar
data_matrix <- as.matrix(data)

# Fit the sparse VAR model with LASSO regularization, lambda = 0.1, and lag-based sparsity
sparse_var_model <- BigVAR.fit(data_matrix, p = 16, lambda = 0.1, struct = "Lag")

# Print summary of the sparse VAR model
print(summary(sparse_var_model))
#>     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
#>  -0.6510  -0.0509   0.0029   4.7696   0.0609 737.7700

Created on 2024-04-10 with reprex v2.1.0