jlivsey / UB-sping24-time-series

3 stars 1 forks source link

Vasundhara Yerriboina #36

Open Vasundharayerriboina opened 4 months ago

Vasundharayerriboina commented 4 months ago

homework1

install.packages(c("fredr", "forecast", "tseries","reprex"))
#> Installing packages into 'C:/Users/vasundhara/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'fredr' successfully unpacked and MD5 sums checked
#> package 'forecast' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'forecast'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\vasundhara\AppData\Local\R\win-library\4.3\00LOCK\forecast\libs\x64\forecast.dll
#> to
#> C:\Users\vasundhara\AppData\Local\R\win-library\4.3\forecast\libs\x64\forecast.dll:
#> Permission denied
#> Warning: restored 'forecast'
#> package 'tseries' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'tseries'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\vasundhara\AppData\Local\R\win-library\4.3\00LOCK\tseries\libs\x64\tseries.dll
#> to
#> C:\Users\vasundhara\AppData\Local\R\win-library\4.3\tseries\libs\x64\tseries.dll:
#> Permission denied
#> Warning: restored 'tseries'
#> package 'reprex' successfully unpacked and MD5 sums checked
#> 
#> The downloaded binary packages are in
#>  C:\Users\vasundhara\AppData\Local\Temp\RtmpsBiApc\downloaded_packages
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
fredr_set_key("5348a4120a46bbd80c1cd3c29fb1104e")
icnsa_data <- fredr(series_id = "ICNSA", observation_start = as.Date("2000-01-01"))
head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 2000-01-01 ICNSA     439912 2024-02-22     2024-02-22  
#> 2 2000-01-08 ICNSA     606897 2024-02-22     2024-02-22  
#> 3 2000-01-15 ICNSA     442494 2024-02-22     2024-02-22  
#> 4 2000-01-22 ICNSA     328841 2024-02-22     2024-02-22  
#> 5 2000-01-29 ICNSA     332740 2024-02-22     2024-02-22  
#> 6 2000-02-05 ICNSA     365245 2024-02-22     2024-02-22
unrate_data <- fredr(series_id = "UNRATE", observation_start = as.Date("2000-01-01"))
head(unrate_data)
#> # A tibble: 6 × 5
#>   date       series_id value realtime_start realtime_end
#>   <date>     <chr>     <dbl> <date>         <date>      
#> 1 2000-01-01 UNRATE      4   2024-02-22     2024-02-22  
#> 2 2000-02-01 UNRATE      4.1 2024-02-22     2024-02-22  
#> 3 2000-03-01 UNRATE      4   2024-02-22     2024-02-22  
#> 4 2000-04-01 UNRATE      3.8 2024-02-22     2024-02-22  
#> 5 2000-05-01 UNRATE      4   2024-02-22     2024-02-22  
#> 6 2000-06-01 UNRATE      4   2024-02-22     2024-02-22
adf.test(icnsa_data$value, alternative = "stationary")
#> Warning in adf.test(icnsa_data$value, alternative = "stationary"): p-value
#> smaller than printed p-value
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  icnsa_data$value
#> Dickey-Fuller = -6.1512, Lag order = 10, p-value = 0.01
#> alternative hypothesis: stationary
plot(icnsa_data$date, icnsa_data$value, type = "l", xlab = "Year", ylab = "ICNSA")

combined_data <- merge(icnsa_data, unrate_data, by = "date")
model_data <- na.omit(combined_data)
fit <- auto.arima(model_data$value.x, xreg = model_data$value.y)
summary(fit)
#> Series: model_data$value.x 
#> Regression with ARIMA(0,0,0) errors 
#> 
#> Coefficients:
#>           xreg
#>       61475.95
#> s.e.   3289.30
#> 
#> sigma^2 = 1.591e+10:  log likelihood = -552.38
#> AIC=1108.76   AICc=1109.07   BIC=1112.24
#> 
#> Training set error measures:
#>                  ME     RMSE      MAE       MPE     MAPE      MASE      ACF1
#> Training set 6707.5 124613.8 87089.61 -5.057499 23.45817 0.6473967 0.2199446
checkresiduals(fit)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(0,0,0) errors
#> Q* = 8.9069, df = 8, p-value = 0.3502
#> 
#> Model df: 0.   Total lags used: 8
unrate_model <- auto.arima(model_data$value.y)
unrate_forecast <- forecast(unrate_model, h=5) 
future_unrate_values <- unrate_forecast$mean
forecast_fit <- forecast(fit, xreg = future_unrate_values)
print(forecast_fit)
#>    Point Forecast     Lo 80    Hi 80    Lo 95    Hi 95
#> 43       237628.0  75993.19 399262.8 -9571.12 484827.2
#> 44       255514.4  93879.58 417149.2  8315.27 502713.5
#> 45       269757.1 108122.31 431392.0 22558.00 516956.3
#> 46       281098.5 119463.64 442733.3 33899.34 528297.6
#> 47       290129.4 128494.62 451764.3 42930.31 537328.6
plot(forecast_fit)

Created on 2024-02-22 with reprex v2.1.0

Vasundharayerriboina commented 4 months ago

Homework 2 ublearns

library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tidyverse)
library(splines)
library(reprex)
#API key
fred_api_key <- "a0512c86722aa5efb0f73b0c42ab03c4"
fredr_set_key(fred_api_key)
# Function to fetch data from FRED API
icnsa_data <- fredr(series_id = "ICNSA")
icnsa_data <- fetch_fred_data("ICNSA")
#> Error in fetch_fred_data("ICNSA"): could not find function "fetch_fred_data"
head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-28     2024-02-28  
#> 2 1967-01-14 ICNSA     334000 2024-02-28     2024-02-28  
#> 3 1967-01-21 ICNSA     277000 2024-02-28     2024-02-28  
#> 4 1967-01-28 ICNSA     252000 2024-02-28     2024-02-28  
#> 5 1967-02-04 ICNSA     274000 2024-02-28     2024-02-28  
#> 6 1967-02-11 ICNSA     276000 2024-02-28     2024-02-28
# Plotting ICNSA data to visually identify Covid pandemic times
plot(icnsa_data$date, icnsa_data$value, type = "l", xlab = "Date", ylab = "ICNSA", main = "Initial Claims (ICNSA)")

#start and end dates for Covid period
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-07-31")
# Creating cubic spline model to impute values for the Covid period
covid_period <- icnsa_data %>%
  filter(date >= start_date & date <= end_date)
# Fit cubic( spline)
lambda <- 0.2
spline_fit <- smooth.spline(x = as.numeric(covid_period$date), y = covid_period$value, spar = lambda)
# Prediction of values for Covid period
imputed_values <- predict(spline_fit, x = as.numeric(covid_period$date))
# Plotting original data and the imputed values
plot(icnsa_data$date, icnsa_data$value, type = "l", xlab = "Date", ylab = "ICNSA", main = "Initial Claims (ICNSA) with Imputed Covid Values")
lines(covid_period$date, imputed_values$y, col = "red")
legend("topright", legend = c("Original", "Imputed"), col = c("black", "red"), lty = 1)

# here Converting we are converting  value column to numeric
icnsa_data$value <- as.numeric(icnsa_data$value)
# Combining  original and imputed data
imputed_data <- data.frame(date = covid_period$date, value = imputed_values$y)
combined_data <- bind_rows(icnsa_data %>% filter(date < start_date),
                           imputed_data,
                           icnsa_data %>% filter(date > end_date))

# now Converting  to time series
ts_data <- ts(combined_data$value, start = min(combined_data$date), frequency = 52)

# Fitting  multiplicative Holt-Winters model
hw_multiplicative <- HoltWinters(ts_data, seasonal = "multiplicative")
forecast_multiplicative <- forecast(hw_multiplicative, h = 1)
forecast_multiplicative
#>           Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
#> -1032.673       201438.8 74114.41 328763.2 6712.939 396164.7
# Fitting additive Holt-Winters model
hw_additive <- HoltWinters(ts_data, seasonal = "additive")
forecast_additive <- forecast(hw_additive, h = 1)
forecast_additive
#>           Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
#> -1032.673       161954.6 36845.02 287064.2 -29383.99 353293.2
# Point forecasts for both the models
point_forecast_multiplicative <- forecast_multiplicative$mean[[1]]
point_forecast_additive <- forecast_additive$mean[[1]]

cat("Point forecast using multiplicative Holt-Winters model:", point_forecast_multiplicative, "\n")
#> Point forecast using multiplicative Holt-Winters model: 201438.8
cat("Point forecast using additive Holt-Winters model:", point_forecast_additive, "\n")
#> Point forecast using additive Holt-Winters model: 161954.6

Created on 2024-02-28 with reprex v2.1.0

Vasundharayerriboina commented 4 months ago

homework 3 ublearns

library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.3
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.3
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.3
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.3.3
library(splines)
library(zoo)
#> Warning: package 'zoo' was built under R version 4.3.3
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
library(seasonal)
#> Warning: package 'seasonal' was built under R version 4.3.3
#> 
#> Attaching package: 'seasonal'
#> The following object is masked from 'package:tibble':
#> 
#>     view
library(astsa)
#> 
#> Attaching package: 'astsa'
#> The following objects are masked from 'package:seasonal':
#> 
#>     trend, unemp
#> The following object is masked from 'package:forecast':
#> 
#>     gas
library(patchwork)
#> Warning: package 'patchwork' was built under R version 4.3.3

fredr_set_key("f6bd6b66ebde9023812ffffae048d90d")

# Empirical analysis to decide on start and end dates for the Covid pandemic

icnsa_data <- fredr(series_id = "ICNSA", observation_start = as.Date("2000-01-01"))
plot(icnsa_data$date, icnsa_data$value, type = "l", xlab = "Date", ylab = "ICNSA", main = "Initial Claims (ICNSA)")


start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-07-31")

#Impute values for the Covid period using state-space missing value methodology
covid_period <- icnsa_data %>%
  filter(date >= start_date & date <= end_date)
covid_period
#> # A tibble: 74 × 5
#>    date       series_id   value realtime_start realtime_end
#>    <date>     <chr>       <dbl> <date>         <date>      
#>  1 2020-03-07 ICNSA      199914 2024-03-07     2024-03-07  
#>  2 2020-03-14 ICNSA      251875 2024-03-07     2024-03-07  
#>  3 2020-03-21 ICNSA     2914268 2024-03-07     2024-03-07  
#>  4 2020-03-28 ICNSA     5981838 2024-03-07     2024-03-07  
#>  5 2020-04-04 ICNSA     6161268 2024-03-07     2024-03-07  
#>  6 2020-04-11 ICNSA     4898119 2024-03-07     2024-03-07  
#>  7 2020-04-18 ICNSA     4221704 2024-03-07     2024-03-07  
#>  8 2020-04-25 ICNSA     3466665 2024-03-07     2024-03-07  
#>  9 2020-05-02 ICNSA     2790860 2024-03-07     2024-03-07  
#> 10 2020-05-09 ICNSA     2325889 2024-03-07     2024-03-07  
#> # ℹ 64 more rows
#state-space model for missing values
state_space_fit <- StructTS(covid_period$value, type = "level")
state_space_fit
#> 
#> Call:
#> StructTS(x = covid_period$value, type = "level")
#> 
#> Variances:
#>     level    epsilon  
#> 2.798e+11  0.000e+00
#Impute missing values
imputed_values <- fitted(state_space_fit)
imputed_values
#> Time Series:
#> Start = 1 
#> End = 74 
#> Frequency = 1 
#>         level
#>  [1,]  199914
#>  [2,]  251875
#>  [3,] 2914268
#>  [4,] 5981838
#>  [5,] 6161268
#>  [6,] 4898119
#>  [7,] 4221704
#>  [8,] 3466665
#>  [9,] 2790860
#> [10,] 2325889
#> [11,] 2162822
#> [12,] 1902006
#> [13,] 1610909
#> [14,] 1555824
#> [15,] 1456353
#> [16,] 1446176
#> [17,] 1425969
#> [18,] 1390315
#> [19,] 1511632
#> [20,] 1372247
#> [21,] 1200962
#> [22,]  982808
#> [23,]  830266
#> [24,]  878555
#> [25,]  813744
#> [26,]  826436
#> [27,]  857006
#> [28,]  784765
#> [29,]  814441
#> [30,]  731391
#> [31,]  722387
#> [32,]  820590
#> [33,]  757745
#> [34,]  731660
#> [35,]  736618
#> [36,]  719716
#> [37,]  708717
#> [38,]  827447
#> [39,]  710901
#> [40,]  946091
#> [41,]  928178
#> [42,]  854113
#> [43,]  823512
#> [44,]  898597
#> [45,] 1082696
#> [46,]  930184
#> [47,]  833704
#> [48,]  843702
#> [49,]  860209
#> [50,]  827835
#> [51,]  707268
#> [52,]  749251
#> [53,]  714661
#> [54,]  750287
#> [55,]  630621
#> [56,]  701073
#> [57,]  668346
#> [58,]  620420
#> [59,]  583397
#> [60,]  611236
#> [61,]  510161
#> [62,]  488815
#> [63,]  451302
#> [64,]  416310
#> [65,]  430171
#> [66,]  364577
#> [67,]  407148
#> [68,]  396935
#> [69,]  362899
#> [70,]  382622
#> [71,]  388662
#> [72,]  411244
#> [73,]  342003
#> [74,]  325265
# Combine original and imputed data
combined_data <- bind_rows(
  icnsa_data %>% filter(date < start_date),
  data.frame(date = covid_period$date, value = imputed_values),
  icnsa_data %>% filter(date > end_date)
)
combined_data
#> # A tibble: 1,261 × 6
#>    date       series_id  value realtime_start realtime_end level
#>    <date>     <chr>      <dbl> <date>         <date>       <dbl>
#>  1 2000-01-01 ICNSA     439912 2024-03-07     2024-03-07      NA
#>  2 2000-01-08 ICNSA     606897 2024-03-07     2024-03-07      NA
#>  3 2000-01-15 ICNSA     442494 2024-03-07     2024-03-07      NA
#>  4 2000-01-22 ICNSA     328841 2024-03-07     2024-03-07      NA
#>  5 2000-01-29 ICNSA     332740 2024-03-07     2024-03-07      NA
#>  6 2000-02-05 ICNSA     365245 2024-03-07     2024-03-07      NA
#>  7 2000-02-12 ICNSA     311897 2024-03-07     2024-03-07      NA
#>  8 2000-02-19 ICNSA     281256 2024-03-07     2024-03-07      NA
#>  9 2000-02-26 ICNSA     258962 2024-03-07     2024-03-07      NA
#> 10 2000-03-04 ICNSA     283024 2024-03-07     2024-03-07      NA
#> # ℹ 1,251 more rows
# Comparing state-space imputation with spline imputation from HW 2
lambda <- 0.2
spline_fit <- smooth.spline(x = as.numeric(covid_period$date), y = covid_period$value, spar = lambda)
spline_fit
#> Call:
#> smooth.spline(x = as.numeric(covid_period$date), y = covid_period$value, 
#>     spar = lambda)
#> 
#> Smoothing Parameter  spar= 0.2  lambda= 1.504257e-07
#> Equivalent Degrees of Freedom (Df): 44.88048
#> Penalized Criterion (RSS): 443692781391
#> GCV: 38720919943
spline_imputed_values <- predict(spline_fit, x = as.numeric(covid_period$date))
spline_imputed_values
#> $x
#>  [1] 18328 18335 18342 18349 18356 18363 18370 18377 18384 18391 18398 18405
#> [13] 18412 18419 18426 18433 18440 18447 18454 18461 18468 18475 18482 18489
#> [25] 18496 18503 18510 18517 18524 18531 18538 18545 18552 18559 18566 18573
#> [37] 18580 18587 18594 18601 18608 18615 18622 18629 18636 18643 18650 18657
#> [49] 18664 18671 18678 18685 18692 18699 18706 18713 18720 18727 18734 18741
#> [61] 18748 18755 18762 18769 18776 18783 18790 18797 18804 18811 18818 18825
#> [73] 18832 18839
#> 
#> $y
#>  [1]   32334.2  532503.1 3099641.5 5506381.8 6228229.9 5049844.2 4179557.2
#>  [8] 3455580.8 2798013.2 2353443.2 2139244.3 1883370.9 1645361.9 1534849.2
#> [15] 1468563.8 1436873.0 1421230.3 1421550.9 1473746.3 1388424.9 1190026.7
#> [22]  982671.3  865854.5  832574.0  836568.0  836703.1  826272.0  814466.2
#> [29]  793171.6  732899.5  750133.5  787863.3  767870.7  740024.4  724655.3
#> [36]  722631.2  733131.6  757214.0  809701.5  883465.3  930896.1  857345.2
#> [43]  834294.4  918917.3 1036738.9  942649.5  852205.4  837041.2  860939.1
#> [50]  807059.3  736322.0  730124.6  739718.8  706242.0  673751.8  681549.9
#> [57]  666541.8  623528.0  594354.7  589966.8  527495.5  480413.8  452572.6
#> [64]  425114.1  407311.6  393815.2  390621.5  394001.3  372664.4  376994.1
#> [71]  396038.9  395559.6  355594.5  321276.9
plot(icnsa_data$date, icnsa_data$value, type = "l", xlab = "Date", ylab = "ICNSA", main = "Initial Claims (ICNSA) with Imputed Covid Values")
lines(covid_period$date, imputed_values, col = "blue")
lines(covid_period$date, spline_imputed_values$y, col = "red")
legend("topright", legend = c("Original", "State-Space Imputed", "Spline Imputed"), col = c("black", "blue", "red"), lty = 1)


# structural time series model with trend, seasonal, and error using state-space methodology

ts_data <- ts(combined_data$value, start = min(combined_data$date), frequency = 52)
ts_data 
#> Time Series:
#> Start = c(10957, 1) 
#> End = c(10981, 13) 
#> Frequency = 52 
#>    [1] 439912 606897 442494 328841 332740 365245 311897 281256 258962 283024
#>   [11] 255109 242139 239835 229520 274130 237218 240266 249458 259546 231706
#>   [21] 234599 239836 242991 267752 265617 273344 280979 363793 377982 296255
#>   [31] 253466 266151 261358 251844 239030 242375 229954 245991 222219 227249
#>   [41] 292784 255082 263445 269489 342414 294727 374160 321859 447262 390088
#>   [51] 402476 481720 568973 558768 599562 398188 447386 424696 396151 345841
#>   [61] 357591 379286 377210 351497 334747 328576 397282 346981 369745 353831
#>   [71] 336319 331765 338374 346231 335765 397015 354526 351770 375885 526826
#>   [81] 524139 406038 332957 341660 333042 317046 307850 319016 309567 317245
#>   [91] 353611 400400 441754 426881 429542 436901 443971 456366 420259 438893
#>  [101] 605916 491836 440906 529570 647045 637343 799246 558297 431690 445552
#>  [111] 438611 376573 367504 385272 386992 352045 366372 386296 432384 428834
#>  [121] 385151 367350 362681 358286 348887 346439 309183 378613 356096 358959
#>  [131] 358658 456716 506718 394586 338441 326356 332673 313869 314852 310864
#>  [141] 318361 337577 317264 319063 365613 385689 349927 375591 397346 427078
#>  [151] 372829 436549 385788 547430 486258 483449 620929 620004 724111 542563
#>  [161] 434888 449286 439520 398291 387536 429782 414568 389909 361492 371692
#>  [171] 394160 434911 399180 401342 377383 364287 362276 359500 351890 421190
#>  [181] 383371 376560 394214 483401 552621 429381 348382 333770 348207 312087
#>  [191] 313058 319362 322501 328414 301217 304968 337880 368876 328572 352117
#>  [201] 345573 397387 347719 397990 357811 486202 412627 424192 516493 552815
#>  [211] 677897 490763 382262 406298 433234 341634 328171 342140 339007 312067
#>  [221] 304462 296776 304249 350739 334965 313686 283236 292754 297061 293974
#>  [231] 304067 308229 313930 322481 318746 349920 444531 394372 313225 282128
#>  [241] 291611 262936 274433 276308 274930 250568 275846 282729 279591 338711
#>  [251] 279846 317573 305546 351404 311901 355954 320690 473570 370604 374749
#>  [261] 446699 540927 693776 467862 360583 364704 347391 309290 303814 290776
#>  [271] 332067 307061 290719 291378 294994 339709 285657 299891 290824 297347
#>  [281] 275524 276761 304306 289914 315938 289831 286681 327268 427323 374665
#>  [291] 295026 261906 269746 257151 252016 251642 271613 322387 346204 292435
#>  [301] 313847 380093 303158 304733 294376 340491 283564 368859 290730 444600
#>  [311] 391961 359108 433397 475889 555114 439873 317926 318805 321527 310078
#>  [321] 269571 272478 301867 294764 269237 265370 253985 314696 268472 291349
#>  [331] 279715 317239 288972 277168 292714 260263 285892 277441 287503 304638
#>  [341] 418363 377115 288875 259974 275430 256259 252357 251275 259539 240231
#>  [351] 267036 261396 249288 307646 271863 291372 301079 326711 286151 367690
#>  [361] 323509 448898 384123 361672 425357 499979 506059 506709 367583 359959
#>  [371] 339018 363018 305945 299000 320194 298927 277187 273432 268218 328266
#>  [381] 317917 303984 267672 274801 258516 270446 273397 263527 302368 290951
#>  [391] 292583 300348 417554 383839 298366 257426 270563 266420 257573 266179
#>  [401] 257454 245526 261971 247643 255431 298317 306519 307675 303357 325831
#>  [411] 351760 323124 324047 462902 423130 393042 456280 507908 522700 547943
#>  [421] 415397 369498 380234 377595 325886 330013 345287 341364 335909 316208
#>  [431] 342189 357209 370960 328334 337854 335533 325479 319817 326627 300989
#>  [441] 373033 349254 358158 368544 401672 476071 403607 374182 381887 372807
#>  [451] 342164 344255 360485 336131 381720 397610 392121 426786 454100 416114
#>  [461] 449429 466373 539812 513047 609128 537230 760481 629867 719691 717000
#>  [471] 731958 956791 763987 620143 682176 710152 619951 605668 645827 652635
#>  [481] 601192 590067 599299 623279 610522 596564 583457 536648 570412 540925
#>  [491] 538311 500380 581092 562449 572425 563387 585963 677038 590730 516351
#>  [501] 470988 486586 461780 460998 460525 470079 414557 441311 449620 456233
#>  [511] 513852 464985 499374 487714 537230 479350 547022 462090 673097 561655
#>  [521] 571378 561852 651215 825891 659173 507651 538617 512463 482078 458160
#>  [531] 474662 462679 439061 413067 412710 421130 514136 436814 429196 399350
#>  [541] 414327 414572 410778 418873 398864 448305 427080 444712 470366 515991
#>  [551] 502065 413679 402140 425471 405484 384955 383135 381863 341791 382341
#>  [561] 372551 373681 462667 394016 408489 421097 452657 409548 464817 412922
#>  [571] 585711 491776 495548 525710 578904 773499 549688 485950 464775 440706
#>  [581] 424400 380985 353797 407299 371721 354457 357457 353817 448029 381834
#>  [591] 387867 415974 397737 361573 376632 381497 366816 400608 394286 406633
#>  [601] 425640 473963 470086 369207 341103 354408 346014 344870 336761 348582
#>  [611] 328868 353820 328073 332394 405906 357562 377156 369647 402532 363016
#>  [621] 440157 372640 528793 435863 421103 497689 540057 646219 525422 416880
#>  [631] 422287 401365 365014 346659 334242 368433 340102 319498 323373 315800
#>  [641] 390064 370482 370632 333476 341080 325094 330427 346260 324385 376610
#>  [651] 364548 370521 369826 442192 455260 340780 312931 320219 317680 311857
#>  [661] 312542 309537 299729 330454 303685 301046 329925 362730 345227 339924
#>  [671] 361823 478551 403636 358865 500163 429191 401431 457584 490126 557424
#>  [681] 558047 437360 369567 388708 361759 351087 310512 335794 317661 301471
#>  [691] 316133 317494 356935 359415 326264 301622 301602 320253 303357 319508
#>  [701] 294608 332964 336970 336901 335424 383811 410974 340457 281692 288861
#>  [711] 282756 281164 279803 269359 229648 272946 255087 252196 335937 360957
#>  [721] 312037 325326 331867 364167 327053 369197 321896 463413 414613 418272
#>  [731] 452664 488537 534966 416116 357806 357742 360338 322761 312665 317832
#>  [741] 302311 285970 274072 294862 299162 318793 299182 318127 288748 270738
#>  [751] 287398 275412 264133 313371 301195 305029 305791 322753 370559 287049
#>  [761] 257625 247877 269468 249463 249006 249780 234755 242318 239780 227571
#>  [771] 257545 273756 256166 271331 266921 309338 286115 357202 294389 389284
#>  [781] 327827 340827 389757 439342 529685 383538 281885 306643 324158 277904
#>  [791] 280639 315566 277925 260242 248032 239748 253533 308173 279797 250780
#>  [801] 236421 242882 243612 253454 230676 275619 258764 263199 274646 303585
#>  [811] 344471 262949 230314 224104 239326 229251 226649 230079 232507 198903
#>  [821] 219342 215116 227176 256522 232860 245365 258440 291098 264816 305424
#>  [831] 262628 384491 313276 319641 346542 405368 502904 378747 295936 311940
#>  [841] 290796 258380 248870 265802 247628 236888 230882 235716 245035 270419
#>  [851] 242400 245040 243392 261899 244869 240798 246740 232300 266277 247968
#>  [861] 263662 267437 298673 268526 231925 219202 231542 219570 217011 215688
#>  [871] 217715 193291 205649 198455 200456 238581 233633 237314 245751 258608
#>  [881] 223770 287794 249774 351580 305268 315068 343213 350561 414742 352799
#>  [891] 284030 280983 259713 245886 239322 212829 243959 222227 224693 228269
#>  [901] 208347 239823 225864 241611 210955 215040 206905 210544 232138 212696
#>  [911] 234652 228883 239635 252886 284329 257763 220455 198776 211924 198280
#>  [921] 195130 196227 250627 211923 212313 212987 204180 229241 205592 216004
#>  [931] 215977 242111 236654 275004 224851 326052 282055 287479 325180 351500
#>  [941] 403930 354708 260432 268197 243422 233252 212609 196294 225893 205185
#>  [951] 198649 195433 201057 231759 226090 200139 186451 190262 195214 207043
#>  [961] 202846 191523 217289 206023 222766 231539 264869 232238 201288 179880
#>  [971] 185174 180038 173331 175745 173607 162640 173624 172930 171816 193936
#>  [981] 190501 198733 198530 214814 235981 226576 218658 317936 261525 255195
#>  [991] 291581 327388 350681 343678 269369 250580 254263 242762 210679 203049
#> [1001] 220540 209302 194335 190023 183775 196071 196364 211762 204755 204033
#> [1011] 188264 191931 198194 189577 220186 205921 225819 224565 231995 243621
#> [1021] 196382 178897 179879 186914 171386 176867 179516 160342 173134 175394
#> [1031] 172968 188106 201677 186748 198733 205625 238996 227892 252428 216827
#> [1041] 317866 270547 287243 312524 335294 337883 281646 228443 224561 219459
#> [1051] 209218 198845 216625     NA     NA     NA     NA     NA     NA     NA
#> [1061]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1071]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1081]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1091]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1101]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1111]     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
#> [1121]     NA     NA     NA     NA     NA     NA     NA 322782 309229 298533
#> [1131] 291942 285683 265902 306581 301143 262105 280538 257197 247270 241656
#> [1141] 256937 240332 253211 216420 283651 265349 254823 257349 314796 415162
#> [1151] 337075 267637 258288 231452 240502 216574 196537 220486 204494 182581
#> [1161] 197128 194671 223751 197219 202952 196422 185815 199631 187695 183446
#> [1171] 186568 205890 206272 207061 218886 243485 237092 200929 196116 196308
#> [1181] 187391 179220 173867 172835 152144 168701 153821 167378 198340 178773
#> [1191] 184420 185973 206079 200237 248916 199323 287976 250038 248444 269877
#> [1201] 278487 339883 288330 225228 225026 234007 225332 211007 202156 238840
#> [1211] 218084 213003 224193 207120 235237 229319 225137 220115 204962 200738
#> [1221] 202645 208772 220449 251384 250891 229718 251686 258252 258169 213497
#> [1231] 205550 227917 213803 199437 193441 191353 175594 176586 175650 174701
#> [1241] 199743 182394 193999 199306 214161 217438 240979 199750 294615 249090
#> [1251] 241040 274840 269409 318906 291330 249947 263919 234729 223985 199337
#> [1261] 193988
structural_fit <- StructTS(ts_data, type = "BSM")
structural_fit 
#> 
#> Call:
#> StructTS(x = ts_data, type = "BSM")
#> 
#> Variances:
#>     level      slope       seas    epsilon  
#> 271956514      70653  295295347          0
# covariate series and adding it to the model
gdp_data <- fredr(series_id = "GDP", observation_start = as.Date("2000-01-01"), frequency = "q")
gdp_data
#> # A tibble: 96 × 5
#>    date       series_id  value realtime_start realtime_end
#>    <date>     <chr>      <dbl> <date>         <date>      
#>  1 2000-01-01 GDP       10002. 2024-03-07     2024-03-07  
#>  2 2000-04-01 GDP       10248. 2024-03-07     2024-03-07  
#>  3 2000-07-01 GDP       10318. 2024-03-07     2024-03-07  
#>  4 2000-10-01 GDP       10436. 2024-03-07     2024-03-07  
#>  5 2001-01-01 GDP       10470. 2024-03-07     2024-03-07  
#>  6 2001-04-01 GDP       10599  2024-03-07     2024-03-07  
#>  7 2001-07-01 GDP       10598. 2024-03-07     2024-03-07  
#>  8 2001-10-01 GDP       10660. 2024-03-07     2024-03-07  
#>  9 2002-01-01 GDP       10784. 2024-03-07     2024-03-07  
#> 10 2002-04-01 GDP       10887. 2024-03-07     2024-03-07  
#> # ℹ 86 more rows
gdp_data <- gdp_data %>%
  mutate(date = as.Date(paste0(date, "-01"))) %>%
  select(date, value)
gdp_data
#> # A tibble: 96 × 2
#>    date        value
#>    <date>      <dbl>
#>  1 2000-01-01 10002.
#>  2 2000-04-01 10248.
#>  3 2000-07-01 10318.
#>  4 2000-10-01 10436.
#>  5 2001-01-01 10470.
#>  6 2001-04-01 10599 
#>  7 2001-07-01 10598.
#>  8 2001-10-01 10660.
#>  9 2002-01-01 10784.
#> 10 2002-04-01 10887.
#> # ℹ 86 more rows
combined_data_gdp <- left_join(combined_data, gdp_data, by = "date")
combined_data_gdp
#> # A tibble: 1,261 × 7
#>    date       series_id value.x realtime_start realtime_end level value.y
#>    <date>     <chr>       <dbl> <date>         <date>       <dbl>   <dbl>
#>  1 2000-01-01 ICNSA      439912 2024-03-07     2024-03-07      NA  10002.
#>  2 2000-01-08 ICNSA      606897 2024-03-07     2024-03-07      NA     NA 
#>  3 2000-01-15 ICNSA      442494 2024-03-07     2024-03-07      NA     NA 
#>  4 2000-01-22 ICNSA      328841 2024-03-07     2024-03-07      NA     NA 
#>  5 2000-01-29 ICNSA      332740 2024-03-07     2024-03-07      NA     NA 
#>  6 2000-02-05 ICNSA      365245 2024-03-07     2024-03-07      NA     NA 
#>  7 2000-02-12 ICNSA      311897 2024-03-07     2024-03-07      NA     NA 
#>  8 2000-02-19 ICNSA      281256 2024-03-07     2024-03-07      NA     NA 
#>  9 2000-02-26 ICNSA      258962 2024-03-07     2024-03-07      NA     NA 
#> 10 2000-03-04 ICNSA      283024 2024-03-07     2024-03-07      NA     NA 
#> # ℹ 1,251 more rows
combined_data_gdp <- combined_data_gdp %>%
  arrange(date) %>%
  mutate(value.y = na.approx(value.y, rule = 2))
combined_data_gdp
#> # A tibble: 1,261 × 7
#>    date       series_id value.x realtime_start realtime_end level value.y
#>    <date>     <chr>       <dbl> <date>         <date>       <dbl>   <dbl>
#>  1 2000-01-01 ICNSA      439912 2024-03-07     2024-03-07      NA  10002.
#>  2 2000-01-08 ICNSA      606897 2024-03-07     2024-03-07      NA  10021.
#>  3 2000-01-15 ICNSA      442494 2024-03-07     2024-03-07      NA  10040.
#>  4 2000-01-22 ICNSA      328841 2024-03-07     2024-03-07      NA  10059.
#>  5 2000-01-29 ICNSA      332740 2024-03-07     2024-03-07      NA  10078.
#>  6 2000-02-05 ICNSA      365245 2024-03-07     2024-03-07      NA  10097.
#>  7 2000-02-12 ICNSA      311897 2024-03-07     2024-03-07      NA  10116.
#>  8 2000-02-19 ICNSA      281256 2024-03-07     2024-03-07      NA  10134.
#>  9 2000-02-26 ICNSA      258962 2024-03-07     2024-03-07      NA  10153.
#> 10 2000-03-04 ICNSA      283024 2024-03-07     2024-03-07      NA  10172.
#> # ℹ 1,251 more rows
# structural time series model with covariate
arima_fit_covariate <- auto.arima(ts_data, xreg = combined_data_gdp$value.y)
arima_fit_covariate
#> Series: ts_data 
#> Regression with ARIMA(2,1,2)(0,1,1)[52] errors 
#> 
#> Coefficients:
#>          ar1      ar2      ma1     ma2     sma1      xreg
#>       0.9210  -0.0031  -1.6310  0.6733  -0.2293  -27.5410
#> s.e.  0.0533   0.0436   0.0438  0.0374   0.0300   57.1998
#> 
#> sigma^2 = 813660670:  log likelihood = -13259.07
#> AIC=26532.15   AICc=26532.24   BIC=26567.83
#point forecast
point_forecast <- forecast(arima_fit_covariate, xreg = tail(combined_data_gdp$value.y, 1), h = 1)$mean
cat("\nPoint forecast for the next week:", point_forecast)
#> 
#> Point forecast for the next week: 233445.6

Created on 2024-03-07 with reprex v2.1.0

Vasundharayerriboina commented 2 months ago
library(reprex)
#> Warning: package 'reprex' was built under R version 4.3.3
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: zoo
#> Warning: package 'zoo' was built under R version 4.3.3
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.3.3
#> Loading required package: lmtest
#> Warning: package 'lmtest' was built under R version 4.3.3
library(zoo)
library(mice)  # For multiple imputation
#> Warning: package 'mice' was built under R version 4.3.3
#> Warning in check_dep_version(): ABI version mismatch: 
#> lme4 was built with Matrix ABI version 1
#> Current Matrix ABI version is 0
#> Please re-install lme4 from source or restore original 'Matrix' package
#> 
#> Attaching package: 'mice'
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following objects are masked from 'package:base':
#> 
#>     cbind, rbind
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.3.3
#> Warning: package 'tidyr' was built under R version 4.3.3
#> Warning: package 'lubridate' was built under R version 4.3.3
library(tseries)  # For ADF test
#> Warning: package 'tseries' was built under R version 4.3.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

# Paths to the CSV files
file_paths <- list(
  "C:/Users/vasundhara/Downloads/35SVS.csv",
  "C:/Users/vasundhara/Downloads/36SVS.csv",
  "C:/Users/vasundhara/Downloads/37SVS.csv",
  "C:/Users/vasundhara/Downloads/39SVS.csv"
)

# Read CSV files into a list of data frames
data_list <- lapply(file_paths, read.csv, skip = 11, header = TRUE)

# Assuming the series are in the second column (first column being the date)
combined_data <- do.call(cbind, lapply(data_list, function(x) x[,2]))

# Description of the series
cat("Description of Series:\n")
#> Description of Series:
cat("Series1: Electrical Equipment - Represents electrical machinery and appliances manufacturing.\n")
#> Series1: Electrical Equipment - Represents electrical machinery and appliances manufacturing.
cat("Series2: Transportation Equipment - Involves the manufacturing of vehicles and other transportation equipment.\n")
#> Series2: Transportation Equipment - Involves the manufacturing of vehicles and other transportation equipment.
cat("Series3: Furniture - Pertains to the production of household and office furniture.\n")
#> Series3: Furniture - Pertains to the production of household and office furniture.
cat("Series4: Miscellaneous - Covers a variety of other manufactured products.\n\n")
#> Series4: Miscellaneous - Covers a variety of other manufactured products.

# Ensure no NA values - impute or remove
combined_data <- na.approx(combined_data)

# Provide valid column names
colnames(combined_data) <- paste("Series", 1:4, sep="")

# Convert to time series
start_year <- 2000  # Adjust this as per the actual data
combined_data_ts <- ts(combined_data, start=c(start_year,1), frequency=12)

# Convert combined data to a tibble for easier plotting
combined_data_tibble <- as_tibble(combined_data)
combined_data_tibble$Time <- rownames(combined_data_tibble)
combined_data_long <- pivot_longer(combined_data_tibble, -Time, names_to = "Series", values_to = "Value")

# Plotting with ggplot2
ggplot(combined_data_long, aes(x = as.numeric(Time), y = Value, color = Series)) +
  geom_line() +
  theme_minimal() +
  labs(title = " Time Series Plots", x = "Time", y = "Value") +
  facet_wrap(~Series, scales = "free_y")


# Empirical Analysis
par(mfrow=c(2,2)) # Set up the plotting area for multiple plots
for (i in 1:ncol(combined_data_ts)) {
  # Histogram to show the distribution
  hist(combined_data_ts[, i], main=paste("Histogram for", colnames(combined_data_ts)[i]), xlab="Value", breaks=30)

  # Time series plot
  plot.ts(combined_data_ts[, i], main=paste("Time Series for", colnames(combined_data_ts)[i]))

  # ACF and PACF plots
  acf(combined_data_ts[, i], main=paste("ACF for", colnames(combined_data_ts)[i]))
  pacf(combined_data_ts[, i], main=paste("PACF for", colnames(combined_data_ts)[i]))
}

cat("\nEmpirical Analysis:\n")
#> 
#> Empirical Analysis:
print(summary(combined_data_ts))
#>     Series1         Series2         Series3        Series4     
#>  Min.   : 6610   Min.   :35046   Min.   :4020   Min.   : 6373  
#>  1st Qu.: 8832   1st Qu.:50400   1st Qu.:5384   1st Qu.: 9388  
#>  Median : 9993   Median :57179   Median :6116   Median :12020  
#>  Mean   : 9945   Mean   :60878   Mean   :5946   Mean   :11146  
#>  3rd Qu.:10652   3rd Qu.:75206   3rd Qu.:6433   3rd Qu.:12726  
#>  Max.   :14547   Max.   :92394   Max.   :7383   Max.   :14604

# ADF Test for stationarity example
for (i in 1:ncol(combined_data)) {
  adf_test_results <- adf.test(combined_data[,i], alternative = "stationary")
  cat(sprintf("ADF Test p-value for Series %d: %f\n", i, adf_test_results$p.value))
}
#> ADF Test p-value for Series 1: 0.554079
#> ADF Test p-value for Series 2: 0.369920
#> ADF Test p-value for Series 3: 0.388724
#> ADF Test p-value for Series 4: 0.418063

# Fit a VAR(1) model
var1_fit <- VAR(combined_data_ts, p=1, type="const")
cat("Summary of the VAR(1) model:\n")
#> Summary of the VAR(1) model:
print(summary(var1_fit))
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: Series1, Series2, Series3, Series4 
#> Deterministic variables: const 
#> Sample size: 381 
#> Log Likelihood: -10934.573 
#> Roots of the characteristic polynomial:
#> 0.993 0.9837 0.9633 0.9488
#> Call:
#> VAR(y = combined_data_ts, p = 1, type = "const")
#> 
#> 
#> Estimation results for equation Series1: 
#> ======================================== 
#> Series1 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1   0.976963   0.013998  69.791   <2e-16 ***
#> Series2.l1   0.001770   0.001530   1.157    0.248    
#> Series3.l1  -0.003094   0.019933  -0.155    0.877    
#> Series4.l1   0.004076   0.009980   0.408    0.683    
#> const      113.710961  96.053123   1.184    0.237    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 213.3 on 376 degrees of freedom
#> Multiple R-Squared: 0.9798,  Adjusted R-squared: 0.9796 
#> F-statistic:  4562 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series2: 
#> ======================================== 
#> Series2 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1    0.12646    0.18822   0.672   0.5021    
#> Series2.l1    0.94094    0.02057  45.733   <2e-16 ***
#> Series3.l1   -0.26429    0.26802  -0.986   0.3247    
#> Series4.l1    0.32307    0.13419   2.408   0.0165 *  
#> const       446.55456 1291.53781   0.346   0.7297    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 2868 on 376 degrees of freedom
#> Multiple R-Squared: 0.9632,  Adjusted R-squared: 0.9628 
#> F-statistic:  2460 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series3: 
#> ======================================== 
#> Series3 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1 -3.631e-03  7.852e-03  -0.462   0.6441    
#> Series2.l1  7.980e-04  8.583e-04   0.930   0.3531    
#> Series3.l1  9.788e-01  1.118e-02  87.544   <2e-16 ***
#> Series4.l1 -1.055e-03  5.598e-03  -0.189   0.8506    
#> const       1.325e+02  5.388e+01   2.459   0.0144 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 119.6 on 376 degrees of freedom
#> Multiple R-Squared: 0.9759,  Adjusted R-squared: 0.9757 
#> F-statistic:  3814 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series4: 
#> ======================================== 
#> Series4 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1  0.0032799  0.0117629   0.279    0.781    
#> Series2.l1 -0.0004682  0.0012858  -0.364    0.716    
#> Series3.l1  0.0095819  0.0167496   0.572    0.568    
#> Series4.l1  0.9920868  0.0083859 118.304   <2e-16 ***
#> const      48.3855618 80.7138218   0.599    0.549    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 179.2 on 376 degrees of freedom
#> Multiple R-Squared: 0.9933,  Adjusted R-squared: 0.9933 
#> F-statistic: 1.402e+04 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         Series1 Series2 Series3 Series4
#> Series1   45490  245495    7969    8642
#> Series2  245495 8224431  120600  170830
#> Series3    7969  120600   14313    5270
#> Series4    8642  170830    5270   32121
#> 
#> Correlation matrix of residuals:
#>         Series1 Series2 Series3 Series4
#> Series1  1.0000  0.4014  0.3123  0.2261
#> Series2  0.4014  1.0000  0.3515  0.3324
#> Series3  0.3123  0.3515  1.0000  0.2458
#> Series4  0.2261  0.3324  0.2458  1.0000

# Multiple imputation (example)
imputed_data <- mice(combined_data, m=5, method='pmm', seed=500)
#> 
#>  iter imp variable
#>   1   1
#>   1   2
#>   1   3
#>   1   4
#>   1   5
#>   2   1
#>   2   2
#>   2   3
#>   2   4
#>   2   5
#>   3   1
#>   3   2
#>   3   3
#>   3   4
#>   3   5
#>   4   1
#>   4   2
#>   4   3
#>   4   4
#>   4   5
#>   5   1
#>   5   2
#>   5   3
#>   5   4
#>   5   5
completed_data <- complete(imputed_data, 1)  # Use first imputation for simplicity

# Convert to ts again if needed
completed_data_ts <- ts(completed_data, start=c(start_year, 1), frequency=12)

# Fit a VAR(p) model with p > 1, selecting p using AIC
var_fit <- VAR(completed_data_ts, type = "const", ic = "AIC")
cat("Summary of the VAR(p) model with selected order based on AIC:\n")
#> Summary of the VAR(p) model with selected order based on AIC:
print(summary(var_fit))
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: Series1, Series2, Series3, Series4 
#> Deterministic variables: const 
#> Sample size: 381 
#> Log Likelihood: -10934.573 
#> Roots of the characteristic polynomial:
#> 0.993 0.9837 0.9633 0.9488
#> Call:
#> VAR(y = completed_data_ts, type = "const", ic = "AIC")
#> 
#> 
#> Estimation results for equation Series1: 
#> ======================================== 
#> Series1 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1   0.976963   0.013998  69.791   <2e-16 ***
#> Series2.l1   0.001770   0.001530   1.157    0.248    
#> Series3.l1  -0.003094   0.019933  -0.155    0.877    
#> Series4.l1   0.004076   0.009980   0.408    0.683    
#> const      113.710961  96.053123   1.184    0.237    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 213.3 on 376 degrees of freedom
#> Multiple R-Squared: 0.9798,  Adjusted R-squared: 0.9796 
#> F-statistic:  4562 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series2: 
#> ======================================== 
#> Series2 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1    0.12646    0.18822   0.672   0.5021    
#> Series2.l1    0.94094    0.02057  45.733   <2e-16 ***
#> Series3.l1   -0.26429    0.26802  -0.986   0.3247    
#> Series4.l1    0.32307    0.13419   2.408   0.0165 *  
#> const       446.55456 1291.53781   0.346   0.7297    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 2868 on 376 degrees of freedom
#> Multiple R-Squared: 0.9632,  Adjusted R-squared: 0.9628 
#> F-statistic:  2460 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series3: 
#> ======================================== 
#> Series3 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1 -3.631e-03  7.852e-03  -0.462   0.6441    
#> Series2.l1  7.980e-04  8.583e-04   0.930   0.3531    
#> Series3.l1  9.788e-01  1.118e-02  87.544   <2e-16 ***
#> Series4.l1 -1.055e-03  5.598e-03  -0.189   0.8506    
#> const       1.325e+02  5.388e+01   2.459   0.0144 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 119.6 on 376 degrees of freedom
#> Multiple R-Squared: 0.9759,  Adjusted R-squared: 0.9757 
#> F-statistic:  3814 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation Series4: 
#> ======================================== 
#> Series4 = Series1.l1 + Series2.l1 + Series3.l1 + Series4.l1 + const 
#> 
#>              Estimate Std. Error t value Pr(>|t|)    
#> Series1.l1  0.0032799  0.0117629   0.279    0.781    
#> Series2.l1 -0.0004682  0.0012858  -0.364    0.716    
#> Series3.l1  0.0095819  0.0167496   0.572    0.568    
#> Series4.l1  0.9920868  0.0083859 118.304   <2e-16 ***
#> const      48.3855618 80.7138218   0.599    0.549    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 179.2 on 376 degrees of freedom
#> Multiple R-Squared: 0.9933,  Adjusted R-squared: 0.9933 
#> F-statistic: 1.402e+04 on 4 and 376 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>         Series1 Series2 Series3 Series4
#> Series1   45490  245495    7969    8642
#> Series2  245495 8224431  120600  170830
#> Series3    7969  120600   14313    5270
#> Series4    8642  170830    5270   32121
#> 
#> Correlation matrix of residuals:
#>         Series1 Series2 Series3 Series4
#> Series1  1.0000  0.4014  0.3123  0.2261
#> Series2  0.4014  1.0000  0.3515  0.3324
#> Series3  0.3123  0.3515  1.0000  0.2458
#> Series4  0.2261  0.3324  0.2458  1.0000

# Compare the two fits (AIC comparison)
a1 <- AIC(var1_fit)
ap <- AIC(var_fit)
comparison_aic <- c(VAR1 = a1, VARp = ap)

# Forecasting
forecast_var1 <- predict(var1_fit, n.ahead = 1)
forecast_varp <- predict(var_fit, n.ahead = 1)

# Granger causality analysis
causality_results_series1 <- causality(var_fit, cause = "Series1")
causality_results_series2 <- causality(var_fit, cause = "Series2")
causality_results_series3 <- causality(var_fit, cause = "Series3")
causality_results_series4 <- causality(var_fit, cause = "Series4")

# Fit a sparse VAR model using BigVAR
if (length(var_fit$selection$order) > 0 && !is.na(var_fit$selection$order)) {
  model_spec <- constructModel(combined_data_ts, p = var_fit$selection$order, struct = "Lasso", gran = "lasso")
  fit <- BigVAR(model_spec)
  cat("Summary and structure of the BigVAR (Lasso) model:\n")
  print(summary(fit))
} else {
  cat("Selected order not determined. Check VAR model fitting.\n")
}
#> Selected order not determined. Check VAR model fitting.

# Display fitted sparsity structure if available
if (is.list(fit) && !is.null(fit$results)) {
  if ("lambda.min" %in% names(fit$results)) {
    cat("Lambda.min from BigVAR model:\n")
    print(fit$results$lambda.min)
  } else {
    cat("Lambda.min not found in BigVAR model results\n")
  }
} else {
  cat("Unexpected structure for BigVAR fit object\n")
}
#> Error in eval(expr, envir, enclos): object 'fit' not found

# Summary and Results Output
results <- list(
  VAR1_AIC = a1,
  VARp_AIC = ap,
  Forecast_VAR1 = forecast_var1$fcst,
  Forecast_VARp = forecast_varp$fcst,
  Granger_Causality_Series1 = causality_results_series1$Granger,
  Granger_Causality_Series2 = causality_results_series2$Granger,
  Granger_Causality_Series3 = causality_results_series3$Granger,
  Granger_Causality_Series4 = causality_results_series4$Granger
)

print(results)
#> $VAR1_AIC
#> [1] 21909.15
#> 
#> $VARp_AIC
#> [1] 21909.15
#> 
#> $Forecast_VAR1
#> $Forecast_VAR1$Series1
#>                  fcst    lower    upper      CI
#> Series1.fcst 14260.75 13842.72 14678.78 418.028
#> 
#> $Forecast_VAR1$Series2
#>                  fcst    lower    upper       CI
#> Series2.fcst 89650.24 84029.41 95271.08 5620.837
#> 
#> $Forecast_VAR1$Series3
#>                  fcst    lower    upper       CI
#> Series3.fcst 6806.639 6572.156 7041.122 234.4828
#> 
#> $Forecast_VAR1$Series4
#>                  fcst    lower    upper       CI
#> Series4.fcst 14499.75 14148.48 14851.02 351.2706
#> 
#> 
#> $Forecast_VARp
#> $Forecast_VARp$Series1
#>                  fcst    lower    upper      CI
#> Series1.fcst 14260.75 13842.72 14678.78 418.028
#> 
#> $Forecast_VARp$Series2
#>                  fcst    lower    upper       CI
#> Series2.fcst 89650.24 84029.41 95271.08 5620.837
#> 
#> $Forecast_VARp$Series3
#>                  fcst    lower    upper       CI
#> Series3.fcst 6806.639 6572.156 7041.122 234.4828
#> 
#> $Forecast_VARp$Series4
#>                  fcst    lower    upper       CI
#> Series4.fcst 14499.75 14148.48 14851.02 351.2706
#> 
#> 
#> $Granger_Causality_Series1
#> 
#>  Granger causality H0: Series1 do not Granger-cause Series2 Series3
#>  Series4
#> 
#> data:  VAR object var_fit
#> F-Test = 0.34564, df1 = 3, df2 = 1504, p-value = 0.7923
#> 
#> 
#> $Granger_Causality_Series2
#> 
#>  Granger causality H0: Series2 do not Granger-cause Series1 Series3
#>  Series4
#> 
#> data:  VAR object var_fit
#> F-Test = 0.76275, df1 = 3, df2 = 1504, p-value = 0.515
#> 
#> 
#> $Granger_Causality_Series3
#> 
#>  Granger causality H0: Series3 do not Granger-cause Series1 Series2
#>  Series4
#> 
#> data:  VAR object var_fit
#> F-Test = 0.63619, df1 = 3, df2 = 1504, p-value = 0.5917
#> 
#> 
#> $Granger_Causality_Series4
#> 
#>  Granger causality H0: Series4 do not Granger-cause Series1 Series2
#>  Series3
#> 
#> data:  VAR object var_fit
#> F-Test = 2.3918, df1 = 3, df2 = 1504, p-value = 0.06694

Created on 2024-04-11 with reprex v2.1.0