MilindKulkarni12 opened this issue 9 months ago
library(reprex)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
# set the api key
fredr_set_key('48753a49458ab78ef7185cffd58175b6')
data <- fredr(series_id = 'ICNSA')
head(data, n = 5)
#> # A tibble: 5 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-02-09 2024-02-09
#> 2 1967-01-14 ICNSA 334000 2024-02-09 2024-02-09
#> 3 1967-01-21 ICNSA 277000 2024-02-09 2024-02-09
#> 4 1967-01-28 ICNSA 252000 2024-02-09 2024-02-09
#> 5 1967-02-04 ICNSA 274000 2024-02-09 2024-02-09
# data$date is already a Date object, so no format string is needed
data$DATE <- as.Date(data$date)
# set the COVID-19 period
covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2022-12-31")
# filter out the covid-19 period data
data_without_covid <- subset(data, DATE < covid_start | DATE > covid_end)
tail(data_without_covid, n = 60)
#> # A tibble: 60 × 6
#> date series_id value realtime_start realtime_end DATE
#> <date> <chr> <dbl> <date> <date> <date>
#> 1 2019-12-14 ICNSA 270547 2024-02-09 2024-02-09 2019-12-14
#> 2 2019-12-21 ICNSA 287243 2024-02-09 2024-02-09 2019-12-21
#> 3 2019-12-28 ICNSA 312524 2024-02-09 2024-02-09 2019-12-28
#> 4 2023-01-07 ICNSA 339883 2024-02-09 2024-02-09 2023-01-07
#> 5 2023-01-14 ICNSA 288330 2024-02-09 2024-02-09 2023-01-14
#> 6 2023-01-21 ICNSA 225228 2024-02-09 2024-02-09 2023-01-21
#> 7 2023-01-28 ICNSA 225026 2024-02-09 2024-02-09 2023-01-28
#> 8 2023-02-04 ICNSA 234007 2024-02-09 2024-02-09 2023-02-04
#> 9 2023-02-11 ICNSA 225332 2024-02-09 2024-02-09 2023-02-11
#> 10 2023-02-18 ICNSA 211007 2024-02-09 2024-02-09 2023-02-18
#> # ℹ 50 more rows
ts_data <- ts(data_without_covid$value, frequency = 52)
plot(ts_data, main = "Initial Claims (ICNSA) excluding the COVID-19 period", xlab = "Date", ylab = "Initial Claims")
# ACF to check for autocorrelation in the data
acf(ts_data, main = "Autocorrelation Function (ACF)")
# PACF to check for significant spikes at individual lags
pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")
model <- auto.arima(ts_data)
# Forecast next week's initial claims
forecasted_values <- forecast(model, h = 1)
forecasted_values <- forecasted_values$mean
cat("Forecasted Unemployment (excluding COVID period):", forecasted_values, "\n")
#> Forecasted Unemployment (excluding COVID period): 232214.8
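# Suggested diagnostic (a sketch, not part of the original run): check that the
# auto.arima residuals look like white noise before trusting the one-step forecast.
checkresiduals(model)   # residual plot, ACF, and Ljung-Box test
accuracy(model)         # in-sample error measures for reference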
Created on 2024-02-09 with reprex v2.0.2
# Load required packages
library(reprex)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
# set the api key
fredr_set_key('48753a49458ab78ef7185cffd58175b6')
# Monthly State Retail Sales: Gasoline Stations in California
data <- fredr(series_id = 'MSRSCA447')
head(data, n = 5)
#> # A tibble: 5 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2019-01-01 MSRSCA447 5.3 2024-02-12 2024-02-12
#> 2 2019-02-01 MSRSCA447 9.7 2024-02-12 2024-02-12
#> 3 2019-03-01 MSRSCA447 6.8 2024-02-12 2024-02-12
#> 4 2019-04-01 MSRSCA447 14.1 2024-02-12 2024-02-12
#> 5 2019-05-01 MSRSCA447 2.2 2024-02-12 2024-02-12
# create time series obj
ts_data <- ts(data$value, frequency = 12)
# plot for detecting trend and seasonality
plot(stl(ts_data, s.window="periodic"), main = "Seasonal Decomposition")
# we can observe seasonality in the data; there is also a hint of a cyclical trend
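# Suggested sketch (an addition, not in the original run): a seasonal subseries plot
# makes the month-by-month pattern easier to see than the STL panels alone.
monthplot(ts_data, main = "Seasonal Subseries: Gasoline Station Retail Sales")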
# plot the time series
plot(ts_data, main = "Date", xlab = "Date", ylab = "Retail Sales: Gasoline Stations")
# plot ACF to help identify the model structure
acf(ts_data, main = "Autocorrelation Function (ACF)")
# ACF plot shows rapid decay indicating positive autocorrelation
# plot PACF
pacf(ts_data, main = "Partial Autocorrelation Function (PACF)")
# there aren't any significant spikes in the PACF plot, except a small one at lag 1
# to find the best-performing ARIMA model, let us try to find the order with the
# best (lowest) AIC score among the given range of p, d and q values
p_values <- 0:3
d_values <- 0:3
q_values <- 0:3
# initialize aic and order
top_aic <- Inf
top_order <- c(0, 0, 0)
# iterate
for (p in p_values) {
  for (d in d_values) {
    for (q in q_values) {
      curr_order <- c(p, d, q)
      curr_model <- Arima(ts_data, order=curr_order)
      curr_aic <- AIC(curr_model)
      if (curr_aic < top_aic) {
        # new top order found, overwrite
        top_aic <- curr_aic
        top_order <- curr_order
      }
    }
  }
}
# top performing order
top_order
#> [1] 2 1 2
# AIC score associated with the top order
top_aic
#> [1] 429.6667
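# Suggested cross-check (a sketch; the search limits below are assumptions matching
# the manual grid): let auto.arima search the same order range non-stepwise with AIC.
alt_model <- auto.arima(ts_data, max.p = 3, max.d = 3, max.q = 3, max.order = 6,
                        ic = "aic", stepwise = FALSE, seasonal = FALSE)
arimaorder(alt_model)   # fitted (p, d, q) for comparison with top_order
AIC(alt_model)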
# Arima model for the top order
model <- Arima(ts_data, order=top_order)
summary(model)
#> Series: ts_data
#> ARIMA(2,1,2)
#>
#> Coefficients:
#> ar1 ar2 ma1 ma2
#> 0.0182 -0.5750 0.1435 1.0000
#> s.e. 0.1759 0.1351 0.1035 0.0778
#>
#> sigma^2 = 91.73: log likelihood = -209.83
#> AIC=429.67 AICc=430.84 BIC=439.88
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.3980385 9.155217 7.393018 -0.3433542 85.88841 0.2017793
#> ACF1
#> Training set -0.04745976
# check the distribution of residuals
checkresiduals(model)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(2,1,2)
#> Q* = 17.1, df = 8, p-value = 0.02909
#>
#> Model df: 4. Total lags used: 12
# the Ljung-Box p-value for the top order implies that there is still significant
# autocorrelation left in the model's residuals
# let's try another model to see if we get a better fit while accepting
# some trade-off between the Ljung-Box p-value and the AIC score
model <- Arima(ts_data, order=c(3, 1, 0))
summary(model)
#> Series: ts_data
#> ARIMA(3,1,0)
#>
#> Coefficients:
#> ar1 ar2 ar3
#> 0.1030 0.0833 -0.2168
#> s.e. 0.1286 0.1279 0.1304
#>
#> sigma^2 = 109.8: log likelihood = -213.32
#> AIC=434.65 AICc=435.42 BIC=442.82
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.4974367 10.10942 7.874796 2.063276 84.31892 0.2149286
#> ACF1
#> Training set -0.04552645
checkresiduals(model)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(3,1,0)
#> Q* = 25.523, df = 9, p-value = 0.002444
#>
#> Model df: 3. Total lags used: 12
# this model's Ljung-Box p-value is actually lower (0.0024 vs 0.029), so significant residual
# autocorrelation remains, while its AIC is only slightly worse than that of ARIMA(2,1,2)
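# Suggested sketch (an addition, not in the original run): tabulate AIC and the
# Ljung-Box p-value for a few candidate orders to make the trade-off explicit
# (the third candidate is an arbitrary extra example).
candidates <- list(c(2, 1, 2), c(3, 1, 0), c(1, 1, 1))
for (ord in candidates) {
  fit <- Arima(ts_data, order = ord)
  lb  <- Box.test(residuals(fit), lag = 12, type = "Ljung-Box", fitdf = sum(ord[c(1, 3)]))
  cat("ARIMA(", paste(ord, collapse = ","), "): AIC =", round(AIC(fit), 2),
      " Ljung-Box p =", round(lb$p.value, 4), "\n")
}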
# predict the values
prediction <- forecast(model, h = 12)
prediction
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Nov 5 -23.04473 -36.47174 -9.617719 -43.57957 -2.509890
#> Dec 5 -24.66909 -44.65972 -4.678463 -55.24212 5.903936
#> Jan 6 -23.65864 -49.30818 1.990888 -62.88622 15.568930
#> Feb 6 -22.74769 -51.68857 6.193186 -67.00895 21.513562
#> Mar 6 -22.21746 -53.91732 9.482399 -70.69821 26.263291
#> Apr 6 -22.30609 -56.34158 11.729403 -74.35888 29.746703
#> May 6 -22.46859 -58.85751 13.920326 -78.12065 33.183457
#> Jun 6 -22.60770 -61.24241 16.027018 -81.69439 36.478999
#> Jul 6 -22.61634 -63.42746 18.194780 -85.03156 39.798882
#> Aug 6 -22.59358 -65.44677 20.259616 -88.13188 42.944726
#> Sep 6 -22.56179 -67.35910 22.235525 -91.07336 45.949789
#> Oct 6 -22.55474 -69.20268 24.093198 -93.89661 48.787126
# the point forecasts dip initially and then level off around -22 to -23, i.e. roughly flat over the horizon
Created on 2024-02-13 with reprex v2.0.2
Observations:
library(reprex)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
library(corrplot)
#> Warning: package 'corrplot' was built under R version 4.2.3
#> corrplot 0.92 loaded
fredr_set_key('48753a49458ab78ef7185cffd58175b6')
icnsa <- fredr(series_id = 'ICNSA')
icnsa$Date <- as.Date(icnsa$date)
ts_icnsa <- ts(icnsa$value, frequency = 52)
plot(stl(ts_icnsa, s.window="periodic"))
acf(ts_icnsa)
pacf(ts_icnsa)
ccnsa <- fredr(series_id = 'CCNSA')
ccnsa$Date <- as.Date(ccnsa$date)
ts_ccnsa <- ts(ccnsa$value, frequency = 52)
plot(stl(ts_ccnsa, s.window="periodic"))
acf(ts_ccnsa)
pacf(ts_ccnsa)
data <- merge(icnsa, ccnsa, by = "Date", suffixes = c("_icnsa", "_ccnsa"))
head(data, n =5)
#> Date date_icnsa series_id_icnsa value_icnsa realtime_start_icnsa
#> 1 1967-01-07 1967-01-07 ICNSA 346000 2024-02-22
#> 2 1967-01-14 1967-01-14 ICNSA 334000 2024-02-22
#> 3 1967-01-21 1967-01-21 ICNSA 277000 2024-02-22
#> 4 1967-01-28 1967-01-28 ICNSA 252000 2024-02-22
#> 5 1967-02-04 1967-02-04 ICNSA 274000 2024-02-22
#> realtime_end_icnsa date_ccnsa series_id_ccnsa value_ccnsa
#> 1 2024-02-22 1967-01-07 CCNSA 1594000
#> 2 2024-02-22 1967-01-14 CCNSA 1563000
#> 3 2024-02-22 1967-01-21 CCNSA 1551000
#> 4 2024-02-22 1967-01-28 CCNSA 1533000
#> 5 2024-02-22 1967-02-04 CCNSA 1534000
#> realtime_start_ccnsa realtime_end_ccnsa
#> 1 2024-02-22 2024-02-22
#> 2 2024-02-22 2024-02-22
#> 3 2024-02-22 2024-02-22
#> 4 2024-02-22 2024-02-22
#> 5 2024-02-22 2024-02-22
corr <- cor(data$value_icnsa, data$value_ccnsa, use = "complete.obs")
corr
#> [1] 0.714954
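# Suggested sketch (an addition, not in the original run): a cross-correlation plot shows
# whether continued claims lead or lag initial claims before using CCNSA as a regressor.
ccf(data$value_icnsa, data$value_ccnsa, lag.max = 26, main = "CCF: ICNSA vs CCNSA")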
ts_icnsa <- ts(data$value_icnsa, frequency = 52)
ts_ccnsa <- ts(data$value_ccnsa, frequency = 52)
model <- auto.arima(ts_icnsa, xreg = ts_ccnsa)
summary(model)
#> Series: ts_icnsa
#> Regression with ARIMA(2,1,1)(1,0,2)[52] errors
#>
#> Coefficients:
#> ar1 ar2 ma1 sar1 sma1 sma2 xreg
#> 0.5666 -0.2917 -0.0020 0.4014 -0.1051 0.0931 -0.0382
#> s.e. 0.0714 0.0350 0.0794 0.0523 0.0534 0.0232 0.0069
#>
#> sigma^2 = 6.941e+09: log likelihood = -37967.81
#> AIC=75951.63 AICc=75951.68 BIC=75999.62
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -14.66077 83197.81 36955.39 -0.4938972 9.336523 0.3891743
#> ACF1
#> Training set 6.786416e-05
# average week-over-week change in ICNSA
coeff <- mean(diff(ts_icnsa))
coeff
#> [1] -37.36434
# avoid naming the result 'forecast', which would shadow the forecast() function
fc <- forecast(model, xreg = tail(data$value_ccnsa, 1), h = 1)
forecast_mean <- mean(fc$mean)
cat("Forecasted Value:", forecast_mean, "\n")
#> Forecasted Value: 213860.3
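# Suggested alternative (a sketch; it assumes a univariate auto.arima fit of CCNSA is
# adequate): the forecast above holds CCNSA at its last observed value, so the covariate
# could instead be forecast first and that value fed in as xreg.
ccnsa_fit  <- auto.arima(ts_ccnsa)
ccnsa_next <- as.numeric(forecast(ccnsa_fit, h = 1)$mean)
forecast(model, xreg = ccnsa_next, h = 1)$mean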
Created on 2024-02-22 with reprex v2.0.2
Observations on the Data and Model:
Regression Diagnostics:
# Load required packages
library(reprex)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'dplyr' was built under R version 4.2.3
library(ggplot2)
library(zoo)
#> Warning: package 'zoo' was built under R version 4.2.3
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(xts)
#> Warning: package 'xts' was built under R version 4.2.3
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
# Set up API key to read the initial claims (ICNSA) dataset from FRED
fredr_set_key('48753a49458ab78ef7185cffd58175b6')
# Get the ICNSA data
# Print all the columns of the initial dataset for visualization
data <- fredr(series_id = 'ICNSA')
data$Date <- as.Date(data$date)
head(data, n = 5)
#> # A tibble: 5 × 6
#> date series_id value realtime_start realtime_end Date
#> <date> <chr> <dbl> <date> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-02-28 2024-02-28 1967-01-07
#> 2 1967-01-14 ICNSA 334000 2024-02-28 2024-02-28 1967-01-14
#> 3 1967-01-21 ICNSA 277000 2024-02-28 2024-02-28 1967-01-21
#> 4 1967-01-28 ICNSA 252000 2024-02-28 2024-02-28 1967-01-28
#> 5 1967-02-04 ICNSA 274000 2024-02-28 2024-02-28 1967-02-04
# creating time series object
ts <- ts(data$value, frequency = 52)
# plot ICNSA
plot(ts, main = "Date", xlab = "Date", ylab = "Uninsured Claims")
plot(stl(ts, s.window="periodic"), main = "Seasonal Decomposition")
# set the COVID-19 period
covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2021-12-31")
# Plot highlighting the covid period in our dataset
plot(data$date, data$value, type = "l", main = "Claims during Covid period", xlab = "Year", ylab = "Number")
abline(v = as.numeric(covid_start), col = "red", lty = 2)
abline(v = as.numeric(covid_end), col = "red", lty = 2)
# filter out the covid-19 period data
covid_period_data <- subset(data, Date >= covid_start & Date <= covid_end)
data_without_covid <- subset(data, Date < covid_start | Date > covid_end)
# Perform cubic spline interpolation for multiple values of spar to find the best fit
spar_values <- seq(0.1, 1.0, by = 0.1)
for (spar in spar_values) {
  # fit the spline on our data
  spline_fit <- smooth.spline(x = as.numeric(data_without_covid$Date), y = data_without_covid$value, spar = spar, tol = 1e-6)
  # Impute the new values using the spline fit
  imputed_data <- predict(spline_fit, x = as.numeric(covid_period_data$Date))$y
  # Update Covid period data with imputed values
  covid_period_data$value <- imputed_data
  # Update the data with new predicted values
  updated_data <- rbind(data_without_covid, covid_period_data)
  updated_data <- updated_data %>% arrange(date)
  # Plot updated data
  plot(updated_data$date, updated_data$value, type = "l", col = "red", lwd = 2,
       main = paste("Smoothed Time Series (spar =", spar, ")"), xlab = "Year", ylab = "Number")
  lines(data$date, data$value, col = "black", lwd = 2)
  legend("topleft", legend = c("Updated", "Original"), col = c("red", "black"), lty = 1, lwd = 2)
}
# after visualizing all the splines, we believe the one with spar = 0.4 gives the most
# natural fit of the data, and we select it for predicting our future values
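# Suggested sketch (an addition, not in the original run): as a check on the visual choice,
# smooth.spline can pick the smoothing parameter by leave-one-out cross-validation,
# which may land on a different value than the spar = 0.4 used below.
spline_cv <- smooth.spline(x = as.numeric(data_without_covid$Date),
                           y = data_without_covid$value, cv = TRUE, tol = 1e-6)
spline_cv$spar   # CV-selected smoothing parameter for comparison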
# selecting the best-fit spline
spline_fit_covid <- smooth.spline(x = as.numeric(data_without_covid$Date), y = data_without_covid$value, spar = 0.4, tol = 1e-6)
imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period_data$Date))$y
covid_period_data$value <- imputed_values
updated_data <- rbind(data_without_covid, covid_period_data)
updated_data <- updated_data %>% arrange(Date)
plot(updated_data$Date, updated_data$value, type = "l", col = "red", lwd = 2,
main = "Updated TS Vs Original TS", xlab = "Year", ylab = "Number")
lines(data$Date, data$value, col = "black", lwd = 2)
legend("topright", legend = c("Updated", "Original"), col = c("red", "black"), lty = 1, lwd = 2)
# Plot both time series to compare the changes
ts <- ts(data$value, frequency = 52)
new_ts <- ts(updated_data$value, frequency = 52)
plot(ts, col = "black", main = "Comparison of Time Series", ylab = "Value")
lines(new_ts, col = "red")
legend("topright", legend = c("Original", "Updated"), col = c("black", "red"), lty = 1, lwd = 2)
# Fit and forecast the multiplicative Holt-Winters model
hw_multiplicative <- HoltWinters(ts, seasonal = "multiplicative")
forecast_multiplicative <- forecast(hw_multiplicative, h = 1)
# Fit and forecast the additive Holt-Winters model
hw_additive <- HoltWinters(ts, seasonal = "additive")
forecast_additive <- forecast(hw_additive, h = 1)
# Print the forecasts
cat("Multiplicative Holt-Winters Forecast:", forecast_multiplicative$mean, "\n")
#> Multiplicative Holt-Winters Forecast: 197681.2
cat("Additive Holt-Winters Forecast:", forecast_additive$mean, "\n")
#> Additive Holt-Winters Forecast: 161954.6
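# Suggested sketch (an addition, not in the original run): compare the in-sample fit of
# the two Holt-Winters variants via the sum of squared errors stored on each fit.
cat("SSE (multiplicative):", hw_multiplicative$SSE, "\n")
cat("SSE (additive):", hw_additive$SSE, "\n")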
Created on 2024-02-28 with reprex v2.0.2
# Load required packages
library(reprex)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'dplyr' was built under R version 4.2.3
library(ggplot2)
library(zoo)
#> Warning: package 'zoo' was built under R version 4.2.3
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(xts)
#> Warning: package 'xts' was built under R version 4.2.3
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
library(KFAS)
#> Warning: package 'KFAS' was built under R version 4.2.3
#> Please cite KFAS in publications by using:
#>
#> Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(imputeTS)
#> Warning: package 'imputeTS' was built under R version 4.2.3
#>
#> Attaching package: 'imputeTS'
#> The following object is masked from 'package:zoo':
#>
#> na.locf
#> The following object is masked from 'package:tseries':
#>
#> na.remove
library(MARSS)
#> Warning: package 'MARSS' was built under R version 4.2.3
library(dplyr)
library(stats)
# Set up API key to read the initial claims (ICNSA) dataset from FRED
fredr_set_key('48753a49458ab78ef7185cffd58175b6')
# Get the ICNSA data
# Print all the columns of the initial dataset for visualization
data <- fredr(series_id = 'ICNSA')
data$Date <- as.Date(data$date)
head(data, n = 5)
#> # A tibble: 5 × 6
#> date series_id value realtime_start realtime_end Date
#> <date> <chr> <dbl> <date> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-03-07 2024-03-07 1967-01-07
#> 2 1967-01-14 ICNSA 334000 2024-03-07 2024-03-07 1967-01-14
#> 3 1967-01-21 ICNSA 277000 2024-03-07 2024-03-07 1967-01-21
#> 4 1967-01-28 ICNSA 252000 2024-03-07 2024-03-07 1967-01-28
#> 5 1967-02-04 ICNSA 274000 2024-03-07 2024-03-07 1967-02-04
# creating time series object
ts <- ts(data$value, frequency = 52)
# plot ICNSA
plot(ts, main = "ICNSA Initial Claims", xlab = "Date", ylab = "Initial Claims")
plot(stl(ts, s.window="periodic"), main = "Seasonal Decomposition")
# Plot highlighting the covid period in our dataset
plot(data$date, data$value, type = "l", main = "Claims during Covid period", xlab = "Year", ylab = "Number")
# set the COVID-19 period
covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2021-12-31")
abline(v = as.numeric(covid_start), col = "red", lty = 2)
abline(v = as.numeric(covid_end), col = "red", lty = 2)
# filter out the covid-19 period data
covid_period_data <- subset(data, Date >= covid_start & Date <= covid_end)
data_without_covid <- subset(data, Date < covid_start | Date > covid_end)
# Set values between Covid start and end date to NA
data$value[data$Date >= covid_start & data$Date <= covid_end] <- NA
# Plot data with NA values
plot(data$Date, data$value, type = "l",
main = "Initial Claims - Covid Period - Assigned NULL",
xlab = "Year",
ylab = "Number")
# Impute values for the COVID period using a state space model (Kalman smoother)
imputed_values <- na_kalman(data$value, smooth = TRUE)
data$value <- imputed_values
plot(data$Date, data$value, type = "l",
main = "Imputed Values - Kalman",
xlab = "Year",
ylab = "Number")
# New time series object
new_ts <- ts(data$value, frequency = 52)
# Let's fit our spline at spar = 0.4
spline_fit <- smooth.spline(x = as.numeric(data_without_covid$Date), y = data_without_covid$value, spar = 0.4)
# Imputing new values using spline fit
imputed_values_spline <- predict(spline_fit, x = as.numeric(covid_period_data$date))$y
# Updating Covid period data with imputed values
covid_period_data$value <- imputed_values_spline
# Combine imputed values
updated_data <- rbind(data_without_covid, covid_period_data)
updated_data <- updated_data %>% arrange(Date)
plot(updated_data$date, updated_data$value, type = "l", col = "red", lwd = 2,
main = "Updated (penalty - 0.2) Vs Original", xlab = "Year", ylab = "Number")
lines(data$Date, data$value, col = "black", lwd = 2)
legend("topright", legend = c("Updated", "Original"), col = c("red", "black"), lty = 1, lwd = 2)
ts_spline <- ts(updated_data$value, frequency = 52)
# Comparison of original series with imputed series
plot(new_ts, col = "red", type = "l", lty = 1, ylab = "Value", main = "Time Series Comparison")
lines(ts, col = "black", type = "l", lty = 2)
lines(ts_spline, col = "blue", type = "l", lty = 3)
legend("topright", legend = c("New (Kalman)", "Original", "Spline"), col = c("red", "black", "blue"), lty = 1:3)
# Seasonal decomposition to check trend and seasonality
decomposition <- stl(new_ts, s.window = "periodic")
plot(decomposition)
# Fitting a structural time series model with both trend and seasonal components
ssm_model <- StructTS(new_ts, type = "BSM")
ssm_model
#>
#> Call:
#> StructTS(x = new_ts, type = "BSM")
#>
#> Variances:
#> level slope seas epsilon
#> 2.967e+08 9.625e+02 2.199e+08 0.000e+00
summary(ssm_model)
#> Length Class Mode
#> coef 4 -none- numeric
#> loglik 1 -none- numeric
#> loglik0 1 -none- numeric
#> data 2982 ts numeric
#> residuals 2982 ts numeric
#> fitted 8946 mts numeric
#> call 3 -none- call
#> series 1 -none- character
#> code 1 -none- numeric
#> model 7 -none- list
#> model0 7 -none- list
#> xtsp 3 -none- numeric
# Use tsSmooth to obtain the smoothed state estimates
smoothed_data <- tsSmooth(ssm_model)
residuals <- residuals(ssm_model)
# Plot residuals to visualize the error component
plot(residuals, main="Residuals", ylab="Residuals", xlab="Time")
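# Suggested sketch (an addition, not in the original run): a one-step-ahead forecast can
# be produced directly from the fitted basic structural model.
bsm_pred <- predict(ssm_model, n.ahead = 1)
cat("BSM one-step forecast:", bsm_pred$pred, "+/-", 1.96 * bsm_pred$se, "\n")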
# Load IURNSA data
iurnsa_data <- fredr(series_id = "IURNSA")
iurnsa_data$Date <- as.Date(iurnsa_data$date)
# Align the ICNSA sample with IURNSA (which starts in 1971) so the two series are compatible
icnsa_data <- subset(data, Date >= as.Date("1971-01-01"))
# Remove last row of icnsa_data
icnsa_data <- icnsa_data[-nrow(icnsa_data), ]
merged_data <- merge(icnsa_data, iurnsa_data, by = "date", all.x = TRUE)
arima_model <- auto.arima(merged_data$value.x, xreg = merged_data$value.y, seasonal = TRUE)
summary(arima_model)
#> Series: merged_data$value.x
#> Regression with ARIMA(2,1,1) errors
#>
#> Coefficients:
#> ar1 ar2 ma1 xreg
#> 0.6878 0.1547 -0.9862 23256.533
#> s.e. 0.0209 0.0204 0.0041 3959.789
#>
#> sigma^2 = 2.443e+09: log likelihood = -33892.12
#> AIC=67794.23 AICc=67794.26 BIC=67823.87
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE ACF1
#> Training set -178.193 49377.9 33016.53 -1.38752 8.782257 0.9876402 -0.003897536
# Forecast with the IURNSA covariate; supplying two future covariate values yields a two-step forecast, averaged below
forecasted_value <- forecast(arima_model, h = 1, xreg = tail(merged_data$value.y, 2))
forecasted_value
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 2774 208369.2 145031.7 271706.6 111502.90 305235.5
#> 2775 210768.8 133396.8 288140.8 92438.51 329099.1
forecast_value <- mean(forecasted_value$mean, na.rm = TRUE)
forecast_value
#> [1] 209569
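# Suggested sketch (an addition, not in the original run): with regression-with-ARIMA-errors
# models the forecast horizon follows the number of future covariate rows supplied, so a
# strictly one-step forecast would pass a single future IURNSA value (here approximated by
# the latest observed one).
one_step <- forecast(arima_model, xreg = tail(merged_data$value.y, 1))
one_step$mean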
Created on 2024-03-07 with reprex v2.0.2
# Load required packages
library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr)
#> Warning: package 'readr' was built under R version 4.3.3
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.3
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.3.3
library(MARSS)
#> Warning: package 'MARSS' was built under R version 4.3.3
library(corrplot)
#> Warning: package 'corrplot' was built under R version 4.3.3
#> corrplot 0.92 loaded
library(vars)
#> Warning: package 'vars' was built under R version 4.2.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: zoo
#> Warning: package 'zoo' was built under R version 4.3.3
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.3.3
#> Loading required package: lmtest
#> Warning: package 'lmtest' was built under R version 4.3.3
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.2.3
#> Loading required package: lattice
# 25S Chemical Products
# 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing
# 25B Pharmaceutical and Medicine Manufacturing
# 25C Paint, Coating, and Adhesive Manufacturing
df_25s <- read_excel("C:/Study Material/R Studio/25STI.xlsx")
df_25a <- read_excel("C:/Study Material/R Studio/25ATI.xlsx")
df_25b <- read_excel("C:/Study Material/R Studio/25BTI.xlsx")
df_25c <- read_excel("C:/Study Material/R Studio/25CTI.xlsx")
df_25s$Period = as.Date(paste("01", df_25s$Period, sep = "-"), format = "%d-%b-%Y")
df_25a$Period = as.Date(paste("01", df_25a$Period, sep = "-"), format = "%d-%b-%Y")
df_25b$Period = as.Date(paste("01", df_25b$Period, sep = "-"), format = "%d-%b-%Y")
df_25c$Period = as.Date(paste("01", df_25c$Period, sep = "-"), format = "%d-%b-%Y")
df_25s <- subset(df_25s, Period <= as.Date("2024-02-01"))
df_25a <- subset(df_25a, Period <= as.Date("2024-02-01"))
df_25b <- subset(df_25b, Period <= as.Date("2024-02-01"))
df_25c <- subset(df_25c, Period <= as.Date("2024-02-01"))
df_25s$Value <- as.numeric(df_25s$Value)
df_25a$Value <- as.numeric(df_25a$Value)
df_25b$Value <- as.numeric(df_25b$Value)
df_25c$Value <- as.numeric(df_25c$Value)
plot(df_25s$Period, df_25s$Value, type = "l",
main = "25S Chemical Products",
xlab = "Period",
ylab = "Value")
plot(df_25a$Period, df_25a$Value, type = "l",
main = "25A Pesticide, Fertilizer and Other Agricultural Chemical Manufacturing",
xlab = "Period",
ylab = "Value")
plot(df_25b$Period, df_25b$Value, type = "l",
main = "25B Pharmaceutical and Medicine Manufacturing",
xlab = "Period",
ylab = "Value")
plot(df_25c$Period, df_25c$Value, type = "l",
main = "25C Paint, Coating, and Adhesive, Manufacturing",
xlab = "Period",
ylab = "Value")
# 4 in 1
ggplot() +
  geom_line(data = df_25s, aes(x = Period, y = Value)) +
  geom_line(data = df_25a, aes(x = Period, y = Value), color = "red") +
  geom_line(data = df_25b, aes(x = Period, y = Value), color = "blue") +
  geom_line(data = df_25c, aes(x = Period, y = Value), color = "green") +
  labs(x = "Period", y = "Value")
cor(df_25s$Value, df_25a$Value)
#> [1] 0.9606849
cor(df_25s$Value, df_25b$Value)
#> [1] 0.9899559
cor(df_25s$Value, df_25c$Value)
#> [1] 0.9040823
cor(df_25a$Value, df_25b$Value)
#> [1] 0.9513506
cor(df_25a$Value, df_25c$Value)
#> [1] 0.8941125
cor(df_25b$Value, df_25c$Value)
#> [1] 0.8781852
ccf(df_25s$Value, df_25a$Value)
ccf(df_25s$Value, df_25b$Value)
ccf(df_25s$Value, df_25c$Value)
ccf(df_25a$Value, df_25b$Value)
ccf(df_25a$Value, df_25c$Value)
ccf(df_25b$Value, df_25c$Value)
adf.test(df_25s$Value, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25s$Value
#> Dickey-Fuller = -3.2907, Lag order = 7, p-value = 0.07273
#> alternative hypothesis: stationary
df_25s$Value <- c(NA, diff(df_25s$Value))
df_25s <- subset(df_25s, !is.na(Value))
adf.test(df_25s$Value, alternative = "stationary")
#> Warning in adf.test(df_25s$Value, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25s$Value
#> Dickey-Fuller = -4.7374, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
acf(df_25s$Value)
pacf(df_25s$Value)
adf.test(df_25a$Value, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25a$Value
#> Dickey-Fuller = -2.21, Lag order = 7, p-value = 0.4885
#> alternative hypothesis: stationary
df_25a$Value <- c(NA, diff(df_25a$Value))
df_25a <- subset(df_25a, !is.na(Value))
adf.test(df_25a$Value, alternative = "stationary")
#> Warning in adf.test(df_25a$Value, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25a$Value
#> Dickey-Fuller = -7.0763, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
acf(df_25a$Value)
pacf(df_25a$Value)
adf.test(df_25b$Value, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25b$Value
#> Dickey-Fuller = -2.879, Lag order = 7, p-value = 0.206
#> alternative hypothesis: stationary
df_25b$Value <- c(NA, diff(df_25b$Value))
df_25b <- subset(df_25b, !is.na(Value))
adf.test(df_25b$Value, alternative = "stationary")
#> Warning in adf.test(df_25b$Value, alternative = "stationary"): p-value smaller
#> than printed p-value
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25b$Value
#> Dickey-Fuller = -6.2545, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
acf(df_25b$Value)
pacf(df_25b$Value)
adf.test(df_25c$Value, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25c$Value
#> Dickey-Fuller = -3.7952, Lag order = 7, p-value = 0.01947
#> alternative hypothesis: stationary
df_25c$Value <- c(NA, diff(df_25c$Value))
df_25c <- subset(df_25c, !is.na(Value))
adf.test(df_25c$Value, alternative = "stationary")
#>
#> Augmented Dickey-Fuller Test
#>
#> data: df_25c$Value
#> Dickey-Fuller = -3.7229, Lag order = 7, p-value = 0.02309
#> alternative hypothesis: stationary
acf(df_25c$Value)
pacf(df_25c$Value)
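# Suggested sketch (an addition, not in the original run): ndiffs() re-estimates the required
# order of differencing; run on the already-differenced series it should report 0.
sapply(list(df_25s$Value, df_25a$Value, df_25b$Value, df_25c$Value), ndiffs, test = "adf")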
ts_25s = ts(df_25s$Value, frequency = 12)
plot(stl(ts_25s, s.window="periodic"), main = "25S TI: Seasonal Decomposition")
ts_25a = ts(df_25a$Value, frequency = 12)
plot(stl(ts_25a, s.window="periodic"), main = "25A TI: Seasonal Decomposition")
ts_25b = ts(df_25b$Value, frequency = 12)
plot(stl(ts_25b, s.window="periodic"), main = "25B TI: Seasonal Decomposition")
ts_25c = ts(df_25c$Value, frequency = 12)
plot(stl(ts_25c, s.window="periodic"), main = "25C TI: Seasonal Decomposition")
# 4 in 1
ggplot() +
  geom_line(data = df_25s, aes(x = Period, y = Value)) +
  geom_line(data = df_25a, aes(x = Period, y = Value), color = "red") +
  geom_line(data = df_25b, aes(x = Period, y = Value), color = "blue") +
  geom_line(data = df_25c, aes(x = Period, y = Value), color = "green") +
  labs(x = "Period", y = "Value")
# 25S Chemical Products
# 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing
# 25B Pharmaceutical and Medicine Manufacturing
# 25C Paint, Coating, and Adhesive Manufacturing
ts_df <- data.frame(Y1=ts_25s, Y2=ts_25a, Y3=ts_25b, Y4=ts_25c)
ts_obj <- ts(ts_df)
# VAR Model
var_model <- VAR(ts_obj, p = 1)
summary(var_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: Y1, Y2, Y3, Y4
#> Deterministic variables: const
#> Sample size: 384
#> Log Likelihood: -9709.25
#> Roots of the characteristic polynomial:
#> 0.6195 0.1965 0.0824 0.02111
#> Call:
#> VAR(y = ts_obj, p = 1)
#>
#>
#> Estimation results for equation Y1:
#> ===================================
#> Y1 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.57145 0.07175 7.964 1.96e-14 ***
#> Y2.l1 -0.80294 0.28004 -2.867 0.00437 **
#> Y3.l1 -0.52817 0.10312 -5.122 4.82e-07 ***
#> Y4.l1 0.30315 0.64276 0.472 0.63746
#> const 147.61450 26.34659 5.603 4.06e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 478.1 on 379 degrees of freedom
#> Multiple R-Squared: 0.1801, Adjusted R-squared: 0.1714
#> F-statistic: 20.81 on 4 and 379 DF, p-value: 1.597e-15
#>
#>
#> Estimation results for equation Y2:
#> ===================================
#> Y2 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.02525 0.01366 1.849 0.0652 .
#> Y2.l1 0.06611 0.05331 1.240 0.2157
#> Y3.l1 -0.03373 0.01963 -1.719 0.0865 .
#> Y4.l1 0.09959 0.12235 0.814 0.4162
#> const 7.69050 5.01516 1.533 0.1260
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 91.01 on 379 degrees of freedom
#> Multiple R-Squared: 0.02855, Adjusted R-squared: 0.0183
#> F-statistic: 2.785 on 4 and 379 DF, p-value: 0.02649
#>
#>
#> Estimation results for equation Y3:
#> ===================================
#> Y3 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.03433 0.05064 0.678 0.4982
#> Y2.l1 -0.16137 0.19763 -0.817 0.4147
#> Y3.l1 0.01440 0.07277 0.198 0.8432
#> Y4.l1 -0.82932 0.45361 -1.828 0.0683 .
#> const 106.38511 18.59333 5.722 2.14e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 337.4 on 379 degrees of freedom
#> Multiple R-Squared: 0.01282, Adjusted R-squared: 0.002406
#> F-statistic: 1.231 on 4 and 379 DF, p-value: 0.2972
#>
#>
#> Estimation results for equation Y4:
#> ===================================
#> Y4 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.040659 0.005376 7.563 3.01e-13 ***
#> Y2.l1 -0.035630 0.020983 -1.698 0.0903 .
#> Y3.l1 -0.045399 0.007726 -5.876 9.22e-09 ***
#> Y4.l1 0.267599 0.048161 5.556 5.20e-08 ***
#> const 2.899038 1.974086 1.469 0.1428
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 35.82 on 379 degrees of freedom
#> Multiple R-Squared: 0.2921, Adjusted R-squared: 0.2847
#> F-statistic: 39.1 on 4 and 379 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> Y1 Y2 Y3 Y4
#> Y1 228573 8358.68 117792.82 2670.17
#> Y2 8359 8282.17 -38.41 -24.23
#> Y3 117793 -38.41 113838.64 321.61
#> Y4 2670 -24.23 321.61 1283.24
#>
#> Correlation matrix of residuals:
#> Y1 Y2 Y3 Y4
#> Y1 1.0000 0.192112 0.730233 0.155910
#> Y2 0.1921 1.000000 -0.001251 -0.007431
#> Y3 0.7302 -0.001251 1.000000 0.026609
#> Y4 0.1559 -0.007431 0.026609 1.000000
forecast_var <- forecast(var_model, h = 1, frequency = 12)
forecast_y1 <- mean(forecast_var$forecast$Y1$mean)
cat("Forecasted Value for 25S Chemical Products:", forecast_y1, "\n")
#> Forecasted Value for 25S Chemical Products: 90.81823
forecast_y2 <-mean(forecast_var$forecast$Y2$mean)
cat("Forecasted Value for 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing:", forecast_y2, "\n")
#> Forecasted Value for 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing: 7.383598
forecast_y3 <- mean(forecast_var$forecast$Y3$mean)
cat("Forecasted Value for 25B Pharmaceutical and Medicine Manufacturing:", forecast_y3, "\n")
#> Forecasted Value for 25B Pharmaceutical and Medicine Manufacturing: 110.0477
forecast_y4 <- mean(forecast_var$forecast$Y4$mean)
cat("Forecasted Value for 25C Paint, Coating, and Adhesive, Manufacturing:", forecast_y4, "\n")
#> Forecasted Value for 25C Paint, Coating, and Adhesive, Manufacturing: -3.597713
var_model_p <- VAR(ts_obj, p = 3)
summary(var_model_p)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: Y1, Y2, Y3, Y4
#> Deterministic variables: const
#> Sample size: 382
#> Log Likelihood: -9596.631
#> Roots of the characteristic polynomial:
#> 0.8538 0.6481 0.5543 0.5486 0.5486 0.4684 0.4684 0.4649 0.4491 0.4491 0.4411 0.4411
#> Call:
#> VAR(y = ts_obj, p = 3)
#>
#>
#> Estimation results for equation Y1:
#> ===================================
#> Y1 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + Y1.l2 + Y2.l2 + Y3.l2 + Y4.l2 + Y1.l3 + Y2.l3 + Y3.l3 + Y4.l3 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.43830 0.08225 5.329 1.72e-07 ***
#> Y2.l1 -0.80206 0.27850 -2.880 0.004209 **
#> Y3.l1 -0.39972 0.11325 -3.530 0.000469 ***
#> Y4.l1 -0.69368 0.70540 -0.983 0.326063
#> Y1.l2 0.16991 0.08622 1.971 0.049520 *
#> Y2.l2 -0.30048 0.28379 -1.059 0.290379
#> Y3.l2 -0.07193 0.11370 -0.633 0.527333
#> Y4.l2 1.04045 0.70361 1.479 0.140063
#> Y1.l3 0.13338 0.08751 1.524 0.128325
#> Y2.l3 -0.05128 0.28489 -0.180 0.857241
#> Y3.l3 0.03905 0.11574 0.337 0.735983
#> Y4.l3 -0.42614 0.68232 -0.625 0.532651
#> const 110.74442 28.29761 3.914 0.000108 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 469.9 on 369 degrees of freedom
#> Multiple R-Squared: 0.2281, Adjusted R-squared: 0.203
#> F-statistic: 9.088 on 12 and 369 DF, p-value: 2.346e-15
#>
#>
#> Estimation results for equation Y2:
#> ===================================
#> Y2 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + Y1.l2 + Y2.l2 + Y3.l2 + Y4.l2 + Y1.l3 + Y2.l3 + Y3.l3 + Y4.l3 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.0200294 0.0156672 1.278 0.20190
#> Y2.l1 0.0772226 0.0530504 1.456 0.14634
#> Y3.l1 -0.0307709 0.0215723 -1.426 0.15460
#> Y4.l1 0.0002908 0.1343688 0.002 0.99827
#> Y1.l2 0.0328210 0.0164246 1.998 0.04642 *
#> Y2.l2 0.0390587 0.0540587 0.723 0.47043
#> Y3.l2 -0.0312264 0.0216580 -1.442 0.15021
#> Y4.l2 0.3529468 0.1340279 2.633 0.00881 **
#> Y1.l3 -0.0135791 0.0166695 -0.815 0.41582
#> Y2.l3 -0.0637017 0.0542672 -1.174 0.24121
#> Y3.l3 0.0193455 0.0220467 0.877 0.38080
#> Y4.l3 -0.3555227 0.1299723 -2.735 0.00653 **
#> const 7.0857831 5.3903220 1.315 0.18948
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 89.51 on 369 degrees of freedom
#> Multiple R-Squared: 0.08328, Adjusted R-squared: 0.05347
#> F-statistic: 2.794 on 12 and 369 DF, p-value: 0.001151
#>
#>
#> Estimation results for equation Y3:
#> ===================================
#> Y3 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + Y1.l2 + Y2.l2 + Y3.l2 + Y4.l2 + Y1.l3 + Y2.l3 + Y3.l3 + Y4.l3 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.0595757 0.0588507 1.012 0.3120
#> Y2.l1 -0.1623027 0.1992737 -0.814 0.4159
#> Y3.l1 -0.0302021 0.0810323 -0.373 0.7096
#> Y4.l1 -0.4771400 0.5047308 -0.945 0.3451
#> Y1.l2 -0.0710367 0.0616960 -1.151 0.2503
#> Y2.l2 -0.0142740 0.2030612 -0.070 0.9440
#> Y3.l2 0.1938417 0.0813540 2.383 0.0177 *
#> Y4.l2 -0.5769452 0.5034502 -1.146 0.2525
#> Y1.l3 -0.0009571 0.0626160 -0.015 0.9878
#> Y2.l3 0.0735605 0.2038443 0.361 0.7184
#> Y3.l3 0.0865111 0.0828141 1.045 0.2969
#> Y4.l3 0.4358522 0.4882163 0.893 0.3726
#> const 88.3532020 20.2477201 4.364 1.66e-05 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 336.2 on 369 degrees of freedom
#> Multiple R-Squared: 0.04493, Adjusted R-squared: 0.01387
#> F-statistic: 1.447 on 12 and 369 DF, p-value: 0.1425
#>
#>
#> Estimation results for equation Y4:
#> ===================================
#> Y4 = Y1.l1 + Y2.l1 + Y3.l1 + Y4.l1 + Y1.l2 + Y2.l2 + Y3.l2 + Y4.l2 + Y1.l3 + Y2.l3 + Y3.l3 + Y4.l3 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Y1.l1 0.022011 0.005962 3.692 0.000256 ***
#> Y2.l1 -0.036180 0.020187 -1.792 0.073910 .
#> Y3.l1 -0.021961 0.008209 -2.675 0.007800 **
#> Y4.l1 0.135365 0.051131 2.647 0.008458 **
#> Y1.l2 0.020041 0.006250 3.207 0.001461 **
#> Y2.l2 -0.042337 0.020571 -2.058 0.040282 *
#> Y3.l2 -0.019101 0.008241 -2.318 0.021015 *
#> Y4.l2 0.048952 0.051001 0.960 0.337769
#> Y1.l3 0.008129 0.006343 1.282 0.200821
#> Y2.l3 -0.026463 0.020650 -1.282 0.200819
#> Y3.l3 -0.016983 0.008389 -2.024 0.043650 *
#> Y4.l3 0.197127 0.049458 3.986 8.11e-05 ***
#> const 2.129868 2.051154 1.038 0.299775
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 34.06 on 369 degrees of freedom
#> Multiple R-Squared: 0.3743, Adjusted R-squared: 0.354
#> F-statistic: 18.4 on 12 and 369 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> Y1 Y2 Y3 Y4
#> Y1 220828 7340.22 120144.4 2157.16
#> Y2 7340 8012.79 547.2 -35.25
#> Y3 120144 547.24 113059.3 550.13
#> Y4 2157 -35.25 550.1 1160.25
#>
#> Correlation matrix of residuals:
#> Y1 Y2 Y3 Y4
#> Y1 1.0000 0.17450 0.76037 0.13477
#> Y2 0.1745 1.00000 0.01818 -0.01156
#> Y3 0.7604 0.01818 1.00000 0.04803
#> Y4 0.1348 -0.01156 0.04803 1.00000
forecast_var_p <- forecast(var_model_p, h =1, frequency = 12)
forecast_p_y1 <- mean(forecast_var_p$forecast$Y1$mean)
cat("Forecasted Value for 25S Chemical Products:", forecast_p_y1, "\n")
#> Forecasted Value for 25S Chemical Products: 5.004572
forecast_p_y2 <-mean(forecast_var_p$forecast$Y2$mean)
cat("Forecasted Value for 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing:", forecast_p_y2, "\n")
#> Forecasted Value for 25A Pesticide, Fertilizer, and Other Agricultural Chemical Manufacturing: -3.631642
forecast_p_y3 <- mean(forecast_var_p$forecast$Y3$mean)
cat("Forecasted Value for 25B Pharmaceutical and Medicine Manufacturing:", forecast_p_y3, "\n")
#> Forecasted Value for 25B Pharmaceutical and Medicine Manufacturing: 189.0412
forecast_p_y4 <- mean(forecast_var_p$forecast$Y4$mean)
cat("Forecasted Value for 25C Paint, Coating, and Adhesive, Manufacturing:", forecast_p_y4, "\n")
#> Forecasted Value for 25C Paint, Coating, and Adhesive, Manufacturing: -18.50666
cat("AIC for VAR (p=1)", AIC(var_model), "AIC for VAR (p=3)", AIC(var_model_p))
#> AIC for VAR (p=1) 19458.5 AIC for VAR (p=3) 19297.26
cat("BIC for VAR (p=1)", BIC(var_model), "BIC for VAR (p=3)", BIC(var_model_p))
#> BIC for VAR (p=1) 19537.51 BIC for VAR (p=3) 19502.42
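# Suggested sketch (an addition, not in the original run): vars::VARselect reports the lag
# order preferred by the AIC, HQ, SC, and FPE criteria over a range of lags.
VARselect(ts_obj, lag.max = 8, type = "const")$selection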
# Granger causality test
granger_results <- causality(var_model_p)
#> Warning in causality(var_model_p):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (Y1) as cause variable.
print(granger_results)
#> $Granger
#>
#> Granger causality H0: Y1 do not Granger-cause Y2 Y3 Y4
#>
#> data: VAR object var_model_p
#> F-Test = 6.8727, df1 = 9, df2 = 1476, p-value = 9.666e-10
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: Y1 and Y2 Y3 Y4
#>
#> data: VAR object var_model_p
#> Chi-squared = 145.33, df = 3, p-value < 2.2e-16
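# Suggested sketch (an addition, not in the original run): rerun the Granger test with the
# cause variable specified explicitly for each series, avoiding the default flagged above.
for (v in c("Y1", "Y2", "Y3", "Y4")) {
  g <- causality(var_model_p, cause = v)
  cat(v, "Granger-causes the others: p-value =", signif(g$Granger$p.value, 4), "\n")
}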
# BigVAR
data_matrix <- as.matrix(ts_df)
sparse_var_model <- BigVAR.fit(data_matrix, p = 1, lambda = 0.1, struct = "Lag")
summary(sparse_var_model)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -0.82932 -0.03807 0.03749 13.17876 0.37022 147.61450
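# Suggested sketch (an addition, not in the original run; the grid settings are assumptions):
# instead of fixing lambda = 0.1, select the penalty by BigVAR's rolling cross-validation.
big_spec <- constructModel(data_matrix, p = 1, struct = "Lag", gran = c(50, 10), verbose = FALSE)
big_cv <- cv.BigVAR(big_spec)
big_cv                        # prints the selected penalty and evaluation summary
predict(big_cv, n.ahead = 1)  # one-step-ahead forecast from the selected model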
Created on 2024-04-11 with reprex v2.1.0