setwd("C:/Users/pavan varma/Desktop/Spring Sem")
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tsbox)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
# Set FRED API key
fredr_set_key("ad8fa632ef9ee5e564d414be20b7766b")
# Fetch ICNSA data from FRED
icnsa <- fredr(series_id = "ICNSA")
# Convert 'icnsa' to a time series object
icnsa_ts <- ts(icnsa$value, start = c(1978, 1), frequency = 365.25/7) # Adjusted frequency considering leap years (365.25 days in a year)
# Remove NA values
icnsa_ts <- na.omit(icnsa_ts)
# Define COVID years (replace with actual COVID years)
covid_years <- c(2020, 2021)
# Plot ICNSA values over time
plot(icnsa_ts, type = "l", xlab = "Year", ylab = "ICNSA", main = "ICNSA Time Series Data")
# Highlight COVID years
for (year in covid_years) {
  abline(v = year, col = "red", lwd = 2)      # start of the COVID year (ts axis is in decimal years)
  abline(v = year + 1, col = "red", lwd = 2)  # end of the COVID year
}
# Calculate ACF and PACF
acf_values <- acf(icnsa_ts)
pacf_values <- pacf(icnsa_ts)
# Extract significant lag values from ACF and PACF
lag_acf <- which(abs(acf_values$acf) > 0.2) # Adjust threshold as needed
lag_pacf <- which(abs(pacf_values$acf) > 0.2) # Adjust threshold as needed
# Choose the maximum lag from ACF and PACF
lag_value <- max(max(lag_acf), max(lag_pacf))
# Create lagged variables
icnsa_lagged <- lag(icnsa_ts, k = lag_value)
# Remove NA values introduced by lagging
icnsa_lagged <- na.omit(icnsa_lagged)
# Fit lag regression model including COVID years
model_including_covid <- lm(icnsa_ts ~ icnsa_lagged)
# Forecast next week's unemployment figure including COVID years
future_data <- lag(icnsa_ts, k = lag_value)
forecast_value_including_covid <- predict(model_including_covid, newdata = data.frame(icnsa_lagged = tail(future_data, 1)))
# Print the forecasted value including COVID years
print(paste("Forecasted next week's unemployment figure including COVID years:", forecast_value_including_covid))
#> [1] "Forecasted next week's unemployment figure including COVID years: 232727"
Created on 2024-02-09 with reprex v2.1.0
- Forecasted next week's unemployment figure: 232727
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(ggplot2)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(tseries)
library(reprex)
# Working directory
setwd("C:/Users/pavan varma/Desktop/Spring Sem/TSA")
# Loading data
data <- read.csv("OECD.csv", header = TRUE)
# Converting date column to Date type
data$date <- as.Date(data$DATE)
# Removing rows with missing values
data <- na.omit(data)
# Check the structure of your data
str(data)
#> 'data.frame': 275 obs. of 3 variables:
#> $ DATE : chr "2001-01-01" "2001-02-01" "2001-03-01" "2001-04-01" ...
#> $ OECDLRHUTTTTSTSAM: num 6.46 6.47 6.47 6.5 6.52 ...
#> $ date : Date, format: "2001-01-01" "2001-02-01" ...
# Visualizing data
ggplot(data, aes(x = date, y = OECDLRHUTTTTSTSAM)) +
geom_line() +
labs(title = "Harmonized Unemployment Monthly Rates", x = "Date", y = "Unemployment Rate")
# Checking for seasonality
decomposition <- decompose(ts(data$OECDLRHUTTTTSTSAM, frequency = 12))
seasonal <- decomposition$seasonal
seasonal_plot <- ggplot() +
  geom_line(aes(x = data$date, y = as.numeric(seasonal))) +
  labs(title = "Seasonal Component", x = "Date", y = "Seasonal Index")
print(seasonal_plot)
# ADF test for stationarity
adf_test <- adf.test(data$OECDLRHUTTTTSTSAM)
adf_test_result <- ifelse(adf_test$p.value < 0.05, "Stationary", "Non-Stationary")
print(paste("ADF Test Result:", adf_test_result))
#> [1] "ADF Test Result: Non-Stationary"
# Identifying Non-Seasonal Parameters (p, d, q)
acf_data <- acf(diff(data$OECDLRHUTTTTSTSAM), main = "ACF of Differenced Data")
pacf_data <- pacf(diff(data$OECDLRHUTTTTSTSAM), main = "PACF of Differenced Data")
# Identifying Seasonal Parameters (P, D, Q)
sacf_data <- acf(data$OECDLRHUTTTTSTSAM, lag.max = 12*2, main = "SACF of Data")
spacf_data <- pacf(data$OECDLRHUTTTTSTSAM, lag.max = 12*2, main = "SPACF of Data")
# Manual ARIMA model specification
p <- 1
d <- 0
q <- 1
P <- 1
D <- 1
Q <- 1
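With the orders fixed by hand above, a cross-check against automatic order selection (auto.arima from the already-loaded forecast package) can flag a badly mis-specified choice; a minimal sketch, not part of the original run:

``` r
# Sketch: automatic order selection on the same monthly series for comparison
auto_fit <- auto.arima(ts(data$OECDLRHUTTTTSTSAM, frequency = 12), seasonal = TRUE)
summary(auto_fit)
```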
# Fitting ARIMA model
fit <- arima(data$OECDLRHUTTTTSTSAM, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = 12))
#> Warning in log(s2): NaNs produced
#> Warning in log(s2): NaNs produced
# Model summary
summary(fit)
#>
#> Call:
#> arima(x = data$OECDLRHUTTTTSTSAM, order = c(p, d, q), seasonal = list(order = c(P,
#> D, Q), period = 12))
#>
#> Coefficients:
#> ar1 ma1 sar1 sma1
#> 0.9796 0.1598 0.0258 -0.9440
#> s.e. 0.0157 0.0636 0.0703 0.0723
#>
#> sigma^2 estimated as 0.04811: log likelihood = 12.8, aic = -15.59
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.007383039 0.2144984 0.07564254 -0.1920257 1.096264 1.009035
#> ACF1
#> Training set -0.0001888289
# Model diagnostics
checkresiduals(fit)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(1,0,1)(1,1,1)[12]
#> Q* = 7.0704, df = 6, p-value = 0.3144
#>
#> Model df: 4. Total lags used: 10
# Forecasting one period ahead
forecast_values <- forecast(fit, h = 1)
# Plot forecast
plot(forecast_values)
# Print forecast summary
summary(forecast_values)
#>
#> Forecast method: ARIMA(1,0,1)(1,1,1)[12]
#>
#> Model Information:
#>
#> Call:
#> arima(x = data$OECDLRHUTTTTSTSAM, order = c(p, d, q), seasonal = list(order = c(P,
#> D, Q), period = 12))
#>
#> Coefficients:
#> ar1 ma1 sar1 sma1
#> 0.9796 0.1598 0.0258 -0.9440
#> s.e. 0.0157 0.0636 0.0703 0.0723
#>
#> sigma^2 estimated as 0.04811: log likelihood = 12.8, aic = -15.59
#>
#> Error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.007383039 0.2144984 0.07564254 -0.1920257 1.096264 1.009035
#> ACF1
#> Training set -0.0001888289
#>
#> Forecasts:
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 276 4.862572 4.580266 5.144878 4.430822 5.294322
Created on 2024-02-13 with reprex v2.1.0
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
fredr_set_key("ad8fa632ef9ee5e564d414be20b7766b")
icnsa <- fredr(series_id = "ICNSA")
icnsa$date <- as.Date(icnsa$date) # Adjusted column name to lowercase
moving_avg <- rollmean(icnsa$value, k = 4, align = "right", fill = NA) # Adjusted column name to lowercase
plot(icnsa$date, icnsa$value, type = "l", xlab = "Date", ylab = "Unemployment", main = "Unemployment over Time", col = "blue")
lines(icnsa$date, moving_avg, col = "red")
legend("topright", legend = c("ICNSA", "4-week Moving Average"), col = c("blue", "red"), lty = 1)
rect(as.Date("2020-01-01"), par("usr")[3], as.Date("2022-12-31"), par("usr")[4], col = "gray", border = NA, density = 30)
![](https://i.imgur.com/OZIxViT.png)
# Forecast of today's unemployment figure (one step ahead, carrying forward the latest 4-week moving average)
forecast_today <- rep(moving_avg[length(moving_avg)], 1)
print(paste("Forecasted unemployment figure for today:", forecast_today))
#> [1] "Forecasted unemployment figure for today: 242689.75"
Created on 2024-02-16 with reprex v2.1.0
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(tsbox)
# Set FRED API key
fredr_set_key("ad8fa632ef9ee5e564d414be20b7766b")
# Fetching ICNSA data from FRED
icnsa <- fredr(series_id = "ICNSA")
# Fetching NYCCLAIMS data from FRED
nycclaims <- fredr(series_id = "NYCCLAIMS")
# Removing missing values
icnsa <- na.omit(icnsa)
nycclaims <- na.omit(nycclaims)
# Merging datasets based on date
merged_data <- merge(icnsa, nycclaims, by = "date", all = FALSE)
# Removing missing values
merged_data <- na.omit(merged_data)
# Visualize the relationship between ICNSA and NYCCLAIMS
plot(merged_data$date, merged_data$value.x, type = "l", main = "ICNSA vs. NYCCLAIMS", ylab = "ICNSA", xlab = "Date")
lines(merged_data$date, merged_data$value.y, col = "red")
# Calculating the correlation coefficient
correlation <- cor(merged_data$value.x, merged_data$value.y)
print(paste("Correlation coefficient between ICNSA and NYCCLAIMS:", correlation))
#> [1] "Correlation coefficient between ICNSA and NYCCLAIMS: 0.674691579063901"
# Cross-correlation function
ccf_result <- ccf(merged_data$value.x, merged_data$value.y, lag.max = 10)
plot(ccf_result, main = "Cross-correlation Function: ICNSA vs. NYCCLAIMS", xlab = "Lag", ylab = "Cross-correlation")
# Autocorrelation function (ACF)
acf_result <- acf(merged_data$value.x, main = "Autocorrelation Function: ICNSA")
# Partial autocorrelation function (PACF)
pacf_result <- pacf(merged_data$value.x, main = "Partial Autocorrelation Function: ICNSA")
# Removing missing values
merged_data <- na.omit(merged_data)
# Fitting regARIMA model with covariates using automatic model identification
regarima_model <- auto.arima(merged_data$value.x, xreg = merged_data$value.y)
# Summary of the model
summary(regarima_model)
#> Series: merged_data$value.x
#> Regression with ARIMA(1,0,3) errors
#>
#> Coefficients:
#> ar1 ma1 ma2 ma3 intercept xreg
#> 0.8383 0.5869 0.1333 -0.0473 347024.69 0.0977
#> s.e. 0.0235 0.0296 0.0368 0.0289 32136.24 0.1025
#>
#> sigma^2 = 9.369e+09: log likelihood = -25577.49
#> AIC=51168.98 AICc=51169.03 BIC=51208.12
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE ACF1
#> Training set 0.1140441 96647.05 45095.5 -2.021267 11.48395 1.142174 0.001482879
# Regression diagnostics
checkresiduals(regarima_model)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(1,0,3) errors
#> Q* = 9.4649, df = 6, p-value = 0.1491
#>
#> Model df: 4. Total lags used: 10
# Forecast with the regARIMA model
forecast_result <- forecast(regarima_model, xreg = merged_data$value.y)
# Plot of the forecast
plot(forecast_result)
# The most recent value of NYCCLAIMS
latest_nycclaims <- tail(merged_data$value.y, 1)
# Forecast of ICNSA for the current week using the merged model
forecast_icnsa <- forecast(regarima_model, xreg = latest_nycclaims)
# Extracting the point forecast for the current week
icnsa_point_forecast_current <- forecast_icnsa$mean[1]
# Printing the point forecast for ICNSA from the merged model for the current week
cat("Point forecast for ICNSA from the merged model for the current week:", icnsa_point_forecast_current, "\n")
#> Point forecast for ICNSA from the merged model for the current week: 226040.1
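Because next week's NYCCLAIMS value is not yet published, the forecast above reuses the latest observed value as the regressor. An alternative sketch is to forecast the covariate one step ahead first and feed that into the regARIMA model (assumes regarima_model and merged_data from above; treating a univariate auto.arima as adequate for the covariate is an assumption):

``` r
# Sketch: forecast the covariate, then use the forecast as next week's xreg
nyc_model <- auto.arima(merged_data$value.y)
nyc_next <- as.numeric(forecast(nyc_model, h = 1)$mean)
icnsa_next <- forecast(regarima_model, xreg = nyc_next)
cat("ICNSA forecast with forecasted NYCCLAIMS as xreg:", icnsa_next$mean[1], "\n")
```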
Created on 2024-02-21 with reprex v2.1.0
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(tsbox)
# Set FRED API key
fredr_set_key("ad8fa632ef9ee5e564d414be20b7766b")
# Fetching ICNSA data from FRED
icnsa = fredr(series_id = "ICNSA")
# Convert 'date' column to Date type
icnsa$date = as.Date(icnsa$date)
# Removing missing values
icnsa = na.omit(icnsa)
# Plotting the ICNSA data
plot(icnsa$date, icnsa$value, type = "l", xlab = "Date", ylab = "Initial Claims",
main = "Initial Claims over Time")
# Adding vertical lines for potential start and end dates
abline(v = as.Date("2020-03-01"), col = "red", lty = 2)
abline(v = as.Date("2021-06-30"), col = "blue", lty = 2)
# Fitting a cubic smoothing spline with lambda = 0.5
spline_fit = smooth.spline(icnsa$date, icnsa$value, lambda = 0.5)
# Imputing values for the Covid period (predict.smooth.spline takes new x values via 'x')
covid_period_start = as.Date("2020-03-01")
covid_period_end = as.Date("2021-06-30")
covid_idx = icnsa$date >= covid_period_start & icnsa$date <= covid_period_end
covid_period_values = predict(spline_fit, x = as.numeric(icnsa$date[covid_idx]))
# Replacing the Covid-period observations with the spline-imputed values
icnsa_imputed = data.frame(date = icnsa$date, value = icnsa$value)
icnsa_imputed$value[covid_idx] = covid_period_values$y
# Fitting multiplicative Holt-Winters model
hw_multiplicative = HoltWinters(ts(icnsa_imputed$value, frequency = 52), seasonal = "multiplicative")
# Fitting additive Holt-Winters model
hw_additive = HoltWinters(ts(icnsa_imputed$value, frequency = 52), seasonal = "additive")
# Forecasting next ICNSA value using both models
forecast_multiplicative = forecast(hw_multiplicative, h = 1)
forecast_additive = forecast(hw_additive, h = 1)
# forecasts
print(forecast_multiplicative)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 115.6538 385774.3 290594.9 480953.8 240209.9 531338.7
print(forecast_additive)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 115.6538 404580.3 306141.4 503019.2 254031 555129.6
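To decide between the additive and multiplicative specifications, the in-sample sum of squared errors stored on each HoltWinters fit gives a quick comparison; a minimal sketch assuming the two fits above:

``` r
# Sketch: compare the two Holt-Winters fits by in-sample SSE (smaller is better)
cat("Multiplicative SSE:", hw_multiplicative$SSE, "\n")
cat("Additive SSE:", hw_additive$SSE, "\n")
```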
Created on 2024-02-28 with reprex v2.1.0
library(reprex)
library(fredr)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(ggplot2)
library(changepoint)
#> Successfully loaded changepoint package version 2.2.4
#> See NEWS for details of changes.
library(imputeTS)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#>
#> Attaching package: 'imputeTS'
#> The following object is masked from 'package:zoo':
#>
#> na.locf
library(forecast)
library(bfast)
#> Warning: package 'bfast' was built under R version 4.3.3
#> Loading required package: strucchangeRcpp
#> Warning: package 'strucchangeRcpp' was built under R version 4.3.3
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
# Set FRED API key
fredr_set_key("ad8fa632ef9ee5e564d414be20b7766b")
# Fetching ICNSA data from FRED
icnsa <- fredr(series_id = "ICNSA")
ilcclaims <- fredr(series_id = "ILCClaims")
# Converting 'date' column to Date type
icnsa$date <- as.Date(icnsa$date)
ilcclaims$date <- as.Date(ilcclaims$date)
# Plotting initial ICNSA data
ggplot(icnsa, aes(x = date, y = value)) +
geom_line(color = "blue") +
labs(title = "Initial ICNSA Data", x = "Date", y = "ICNSA Value") +
theme_minimal()
# Plotting initial ILCClaims data
ggplot(ilcclaims, aes(x = date, y = value)) +
geom_line(color = "red") +
labs(title = "Initial ILCClaims Data", x = "Date", y = "ILCClaims Value") +
theme_minimal()
# Merging ICNSA and ILCClaims data
merged_data <- merge(icnsa, ilcclaims, by = "date", all = TRUE)
# Removing missing values
merged_data <- na.omit(merged_data)
# Fitting a trend line to the ICNSA data
trend_icnsa <- lm(value.x ~ date, data = merged_data)
summary(trend_icnsa)
#>
#> Call:
#> lm(formula = value.x ~ date, data = merged_data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -241796 -100052 -41853 31035 5770707
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 3.208e+05 2.211e+04 14.506 <2e-16 ***
#> date 3.801e+00 1.646e+00 2.309 0.0211 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 294700 on 1985 degrees of freedom
#> Multiple R-squared: 0.002678, Adjusted R-squared: 0.002176
#> F-statistic: 5.331 on 1 and 1985 DF, p-value: 0.02106
# Calculating residuals (deviations from the trend line)
merged_data$residuals <- residuals(trend_icnsa)
# Setting a threshold level for residuals
threshold <- sd(merged_data$residuals) # Adjust this threshold as needed
# Performing change point detection on the residuals
cpt <- cpt.mean(merged_data$residuals)
# Extracting the dates associated with the change points
change_point_dates <- merged_data$date[cpt@cpts]
# Identifying the start date based on the first change point
start_date <- change_point_dates[1]
# Finding the index of the last residual above the threshold
end_index <- tail(which(merged_data$residuals > threshold), 1)
# Using the date corresponding to the end_index as the end date
end_date <- merged_data$date[end_index]
# Printing the start and end dates
cat("Start Date:", start_date, "\n")
#> Start Date: 18335
cat("End Date:", end_date, "\n")
#> End Date: 18713
# Plotting the detected change points
plot(cpt, type = "l", col = "red", main = "Change Points Detection")
# Imputing missing values using state-space methodology
merged_data_imputed_state_space <- na_kalman(merged_data$value.x)
# Applying the spline imputation method for comparison
spline_fit <- smooth.spline(merged_data$date, merged_data$value.x)
merged_data_imputed_spline <- predict(spline_fit, xout = merged_data$date)$y
# Calculating metrics for state-space imputation
mae_state_space <- mean(abs(merged_data$value.x - merged_data_imputed_state_space))
rmse_state_space <- sqrt(mean((merged_data$value.x - merged_data_imputed_state_space)^2))
rsquared_state_space <- summary(lm(merged_data_imputed_state_space ~ merged_data$value.x))$r.squared
bias_state_space <- mean(merged_data_imputed_state_space - merged_data$value.x)
# Calculating metrics for spline imputation
mae_spline <- mean(abs(merged_data$value.x - merged_data_imputed_spline))
rmse_spline <- sqrt(mean((merged_data$value.x - merged_data_imputed_spline)^2))
rsquared_spline <- summary(lm(merged_data_imputed_spline ~ merged_data$value.x))$r.squared
bias_spline <- mean(merged_data_imputed_spline - merged_data$value.x)
# Printing the metrics
comparison_results <- data.frame(Method = c("State-Space Imputation", "Spline Imputation"),
MAE = c(mae_state_space, mae_spline),
RMSE = c(rmse_state_space, rmse_spline),
R_squared = c(rsquared_state_space, rsquared_spline),
Bias = c(bias_state_space, bias_spline))
print(comparison_results)
#> Method MAE RMSE R_squared Bias
#> 1 State-Space Imputation 0.0 0.0 1.0000000 0.000000e+00
#> 2 Spline Imputation 49349.9 167394.3 0.6781726 3.188004e-11
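The zero error for the state-space method above simply reflects that merged_data$value.x contains no missing values after na.omit, so na_kalman returns the series unchanged. A fairer comparison blanks out a hold-out window first and scores the imputations against the withheld true values; a minimal sketch with an arbitrary illustrative window:

``` r
# Sketch: artificial hold-out window (indices chosen only for illustration)
test_idx <- 100:120
y_holdout <- merged_data$value.x
y_holdout[test_idx] <- NA
y_kalman <- na_kalman(y_holdout)          # impute only the blanked-out values
mae_holdout <- mean(abs(y_kalman[test_idx] - merged_data$value.x[test_idx]))
print(mae_holdout)
```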
# Plotting the ICNSA data before and after state-space imputation
plot(merged_data$date, merged_data$value.x, type = "l", col = "blue", lty = 1, xlab = "Date", ylab = "Initial Claims", main = "ICNSA Data Before and After Imputation")
lines(merged_data$date, merged_data_imputed_state_space, col = "red", lty = 2)
# Plotting the ICNSA data before and after spline imputation
plot(merged_data$date, merged_data$value.x, type = "l", col = "black", lty = 1, xlab = "Date", ylab = "Initial Claims", main = "ICNSA Data Before and After Imputation")
lines(merged_data$date, merged_data_imputed_spline, col = "red", lty = 3)
# Calculating correlations and plots
cor_residuals <- cor(merged_data$residuals, merged_data$value.x)
ccf_plot <- ccf(merged_data$residuals, merged_data$value.x)
acf_plot <- acf(merged_data$residuals)
pacf_plot <- pacf(merged_data$residuals)
# Correlation Residuals
print(cor_residuals)
#> [1] 0.99866
# Plots of CCF, ACF, PACF
plot(ccf_plot)
plot(acf_plot)
plot(pacf_plot)
# Manually selecting the ARIMA order based on lagged correlation coefficients
lags <- 1:10
correlations <- sapply(lags, function(k) cor(head(merged_data$value.x, -k), tail(merged_data$value.x, -k)))
# Identifying significant lag values
significant_lags <- lags[abs(correlations) > 0.5]
# ARIMA model build with adjusted parameters
arima_model <- Arima(merged_data$value.x, order = c(1, 0, 0), seasonal = list(order = c(1, 0, 1), period = 52))
# Forecast of next week's value
forecast_next_week <- forecast(arima_model, h = 1)
# Printing the forecast
print(forecast_next_week)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 1988 209028.8 69045.38 349012.2 -5057.362 423114.9
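As with the earlier models, the manually specified seasonal ARIMA deserves a residual check before its forecast is trusted; a minimal sketch using the same diagnostics from the forecast package:

``` r
# Sketch: residual diagnostics and training-set accuracy for the manual ARIMA fit
checkresiduals(arima_model)
accuracy(arima_model)
```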
Created on 2024-03-07 with reprex v2.1.0
setwd("C:/Users/pavan varma/OneDrive - University at Buffalo/Desktop/Spring Sem/TSA")
library(reprex)
library(censusapi)
#> Warning: package 'censusapi' was built under R version 4.3.3
#>
#> Attaching package: 'censusapi'
#> The following object is masked from 'package:methods':
#>
#> getFunction
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(readxl)
#> Warning: package 'readxl' was built under R version 4.3.3
library(tsbox)
library(ggplot2)
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> Warning: package 'MASS' was built under R version 4.3.3
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> Loading required package: urca
#> Loading required package: lmtest
library(lmtest)
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice
#> Warning: package 'lattice' was built under R version 4.3.3
# Read the datasets
GS <- read_excel("SeriesReport-202404101652-V.xlsx")
FBS <- read_excel("SeriesReport-202404101653-V.xlsx")
HPCS <- read_excel("SeriesReport-202404101654-V.xlsx")
CCAS <- read_excel("SeriesReport-202404101655-V.xlsx")
# Convert "Value" column to numeric and "Period" to Date format for each dataset
convert_and_format <- function(data) {
data$Value <- as.numeric(data$Value)
data$Period <- as.Date(paste0("01-", data$Period), format = "%d-%b-%Y")
return(data)
}
GS <- convert_and_format(GS)
#> Warning in convert_and_format(GS): NAs introduced by coercion
FBS <- convert_and_format(FBS)
#> Warning in convert_and_format(FBS): NAs introduced by coercion
HPCS <- convert_and_format(HPCS)
#> Warning in convert_and_format(HPCS): NAs introduced by coercion
CCAS <- convert_and_format(CCAS)
#> Warning in convert_and_format(CCAS): NAs introduced by coercion
# Merge the datasets based on the "Period" column
merged_data <- merge(GS, FBS, by = "Period", all = TRUE)
merged_data <- merge(merged_data, HPCS, by = "Period", all = TRUE)
merged_data <- merge(merged_data, CCAS, by = "Period", all = TRUE)
#> Warning in merge.data.frame(merged_data, CCAS, by = "Period", all = TRUE):
#> column names 'Value.x', 'Value.y' are duplicated in the result
# Define new column names
new_column_names <- c("Period", "GS_Value", "FBS_Value", "HPCS_Value", "CCAS_Value")
# Assign new column names to the merged dataset
colnames(merged_data) <- new_column_names
merged_data = na.omit(merged_data)
# Summary Statistics
summary(merged_data)
#> Period GS_Value FBS_Value HPCS_Value
#> Min. :1992-01-01 Min. :12601 Min. :30382 Min. : 7292
#> 1st Qu.:2000-01-08 1st Qu.:19084 1st Qu.:36448 1st Qu.:12434
#> Median :2008-01-16 Median :34941 Median :46847 Median :20152
#> Mean :2008-01-15 Mean :32356 Mean :48849 Mean :19693
#> 3rd Qu.:2016-01-24 3rd Qu.:43190 3rd Qu.:57561 3rd Qu.:26127
#> Max. :2024-02-01 Max. :68257 Max. :82993 Max. :37455
#> CCAS_Value
#> Min. : 2849
#> 1st Qu.:13386
#> Median :17496
#> Mean :17281
#> 3rd Qu.:21271
#> Max. :26625
# Plot time series for each variable
ggplot(merged_data, aes(x = Period)) +
geom_line(aes(y = GS_Value, color = "GS")) +
geom_line(aes(y = FBS_Value, color = "FBS")) +
geom_line(aes(y = HPCS_Value, color = "HPCS")) +
geom_line(aes(y = CCAS_Value, color = "CCAS")) +
labs(color = "Variables", title = "Time Series Plot of Merged Data") +
theme_minimal()
# Correlation Analysis
correlation_matrix <- cor(merged_data[, -1]) # Exclude "Period" column
print(correlation_matrix)
#> GS_Value FBS_Value HPCS_Value CCAS_Value
#> GS_Value 1.0000000 0.8814042 0.9213464 0.9350807
#> FBS_Value 0.8814042 1.0000000 0.9762019 0.8946275
#> HPCS_Value 0.9213464 0.9762019 1.0000000 0.9432814
#> CCAS_Value 0.9350807 0.8946275 0.9432814 1.0000000
# Compute and plot ACF for each variable
par(mfrow=c(2,2))
for (i in 2:5) {
acf_result <- acf(merged_data[, i], main = paste("ACF for", new_column_names[i]))
}
# Compute and plot PACF for each variable
par(mfrow=c(2,2))
for (i in 2:5) {
pacf_result <- pacf(merged_data[, i], main = paste("PACF for", new_column_names[i]))
}
# Compute and plot CCF between each pair of variables
par(mfrow=c(1,3))
for (i in 2:5) {
for (j in 2:5) {
if (i != j) {
ccf_result <- ccf(merged_data[, i], merged_data[, j], main = paste("CCF between", new_column_names[i], "and", new_column_names[j]))
}
}
}
# Prepare the data for VAR modeling
merged_data_ts <- ts(merged_data[, -1], frequency = 12) # Exclude "Period" column
# Fit VAR(1) model
var_model_1 <- VAR(merged_data_ts, p = 1)
# Fit VAR(p) model with p > 1
var_model_p <- VAR(merged_data_ts, p = 2) # Change p value as desired
# Compare the two models using AIC
AIC_1 <- AIC(var_model_1)
AIC_p <- AIC(var_model_p)
# Print AIC values
print(paste("AIC for VAR(1) model:", AIC_1))
#> [1] "AIC for VAR(1) model: 24718.1302816642"
print(paste("AIC for VAR(p) model with p > 1:", AIC_p))
#> [1] "AIC for VAR(p) model with p > 1: 24086.819078207"
# Decide which model is better based on AIC
if (AIC_1 < AIC_p) {
print("VAR(1) model is better.")
} else {
print("VAR(p) model with p > 1 is better.")
}
#> [1] "VAR(p) model with p > 1 is better."
# Forecast one month ahead using VAR(1) model
forecast_1 <- predict(var_model_1, n.ahead = 1)
# Forecast one month ahead using VAR(p) model with p > 1
forecast_p <- predict(var_model_p, n.ahead = 1)
# Print the forecasts
print("Forecast for VAR(1) model:")
#> [1] "Forecast for VAR(1) model:"
print(forecast_1$fcst)
#> $GS_Value
#> fcst lower upper CI
#> GS_Value.fcst 53159.35 50479.12 55839.58 2680.232
#>
#> $FBS_Value
#> fcst lower upper CI
#> FBS_Value.fcst 82832.31 80770.73 84893.89 2061.577
#>
#> $HPCS_Value
#> fcst lower upper CI
#> HPCS_Value.fcst 36115.87 35367.09 36864.66 748.7856
#>
#> $CCAS_Value
#> fcst lower upper CI
#> CCAS_Value.fcst 26160.28 24309.91 28010.65 1850.369
print("Forecast for VAR(p) model with p > 1:")
#> [1] "Forecast for VAR(p) model with p > 1:"
print(forecast_p$fcst)
#> $GS_Value
#> fcst lower upper CI
#> GS_Value.fcst 53627.2 51317.23 55937.16 2309.967
#>
#> $FBS_Value
#> fcst lower upper CI
#> FBS_Value.fcst 83023.89 81214.63 84833.15 1809.262
#>
#> $HPCS_Value
#> fcst lower upper CI
#> HPCS_Value.fcst 36186.4 35644.32 36728.48 542.0815
#>
#> $CCAS_Value
#> fcst lower upper CI
#> CCAS_Value.fcst 26381.57 24903.86 27859.28 1477.708
## Granger Causality Tests for VAR(1) Model
granger_test_1 <- causality(var_model_1, cause = "GS_Value")
print(granger_test_1)
#> $Granger
#>
#> Granger causality H0: GS_Value do not Granger-cause FBS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> F-Test = 1.8076, df1 = 3, df2 = 1520, p-value = 0.1438
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: GS_Value and FBS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> Chi-squared = 78.701, df = 3, p-value < 2.2e-16
granger_test_1 <- causality(var_model_1, cause = "FBS_Value")
print(granger_test_1)
#> $Granger
#>
#> Granger causality H0: FBS_Value do not Granger-cause GS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> F-Test = 0.4492, df1 = 3, df2 = 1520, p-value = 0.7179
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: FBS_Value and GS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> Chi-squared = 122.19, df = 3, p-value < 2.2e-16
granger_test_1 <- causality(var_model_1, cause = "HPCS_Value")
print(granger_test_1)
#> $Granger
#>
#> Granger causality H0: HPCS_Value do not Granger-cause GS_Value
#> FBS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> F-Test = 1.6263, df1 = 3, df2 = 1520, p-value = 0.1814
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: HPCS_Value and GS_Value
#> FBS_Value CCAS_Value
#>
#> data: VAR object var_model_1
#> Chi-squared = 137.87, df = 3, p-value < 2.2e-16
granger_test_1 <- causality(var_model_1, cause = "CCAS_Value")
print(granger_test_1)
#> $Granger
#>
#> Granger causality H0: CCAS_Value do not Granger-cause GS_Value
#> FBS_Value HPCS_Value
#>
#> data: VAR object var_model_1
#> F-Test = 5.5748, df1 = 3, df2 = 1520, p-value = 0.0008387
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: CCAS_Value and GS_Value
#> FBS_Value HPCS_Value
#>
#> data: VAR object var_model_1
#> Chi-squared = 136, df = 3, p-value < 2.2e-16
## Granger Causality Tests for VAR(p) Model with p > 1
granger_test_p <- causality(var_model_p, cause = "GS_Value")
print(granger_test_p)
#> $Granger
#>
#> Granger causality H0: GS_Value do not Granger-cause FBS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> F-Test = 2.8709, df1 = 6, df2 = 1500, p-value = 0.008747
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: GS_Value and FBS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> Chi-squared = 51.271, df = 3, p-value = 4.284e-11
granger_test_p <- causality(var_model_p, cause = "FBS_Value")
print(granger_test_p)
#> $Granger
#>
#> Granger causality H0: FBS_Value do not Granger-cause GS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> F-Test = 7.8538, df1 = 6, df2 = 1500, p-value = 2.398e-08
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: FBS_Value and GS_Value
#> HPCS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> Chi-squared = 140.45, df = 3, p-value < 2.2e-16
granger_test_p <- causality(var_model_p, cause = "HPCS_Value")
print(granger_test_p)
#> $Granger
#>
#> Granger causality H0: HPCS_Value do not Granger-cause GS_Value
#> FBS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> F-Test = 8.9283, df1 = 6, df2 = 1500, p-value = 1.345e-09
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: HPCS_Value and GS_Value
#> FBS_Value CCAS_Value
#>
#> data: VAR object var_model_p
#> Chi-squared = 73.098, df = 3, p-value = 8.882e-16
granger_test_p <- causality(var_model_p, cause = "CCAS_Value")
print(granger_test_p)
#> $Granger
#>
#> Granger causality H0: CCAS_Value do not Granger-cause GS_Value
#> FBS_Value HPCS_Value
#>
#> data: VAR object var_model_p
#> F-Test = 21.054, df1 = 6, df2 = 1500, p-value < 2.2e-16
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: CCAS_Value and GS_Value
#> FBS_Value HPCS_Value
#>
#> data: VAR object var_model_p
#> Chi-squared = 142.15, df = 3, p-value < 2.2e-16
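Before leaning on the Granger-causality results, it is worth checking the VAR(2) residuals for remaining serial correlation; a minimal sketch using serial.test from the vars package:

``` r
# Sketch: Portmanteau test on the VAR(2) residuals (assumes var_model_p from above)
serial_check <- serial.test(var_model_p, lags.pt = 12, type = "PT.asymptotic")
print(serial_check)
```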
# Construct a Basic VAR-L model (maximum lag order 4) with a sparsity-inducing penalty structure
model <- constructModel(merged_data_ts, p = 4, struct = "Basic", gran = c(50, 10), verbose = FALSE)
# Perform cross-validation and select the optimal penalty parameter
results <- cv.BigVAR(model)
print(results)
#> *** BIGVAR MODEL Results ***
#> Structure
#> [1] "Basic"
#> Loss
#> [1] "L2"
#> Forecast Horizon
#> [1] 1
#> Minnesota VAR
#> [1] FALSE
#> Maximum Lag Order
#> [1] 4
#> Optimal Lambda
#> [1] 625900000
#> Grid Depth
#> [1] 50
#> Index of Optimal Lambda
#> [1] 10
#> Fraction of active coefficients
#> [1] 0.3841
#> In-Sample Loss
#> [1] 14300000
#> BigVAR Out of Sample Loss
#> [1] 15700000
#> *** Benchmark Results ***
#> Conditional Mean Out of Sample Loss
#> [1] 1.01e+09
#> AIC Out of Sample Loss
#> [1] 10500000
#> BIC Out of Sample Loss
#> [1] 1e+07
#> RW Out of Sample Loss
#> [1] 9870000
# Visualize the sparsity pattern of the estimated coefficients
SparsityPlot.BigVAR.results(results)
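The selected BigVAR model can also produce a one-step-ahead forecast; a minimal sketch, on the assumption that predict() on the cv.BigVAR results object accepts n.ahead as described in the BigVAR documentation:

``` r
# Sketch: one-step-ahead forecast from the penalized VAR (assumes 'results' from above)
bigvar_forecast <- predict(results, n.ahead = 1)
print(bigvar_forecast)
```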
Created on 2024-04-10 with reprex v2.1.0
Created on 2024-02-07 with reprex v2.1.0
- Forecasted unemployment figure for today: 280303