anjuanand6 opened 9 months ago
HW - 1
The following steps have been performed:
• Load the most current Initial Claims (ICNSA) data, not seasonally adjusted.
• Download another series that you think might be related to ICNSA to be used as a covariate (the WEI series was used for this).
• Do some empirical data analysis to decide on possible covariates and stochastic structure to use for a regARIMA model.
• You may use automatic model identification, such as auto.arima() or ARIMA() without the pdq() argument, but the final model must be justified in your own words and analysis.
• Fit your final model and comment on the regression diagnostics.
• Produce a point forecast from your final model.
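The regARIMA step with the WEI covariate is described above but not shown in the reprex below. A minimal sketch of that fit, assuming fredr_set_key() has already been called and that the two series' weekly dates align after merging (the WEI week-ending convention may need adjusting in practice):
# Sketch only: regARIMA fit of ICNSA with WEI as an exogenous regressor
library(fredr)
library(forecast)
icnsa <- fredr(series_id = "ICNSA")
wei <- fredr(series_id = "WEI") # Weekly Economic Index
joined <- merge(icnsa[, c("date", "value")], wei[, c("date", "value")],
                by = "date", suffixes = c("_icnsa", "_wei"))
y <- ts(joined$value_icnsa, frequency = 52)
xreg <- as.matrix(joined$value_wei)
fit <- auto.arima(y, xreg = xreg) # ARIMA errors plus a linear regression on WEI
checkresiduals(fit) # residual / regression diagnostics
forecast(fit, xreg = tail(xreg, 1), h = 1) # point forecast, holding WEI at its last value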
# Load necessary libraries
library(tidyverse)
library(reprex)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
library(readr)
library(fredr)
library(dplyr)
library(lubridate)
library(tsbox)
library(fpp3)
#> ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
#> ✔ tsibble 1.1.4 ✔ fable 0.3.3
#> ✔ tsibbledata 0.4.1 ✔ fabletools 0.3.4
#> ✔ feasts 0.3.1
#> ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
#> ✖ lubridate::date() masks base::date()
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ tsibble::intersect() masks base::intersect()
#> ✖ tsibble::interval() masks lubridate::interval()
#> ✖ dplyr::lag() masks stats::lag()
#> ✖ tsibble::setdiff() masks base::setdiff()
#> ✖ tsibble::union() masks base::union()
library(seasonal)
#>
#> Attaching package: 'seasonal'
#> The following object is masked from 'package:tibble':
#>
#> view
library(ggplot2)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following object is masked from 'package:tsibble':
#>
#> index
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(splines)
#1. Load the most current Initial Claims (ICNSA) data
# Set the API Key
fredr_set_key("6359e5121a13be3ece5d737e3c242a41")
# Fetch the ICNSA data, not seasonally adjusted
icnsa_data <- fredr(series_id = "ICNSA")
#2. Empirical analysis for the COVID pandemic period
# Plot the data to visually find the COVID period
ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "ICNSA Data Over Time", x = "Date", y = "Initial Claims")
# After plotting, manually define the start and end dates of the COVID period
start_date <- as.Date("2020-03-01") # Observation start date
end_date <- as.Date("2021-03-01") # Observation end date
#3. Cubic spline imputation
# Convert the date to a proper Date object
icnsa_data$date <- as.Date(icnsa_data$date)
# Define the time points for the spline excluding the COVID period and create a spline function
time_points <- icnsa_data$date[icnsa_data$date < start_date | icnsa_data$date > end_date]
values <- icnsa_data$value[icnsa_data$date < start_date | icnsa_data$date > end_date]
cs_fit <- splinefun(time_points, values, method = "fmm")
# Apply the spline to impute values for the COVID period
imputed_values <- cs_fit(icnsa_data$date[icnsa_data$date >= start_date & icnsa_data$date <= end_date])
icnsa_data$value[icnsa_data$date >= start_date & icnsa_data$date <= end_date] <- imputed_values
# Plot the data with imputed values
ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line() +
  geom_line(data = icnsa_data[icnsa_data$date >= start_date & icnsa_data$date <= end_date, ], aes(x = date, y = value), color = "red") +
  labs(title = "ICNSA Data with Imputed COVID Period", x = "Date", y = "Initial Claims")
# Non-COVID data
non_covid_data <- icnsa_data %>%
  filter(date < start_date | date > end_date)
# COVID data
covid_data <- icnsa_data %>%
  filter(date >= start_date & date <= end_date)
# Define a range of lambda values
lambda_values <- seq(0.1, 1, by = 0.1)
# Initialize an empty list to store plots for each lambda
plots <- list()
# Loop through each lambda value to fit a spline and impute new values
for (i in seq_along(lambda_values)) {
  lambda <- lambda_values[i]
  # Fit the spline model with the current lambda value
  spline_fit <- smooth.spline(x = as.numeric(non_covid_data$date), y = non_covid_data$value, spar = lambda)
  # Impute new values for the COVID period
  imputed_values <- predict(spline_fit, x = as.numeric(covid_data$date))$y
  covid_data$value <- imputed_values
  # Combine non-COVID and COVID data with imputed values
  updated_icnsa_data <- bind_rows(non_covid_data, covid_data) %>%
    arrange(date)
  # Create a plot for the current lambda value
  p <- ggplot() +
    geom_line(data = icnsa_data, aes(x = date, y = value), color = "red") +
    geom_line(data = updated_icnsa_data, aes(x = date, y = value), color = "blue") +
    labs(title = paste("ICNSA Data with Imputed Values (lambda =", lambda, ")"),
         x = "Date", y = "Value") +
    theme_minimal()
  # Store the plot in the list
  plots[[i]] <- p
}
# Display all plots
plots
#> (the ten ggplot objects print here, one per lambda value)
# Find the optimal lambda value via leave-one-out cross-validation
# (for speed, only the first nrow(covid_data) non-COVID observations are held out in turn)
rmse_values <- numeric(length(lambda_values))
n_holdout <- nrow(covid_data)
for (i in seq_along(lambda_values)) {
  rmse <- numeric(n_holdout)
  for (j in seq_len(n_holdout)) {
    # Use all but one observation for fitting
    train_data <- non_covid_data[-j, ]
    test_data <- non_covid_data[j, ]
    # Fit spline with current lambda value
    spline_fit <- smooth.spline(x = as.numeric(train_data$date), y = train_data$value, spar = lambda_values[i])
    # Predict the excluded observation
    predicted_value <- predict(spline_fit, x = as.numeric(test_data$date))$y
    # RMSE for the single held-out prediction (its absolute error)
    rmse[j] <- sqrt(mean((predicted_value - test_data$value) ^ 2))
  }
  # Average RMSE for current lambda
  rmse_values[i] <- mean(rmse)
}
# Find the lambda value with the smallest RMSE
optimal_lambda <- lambda_values[which.min(rmse_values)]
# Output the optimal lambda value
print(optimal_lambda)
#> [1] 0.2
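As a cross-check on the grid search, smooth.spline can choose its own smoothing level by leave-one-out cross-validation; a minimal sketch (cv = TRUE requests ordinary LOO CV):
# Sketch: let smooth.spline select the smoothing parameter itself
auto_fit <- smooth.spline(x = as.numeric(non_covid_data$date),
                          y = non_covid_data$value, cv = TRUE)
auto_fit$spar # compare with the grid-search optimum above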
# Refit the spline using the optimal lambda value
spline_fit <- smooth.spline(x = as.numeric(non_covid_data$date), y = non_covid_data$value, spar = optimal_lambda)
# Function to impute missing values using spline fit
impute_missing_values <- function(non_covid_data, covid_data, optimal_lambda) {
  spline_fit <- smooth.spline(x = as.numeric(non_covid_data$date),
                              y = non_covid_data$value, spar = optimal_lambda)
  imputed_values <- predict(spline_fit, x = as.numeric(covid_data$date))$y
  covid_data$value <- imputed_values
  updated_icnsa_data <- rbind(non_covid_data, covid_data) %>% arrange(date)
  return(updated_icnsa_data)
}
# Plotting function for updated data
plot_updated_data <- function(updated_icnsa_data, original_icnsa_data) {
  plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
       main = "Updated vs Original", xlab = "Year", ylab = "Number")
  lines(original_icnsa_data$date, original_icnsa_data$value, col = "blue", lwd = 2)
  legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)
}
# Plotting function for comparison of time series
plot_time_series_comparison <- function(ts_data_old, ts_data) {
  plot(ts_data_old, col = "red", main = "Comparison of Time Series", ylab = "Value")
  lines(ts_data, col = "blue")
  legend("topright", legend = c("Original", "Updated"), col = c("red", "blue"), lty = 1, lwd = 2)
}
#4. Use both a multiplicative and an additive Holt-Winters model to forecast the next ICNSA value using the newly imputed COVID values.
# Function to forecast using Holt-Winters models
holt_winters_forecast <- function(ts_data) {
  hwm_multi <- HoltWinters(ts_data, seasonal = "multiplicative")
  forecasted_value_multi <- forecast(hwm_multi, h = 1)$mean
  hwa_add <- HoltWinters(ts_data, seasonal = "additive")
  forecasted_value_add <- forecast(hwa_add, h = 1)$mean
  return(list(multiplicative = forecasted_value_multi, additive = forecasted_value_add))
}
# Impute missing values
updated_icnsa_data <- impute_missing_values(non_covid_data, covid_data, optimal_lambda)
# Plot updated data
plot_updated_data(updated_icnsa_data, icnsa_data)
# Create time series for comparison
ts_data_old <- ts(icnsa_data$value, frequency = 52)
ts_data <- ts(updated_icnsa_data$value, frequency = 52)
# Plot time series comparison
plot_time_series_comparison(ts_data_old, ts_data)
# Forecast using Holt-Winters models
forecast_results <- holt_winters_forecast(ts_data)
cat("Forecasted Unemployment using Holt-Winters Multiplicative Model:", forecast_results$multiplicative, "\n")
#> Forecasted Unemployment using Holt-Winters Multiplicative Model: 191025.8
cat("Forecasted Unemployment using Holt-Winters Additive Model:", forecast_results$additive, "\n")
#> Forecasted Unemployment using Holt-Winters Additive Model: 197790.4
Created on 2024-02-28 with reprex v2.1.0
The results after forecasting are: 191025.8 from the Holt-Winters multiplicative model and 197790.4 from the Holt-Winters additive model.
HW - 2
The following steps were performed: load the ICNSA and CCNSA (Continued Claims) series from FRED; impute the COVID-period ICNSA values with both a cubic spline and a structural state-space model (local linear trend plus weekly seasonal); compare the original and imputed series visually; and produce a one-step-ahead forecast from the state-space model.
# Load necessary libraries
library(fredr)
library(ggplot2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(KFAS)
#> Warning: package 'KFAS' was built under R version 4.3.3
#> Please cite KFAS in publications by using:
#>
#> Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(reprex)
# Set API key for FRED data
fredr_set_key("6359e5121a13be3ece5d737e3c242a41")
icnsa_data <- fredr(series_id = "ICNSA")
ccnsa_data <- fredr(series_id = "CCNSA")
# Plot the ICNSA data
ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "ICNSA Data Over Time", x = "Date", y = "Initial Claims")
# Plot the CCNSA data
ggplot(ccnsa_data, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "CCNSA Data Over Time", x = "Date", y = "Continued Claims")
# Retrieve and format Initial Claims, Not Seasonally Adjusted (ICNSA)
initial_claims_data <- fredr(series_id = "ICNSA") %>%
  mutate(Date = as.Date(date), InitialClaims = value) %>%
  select(Date, InitialClaims)
# Retrieve and format Continued Claims (Insured Unemployment, CCNSA)
continuous_claims_data <- fredr(series_id = "CCNSA") %>%
  mutate(Date = as.Date(date), ContinuedClaims = value) %>%
  select(Date, ContinuedClaims)
# Define the period to exclude due to COVID-19 impact
covid_start_date <- as.Date("2020-03-01")
covid_end_date <- as.Date("2021-03-01")
# Filter out the COVID-19 impact period from the ICNSA data
filtered_initial_claims_data <- initial_claims_data[!(initial_claims_data$Date >= covid_start_date & initial_claims_data$Date <= covid_end_date), ]
# Apply cubic spline interpolation to impute missing data for the COVID-19 period
cubic_spline_model <- smooth.spline(filtered_initial_claims_data$Date, filtered_initial_claims_data$InitialClaims, spar=0.2)
all_dates <- initial_claims_data$Date
cubic_spline_predictions <- predict(cubic_spline_model, x = as.numeric(all_dates))
initial_claims_data$CubicSplineImputed <- cubic_spline_predictions$y
# Merge and prepare data for analysis (forward-fill any gaps in the continued-claims column)
merged_data <- merge(initial_claims_data, continuous_claims_data, by = "Date", all.x = TRUE) %>%
  na.locf()
# Plot initial claims time series with COVID-19 period highlighted
ggplot(merged_data, aes(x = Date, y = InitialClaims)) +
  geom_line() +
  labs(title = "ICNSA Time Series", x = "Date", y = "Initial Claims") +
  geom_vline(xintercept = as.numeric(covid_start_date), color = "red", linetype = "dashed") +
  geom_vline(xintercept = as.numeric(covid_end_date), color = "red", linetype = "dashed")
# Define and fit a state-space model: a local linear trend (degree = 2)
# plus a weekly dummy seasonal component (period = 52)
model_spec <- SSModel(InitialClaims ~ SSMtrend(degree = 2, Q = list(matrix(1), matrix(1))) +
                        SSMseasonal(period = 52, sea.type = "dummy"), H = matrix(1), data = merged_data)
fit <- fitSSM(model_spec, inits = rep(0.1, 2))
summary(fit)
#> Length Class Mode
#> optim.out 5 -none- list
#> model 14 SSModel list
# Apply Kalman filter to impute data using the state-space model
state_space_imputation <- KFS(fit$model)
merged_data$StateSpaceImputed <- state_space_imputation$alphahat[,"level"]
# Compare original, cubic spline imputed, and state-space model imputed data visually
ggplot(merged_data, aes(x = Date)) +
  geom_line(aes(y = InitialClaims), color = "grey", linetype = "solid") +
  geom_line(aes(y = CubicSplineImputed), color = "green", linetype = "dotted") +
  geom_line(aes(y = StateSpaceImputed), color = "blue", linetype = "dashed") +
  labs(title = "ICNSA: Original vs. Cubic Spline Imputed vs. State-space Imputed", x = "Date", y = "Initial Claims") +
  theme_minimal()
# Focus on comparison between original and state-space imputed data with highlighted COVID-19 period
ggplot(merged_data, aes(x = Date)) +
  geom_line(aes(y = InitialClaims), color = "grey") +
  geom_line(aes(y = StateSpaceImputed), color = "blue") +
  labs(title = "Comparison of Original and State-space Imputed ICNSA Data", x = "Date", y = "Initial Claims") +
  geom_vline(xintercept = as.numeric(covid_start_date), color = "red", linetype = "dashed") +
  geom_vline(xintercept = as.numeric(covid_end_date), color = "red", linetype = "dashed") +
  theme_minimal()
# Forecast the next data point using the state-space model
forecast_results <- predict(fit$model, n.ahead = 1)
forecast_results
#> Time Series:
#> Start = 2984
#> End = 2984
#> Frequency = 1
#> fit
#> [1,] 212633.3
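A point forecast alone hides the uncertainty; predict.SSModel can also return an interval. A minimal sketch using the same fitted model:
# Sketch: one-step-ahead forecast with a 95% prediction interval
predict(fit$model, n.ahead = 1, interval = "prediction", level = 0.95)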
Created on 2024-03-07 with reprex v2.1.0
The forecasted value is 212633.3.
HW - 3
The following steps have been performed: read the monthly Census retail sales series for department stores, clothing stores, jewelry stores, and total retail trade; plot, decompose, and test each series for stationarity; fit univariate ARIMA models with auto.arima() and check their residuals; fit VAR(1) and VAR(p) models and compare them by AIC and BIC; produce one-month-ahead forecasts; run a Granger causality test; and fit a sparse VAR with BigVAR.
# !diagnostics off
# Load necessary libraries
library(readxl)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.3
library(ggplot2)
library(reprex)
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> Loading required package: urca
#> Loading required package: lmtest
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice
# Read the data
department_stores <- read_excel("C:/Users/Admin/MRTS4521.xlsx", skip = 7)
print(head(department_stores))
#> # A tibble: 6 × 2
#> Period Value
#> <chr> <chr>
#> 1 Jan-1992 14134
#> 2 Feb-1992 14472
#> 3 Mar-1992 14543
#> 4 Apr-1992 14529
#> 5 May-1992 14634
#> 6 Jun-1992 14597
clothing_stores <- read_excel("C:/Users/Admin/MRTS4481.xlsx", skip = 7)
print(head(clothing_stores))
#> # A tibble: 6 × 2
#> Period Value
#> <chr> <chr>
#> 1 Jan-1992 6758
#> 2 Feb-1992 6791
#> 3 Mar-1992 6721
#> 4 Apr-1992 6954
#> 5 May-1992 6947
#> 6 Jun-1992 7091
jewelry_stores <- read_excel("C:/Users/Admin/MRTS44831.xlsx", skip = 7)
print(head(jewelry_stores))
#> # A tibble: 6 × 2
#> Period Value
#> <chr> <chr>
#> 1 Jan-1992 1248
#> 2 Feb-1992 1230
#> 3 Mar-1992 1283
#> 4 Apr-1992 1247
#> 5 May-1992 1222
#> 6 Jun-1992 1238
retail_trade <- read_excel("C:/Users/Admin/MRTS4400.xlsx", skip = 7)
print(head(retail_trade))
#> # A tibble: 6 × 2
#> Period Value
#> <chr> <chr>
#> 1 Jan-1992 146925
#> 2 Feb-1992 147223
#> 3 Mar-1992 146805
#> 4 Apr-1992 148032
#> 5 May-1992 149010
#> 6 Jun-1992 149800
# Ensure 'Value' column is numeric and replace non-numeric entries with NA
department_stores$Value <- as.numeric(as.character(department_stores$Value))
#> Warning: NAs introduced by coercion
clothing_stores$Value <- as.numeric(as.character(clothing_stores$Value))
#> Warning: NAs introduced by coercion
jewelry_stores$Value <- as.numeric(as.character(jewelry_stores$Value))
#> Warning: NAs introduced by coercion
retail_trade$Value <- as.numeric(as.character(retail_trade$Value))
#> Warning: NAs introduced by coercion
# Check for NA values after coercion
sum(is.na(department_stores$Value))
#> [1] 11
sum(is.na(clothing_stores$Value))
#> [1] 11
sum(is.na(jewelry_stores$Value))
#> [1] 46
sum(is.na(retail_trade$Value))
#> [1] 11
# Remove the NA values
department_stores <- department_stores[!is.na(department_stores$Value), ]
clothing_stores <- clothing_stores[!is.na(clothing_stores$Value), ]
jewelry_stores <- jewelry_stores[!is.na(jewelry_stores$Value), ]
retail_trade <- retail_trade[!is.na(retail_trade$Value), ]
# Convert to time series object
ts_dep_stores <- ts(department_stores$Value, start=c(1992, 1), frequency=12)
ts_clothing_stores <- ts(clothing_stores$Value, start=c(1992, 1), frequency=12)
ts_jewelry_stores <- ts(jewelry_stores$Value, start=c(1992, 1), frequency=12)
ts_retail_trade <- ts(retail_trade$Value, start=c(1992, 1), frequency=12)
# Plot the time series
plot(ts_dep_stores, main="Department Stores Sales", xlab="Year", ylab="Sales")
plot(ts_clothing_stores, main="Clothing Stores Sales", xlab="Year", ylab="Sales")
plot(ts_jewelry_stores, main="Jewelry Stores Sales", xlab="Year", ylab="Sales")
plot(ts_retail_trade, main="Retail Trade", xlab="Year", ylab="Sales")
# Decompose the time series
decomp_dept <- stl(ts_dep_stores, s.window="periodic")
plot(decomp_dept)
decomp_cloth <- stl(ts_clothing_stores, s.window="periodic")
plot(decomp_cloth)
decomp_jewel <- stl(ts_jewelry_stores, s.window="periodic")
plot(decomp_jewel)
decomp_retail <- stl(ts_retail_trade, s.window="periodic")
plot(decomp_retail)
# Check for stationarity
adf.test(ts_dep_stores)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_dep_stores
#> Dickey-Fuller = -3.3768, Lag order = 7, p-value = 0.05812
#> alternative hypothesis: stationary
adf.test(ts_clothing_stores)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_clothing_stores
#> Dickey-Fuller = -3.7445, Lag order = 7, p-value = 0.02201
#> alternative hypothesis: stationary
adf.test(ts_jewelry_stores)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_jewelry_stores
#> Dickey-Fuller = -3.795, Lag order = 7, p-value = 0.01955
#> alternative hypothesis: stationary
adf.test(ts_retail_trade)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts_retail_trade
#> Dickey-Fuller = -0.45768, Lag order = 7, p-value = 0.9835
#> alternative hypothesis: stationary
# Fit an ARIMA model
fit_dept <- auto.arima(ts_dep_stores)
summary(fit_dept)
#> Series: ts_dep_stores
#> ARIMA(3,2,2)(0,0,2)[12]
#>
#> Coefficients:
#> ar1 ar2 ar3 ma1 ma2 sma1 sma2
#> -0.9711 -0.4152 -0.2922 -0.2016 -0.7491 -0.1403 -0.1017
#> s.e. 0.1310 0.0712 0.0506 0.1299 0.1266 0.0534 0.0494
#>
#> sigma^2 = 89245: log likelihood = -2725.25
#> AIC=5466.5 AICc=5466.88 BIC=5498.08
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -16.72261 295.2265 173.5556 -0.132656 1.305143 0.2954698
#> ACF1
#> Training set -0.008641679
fit_cloth <- auto.arima(ts_clothing_stores)
summary(fit_cloth)
#> Series: ts_clothing_stores
#> ARIMA(0,1,3)(0,0,1)[12] with drift
#>
#> Coefficients:
#> ma1 ma2 ma3 sma1 drift
#> 0.3546 -0.3968 -0.3897 -0.1856 31.4015
#> s.e. 0.0480 0.0491 0.0484 0.0521 14.1911
#>
#> sigma^2 = 355359: log likelihood = -2996.91
#> AIC=6005.83 AICc=6006.05 BIC=6029.53
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set 0.2376561 591.4563 269.3632 -0.5275825 2.689432 0.3377146
#> ACF1
#> Training set 0.008129612
fit_jewel <- auto.arima(ts_jewelry_stores)
summary(fit_jewel)
#> Series: ts_jewelry_stores
#> ARIMA(1,1,3)(0,0,2)[12] with drift
#>
#> Coefficients:
#> ar1 ma1 ma2 ma3 sma1 sma2 drift
#> 0.3328 -0.1132 -0.4468 -0.2359 -0.1500 -0.1688 4.4865
#> s.e. 0.1377 0.1394 0.0465 0.0850 0.0838 0.0751 1.3311
#>
#> sigma^2 = 12991: log likelihood = -2145.6
#> AIC=4307.2 AICc=4307.63 BIC=4338.04
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set 0.3999299 112.669 66.62347 -0.4103501 3.482894 0.4829962
#> ACF1
#> Training set -0.001226958
fit_retail <- auto.arima(ts_retail_trade)
summary(fit_retail)
#> Series: ts_retail_trade
#> ARIMA(0,1,2)(0,0,2)[12] with drift
#>
#> Coefficients:
#> ma1 ma2 sma1 sma2 drift
#> -0.1072 -0.1511 -0.0859 -0.0809 1196.3729
#> s.e. 0.0518 0.0541 0.0542 0.0515 204.2875
#>
#> sigma^2 = 41594718: log likelihood = -3910.87
#> AIC=7833.73 AICc=7833.96 BIC=7857.44
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -5.419509 6398.944 2914.286 -0.08863509 0.8278123 0.1662024
#> ACF1
#> Training set -0.002816978
# Diagnostics - check residuals
checkresiduals(fit_dept)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(3,2,2)(0,0,2)[12]
#> Q* = 27.531, df = 17, p-value = 0.05073
#>
#> Model df: 7. Total lags used: 24
checkresiduals(fit_cloth)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,3)(0,0,1)[12] with drift
#> Q* = 17.391, df = 20, p-value = 0.6274
#>
#> Model df: 4. Total lags used: 24
checkresiduals(fit_jewel)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(1,1,3)(0,0,2)[12] with drift
#> Q* = 13.148, df = 18, p-value = 0.7827
#>
#> Model df: 6. Total lags used: 24
checkresiduals(fit_retail)
#>
#> Ljung-Box test
#>
#> data: Residuals from ARIMA(0,1,2)(0,0,2)[12] with drift
#> Q* = 69.593, df = 20, p-value = 2.122e-07
#>
#> Model df: 4. Total lags used: 24
# Plot ACF
acf(ts_dep_stores, main="ACF for Department Stores Sales")
acf(ts_clothing_stores, main="ACF for Clothing Stores Sales")
acf(ts_jewelry_stores, main="ACF for Jewelry Stores Sales")
acf(ts_retail_trade, main="ACF for Retail Trade")
# Plot PACF
pacf(ts_dep_stores, main="PACF for Department Stores Sales")
pacf(ts_clothing_stores, main="PACF for Clothing Stores Sales")
pacf(ts_jewelry_stores, main="PACF for Jewelry Stores Sales")
pacf(ts_retail_trade, main="PACF for Retail Trade")
# Determine the shortest length among the series
min_length <- min(length(ts_dep_stores), length(ts_clothing_stores),
                  length(ts_jewelry_stores), length(ts_retail_trade))
# Trim the series to the shortest length
ts_dep_stores_trimmed <- ts_dep_stores[1:min_length]
ts_clothing_stores_trimmed <- ts_clothing_stores[1:min_length]
ts_jewelry_stores_trimmed <- ts_jewelry_stores[1:min_length]
ts_retail_trade_trimmed <- ts_retail_trade[1:min_length]
# Combine the trimmed series into a data frame
data_combined <- data.frame(department_stores = ts_dep_stores_trimmed,
                            clothing_stores = ts_clothing_stores_trimmed,
                            jewelry_stores = ts_jewelry_stores_trimmed,
                            retail_trade = ts_retail_trade_trimmed)
# Convert to a ts object with appropriate start and frequency
data_ts <- ts(data_combined, start=c(1992, 1), frequency=12)
# Fit a VAR(1) model
var1_model <- VAR(data_ts, p=1, type="both")
# Display summary of VAR(1) model
summary(var1_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: department_stores, clothing_stores, jewelry_stores, retail_trade
#> Deterministic variables: both
#> Sample size: 349
#> Log Likelihood: -10408.866
#> Roots of the characteristic polynomial:
#> 0.9845 0.9613 0.8336 0.6646
#> Call:
#> VAR(y = data_ts, p = 1, type = "both")
#>
#>
#> Estimation results for equation department_stores:
#> ==================================================
#> department_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 9.773e-01 1.377e-02 70.990 < 2e-16 ***
#> clothing_stores.l1 -7.362e-02 1.978e-02 -3.722 0.000231 ***
#> jewelry_stores.l1 3.088e-01 1.872e-01 1.650 0.099952 .
#> retail_trade.l1 -4.702e-03 1.982e-03 -2.373 0.018208 *
#> const 1.235e+03 3.298e+02 3.745 0.000211 ***
#> trend 4.194e+00 1.473e+00 2.847 0.004676 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 304.1 on 343 degrees of freedom
#> Multiple R-Squared: 0.9869, Adjusted R-squared: 0.9867
#> F-statistic: 5180 on 5 and 343 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation clothing_stores:
#> ================================================
#> clothing_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 0.039301 0.031524 1.247 0.2134
#> clothing_stores.l1 0.858498 0.045293 18.954 <2e-16 ***
#> jewelry_stores.l1 -0.082018 0.428650 -0.191 0.8484
#> retail_trade.l1 -0.001558 0.004537 -0.343 0.7315
#> const 617.932190 755.153651 0.818 0.4138
#> trend 6.274845 3.372526 1.861 0.0637 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 696.3 on 343 degrees of freedom
#> Multiple R-Squared: 0.9512, Adjusted R-squared: 0.9505
#> F-statistic: 1336 on 5 and 343 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation jewelry_stores:
#> ===============================================
#> jewelry_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 2.102e-02 5.548e-03 3.788 0.000179 ***
#> clothing_stores.l1 -4.509e-03 7.971e-03 -0.566 0.571973
#> jewelry_stores.l1 6.598e-01 7.543e-02 8.747 < 2e-16 ***
#> retail_trade.l1 2.646e-03 7.985e-04 3.314 0.001018 **
#> const -2.548e+02 1.329e+02 -1.917 0.056037 .
#> trend -4.662e-01 5.935e-01 -0.786 0.432695
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 122.5 on 343 degrees of freedom
#> Multiple R-Squared: 0.9298, Adjusted R-squared: 0.9288
#> F-statistic: 908.4 on 5 and 343 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation retail_trade:
#> =============================================
#> retail_trade = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 2.360e-01 2.525e-01 0.935 0.350583
#> clothing_stores.l1 -1.419e+00 3.628e-01 -3.912 0.000110 ***
#> jewelry_stores.l1 -1.349e+00 3.433e+00 -0.393 0.694692
#> retail_trade.l1 9.483e-01 3.634e-02 26.097 < 2e-16 ***
#> const 1.556e+04 6.048e+03 2.573 0.010510 *
#> trend 9.722e+01 2.701e+01 3.599 0.000366 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 5577 on 343 degrees of freedom
#> Multiple R-Squared: 0.9963, Adjusted R-squared: 0.9962
#> F-statistic: 1.825e+04 on 5 and 343 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> department_stores clothing_stores jewelry_stores retail_trade
#> department_stores 92471 153362 23273 1268390
#> clothing_stores 153362 484825 71278 2801721
#> jewelry_stores 23273 71278 15015 487965
#> retail_trade 1268390 2801721 487965 31098936
#>
#> Correlation matrix of residuals:
#> department_stores clothing_stores jewelry_stores retail_trade
#> department_stores 1.0000 0.7243 0.6246 0.7480
#> clothing_stores 0.7243 1.0000 0.8354 0.7215
#> jewelry_stores 0.6246 0.8354 1.0000 0.7141
#> retail_trade 0.7480 0.7215 0.7141 1.0000
# Determine optimal number of lags for VAR(p) model using AIC
lag_selection <- VARselect(data_ts, lag.max=10, type="both")
optimal_lags <- lag_selection$selection["AIC(n)"]
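The selected order is never printed; the .l4 terms in the VAR(p) summary below imply p = 4. To display what each information criterion chose:
# Show the lag order selected by each criterion
print(lag_selection$selection)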
# Fit a VAR(p) model with the optimal number of lags
varp_model <- VAR(data_ts, p=optimal_lags, type="both")
# Display summary of VAR(p) model
summary(varp_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: department_stores, clothing_stores, jewelry_stores, retail_trade
#> Deterministic variables: both
#> Sample size: 346
#> Log Likelihood: -10074.681
#> Roots of the characteristic polynomial:
#> 0.9878 0.9878 0.9185 0.7692 0.6972 0.6829 0.6829 0.6431 0.6431 0.5793 0.5793 0.5684 0.5684 0.555 0.5475 0.5475
#> Call:
#> VAR(y = data_ts, p = optimal_lags, type = "both")
#>
#>
#> Estimation results for equation department_stores:
#> ==================================================
#> department_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + department_stores.l2 + clothing_stores.l2 + jewelry_stores.l2 + retail_trade.l2 + department_stores.l3 + clothing_stores.l3 + jewelry_stores.l3 + retail_trade.l3 + department_stores.l4 + clothing_stores.l4 + jewelry_stores.l4 + retail_trade.l4 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 0.391793 0.079961 4.900 1.51e-06 ***
#> clothing_stores.l1 0.275423 0.043234 6.370 6.37e-10 ***
#> jewelry_stores.l1 0.714494 0.231380 3.088 0.00219 **
#> retail_trade.l1 -0.012353 0.004630 -2.668 0.00800 **
#> department_stores.l2 0.300850 0.092458 3.254 0.00126 **
#> clothing_stores.l2 -0.490130 0.056327 -8.702 < 2e-16 ***
#> jewelry_stores.l2 -0.797520 0.258746 -3.082 0.00223 **
#> retail_trade.l2 0.017460 0.005588 3.125 0.00194 **
#> department_stores.l3 0.124866 0.093058 1.342 0.18058
#> clothing_stores.l3 0.195897 0.061762 3.172 0.00166 **
#> jewelry_stores.l3 0.484133 0.259908 1.863 0.06340 .
#> retail_trade.l3 0.003161 0.005523 0.572 0.56747
#> department_stores.l4 0.157334 0.077782 2.023 0.04391 *
#> clothing_stores.l4 -0.022866 0.048953 -0.467 0.64073
#> jewelry_stores.l4 -0.224192 0.226866 -0.988 0.32378
#> retail_trade.l4 -0.010552 0.004555 -2.317 0.02115 *
#> const 939.080318 307.260777 3.056 0.00242 **
#> trend 1.267779 1.323701 0.958 0.33889
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 242.8 on 328 degrees of freedom
#> Multiple R-Squared: 0.992, Adjusted R-squared: 0.9916
#> F-statistic: 2396 on 17 and 328 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation clothing_stores:
#> ================================================
#> clothing_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + department_stores.l2 + clothing_stores.l2 + jewelry_stores.l2 + retail_trade.l2 + department_stores.l3 + clothing_stores.l3 + jewelry_stores.l3 + retail_trade.l3 + department_stores.l4 + clothing_stores.l4 + jewelry_stores.l4 + retail_trade.l4 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 -0.470608 0.187166 -2.514 0.01240 *
#> clothing_stores.l1 1.471416 0.101199 14.540 < 2e-16 ***
#> jewelry_stores.l1 1.481334 0.541593 2.735 0.00657 **
#> retail_trade.l1 -0.022066 0.010837 -2.036 0.04253 *
#> department_stores.l2 0.283130 0.216416 1.308 0.19170
#> clothing_stores.l2 -0.849183 0.131844 -6.441 4.23e-10 ***
#> jewelry_stores.l2 -2.490214 0.605647 -4.112 4.97e-05 ***
#> retail_trade.l2 0.020145 0.013080 1.540 0.12447
#> department_stores.l3 0.301336 0.217820 1.383 0.16748
#> clothing_stores.l3 0.008905 0.144567 0.062 0.95092
#> jewelry_stores.l3 1.686023 0.608367 2.771 0.00590 **
#> retail_trade.l3 0.029211 0.012928 2.260 0.02450 *
#> department_stores.l4 -0.092797 0.182064 -0.510 0.61061
#> clothing_stores.l4 0.246877 0.114584 2.155 0.03192 *
#> jewelry_stores.l4 -0.548916 0.531026 -1.034 0.30204
#> retail_trade.l4 -0.031366 0.010662 -2.942 0.00350 **
#> const 901.039717 719.206842 1.253 0.21116
#> trend 6.571186 3.098394 2.121 0.03469 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 568.4 on 328 degrees of freedom
#> Multiple R-Squared: 0.9682, Adjusted R-squared: 0.9665
#> F-statistic: 586.9 on 17 and 328 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation jewelry_stores:
#> ===============================================
#> jewelry_stores = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + department_stores.l2 + clothing_stores.l2 + jewelry_stores.l2 + retail_trade.l2 + department_stores.l3 + clothing_stores.l3 + jewelry_stores.l3 + retail_trade.l3 + department_stores.l4 + clothing_stores.l4 + jewelry_stores.l4 + retail_trade.l4 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 -1.017e-01 3.192e-02 -3.186 0.001579 **
#> clothing_stores.l1 1.513e-01 1.726e-02 8.770 < 2e-16 ***
#> jewelry_stores.l1 6.513e-01 9.236e-02 7.052 1.05e-11 ***
#> retail_trade.l1 1.262e-03 1.848e-03 0.683 0.495098
#> department_stores.l2 1.287e-01 3.691e-02 3.488 0.000554 ***
#> clothing_stores.l2 -2.040e-01 2.248e-02 -9.073 < 2e-16 ***
#> jewelry_stores.l2 -1.251e-01 1.033e-01 -1.211 0.226634
#> retail_trade.l2 5.435e-04 2.230e-03 0.244 0.807644
#> department_stores.l3 3.259e-02 3.714e-02 0.877 0.380951
#> clothing_stores.l3 6.650e-03 2.465e-02 0.270 0.787514
#> jewelry_stores.l3 2.591e-01 1.037e-01 2.497 0.012998 *
#> retail_trade.l3 4.426e-03 2.205e-03 2.008 0.045501 *
#> department_stores.l4 -4.543e-02 3.105e-02 -1.463 0.144344
#> clothing_stores.l4 2.916e-02 1.954e-02 1.492 0.136533
#> jewelry_stores.l4 5.568e-03 9.055e-02 0.061 0.951009
#> retail_trade.l4 -5.015e-03 1.818e-03 -2.758 0.006133 **
#> const -2.625e+01 1.226e+02 -0.214 0.830687
#> trend 4.888e-01 5.284e-01 0.925 0.355630
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 96.92 on 328 degrees of freedom
#> Multiple R-Squared: 0.9564, Adjusted R-squared: 0.9542
#> F-statistic: 423.6 on 17 and 328 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation retail_trade:
#> =============================================
#> retail_trade = department_stores.l1 + clothing_stores.l1 + jewelry_stores.l1 + retail_trade.l1 + department_stores.l2 + clothing_stores.l2 + jewelry_stores.l2 + retail_trade.l2 + department_stores.l3 + clothing_stores.l3 + jewelry_stores.l3 + retail_trade.l3 + department_stores.l4 + clothing_stores.l4 + jewelry_stores.l4 + retail_trade.l4 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> department_stores.l1 -4.41938 1.39847 -3.160 0.001724 **
#> clothing_stores.l1 5.33965 0.75614 7.062 9.88e-12 ***
#> jewelry_stores.l1 6.63236 4.04668 1.639 0.102180
#> retail_trade.l1 0.60132 0.08097 7.426 9.71e-13 ***
#> department_stores.l2 5.78024 1.61702 3.575 0.000403 ***
#> clothing_stores.l2 -12.89354 0.98511 -13.088 < 2e-16 ***
#> jewelry_stores.l2 -4.68000 4.52528 -1.034 0.301809
#> retail_trade.l2 0.43146 0.09773 4.415 1.37e-05 ***
#> department_stores.l3 1.78181 1.62751 1.095 0.274404
#> clothing_stores.l3 4.76946 1.08018 4.415 1.37e-05 ***
#> jewelry_stores.l3 6.35997 4.54560 1.399 0.162713
#> retail_trade.l3 0.08492 0.09659 0.879 0.379971
#> department_stores.l4 -2.94456 1.36035 -2.165 0.031142 *
#> clothing_stores.l4 1.84239 0.85615 2.152 0.032131 *
#> jewelry_stores.l4 -10.04188 3.96773 -2.531 0.011844 *
#> retail_trade.l4 -0.14184 0.07967 -1.780 0.075927 .
#> const 9587.19334 5373.77622 1.784 0.075337 .
#> trend 60.37537 23.15061 2.608 0.009525 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 4247 on 328 degrees of freedom
#> Multiple R-Squared: 0.9979, Adjusted R-squared: 0.9978
#> F-statistic: 9028 on 17 and 328 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> department_stores clothing_stores jewelry_stores retail_trade
#> department_stores 58961 97648 13653 667909
#> clothing_stores 97648 323040 42851 1608377
#> jewelry_stores 13653 42851 9394 272213
#> retail_trade 667909 1608377 272213 18034658
#>
#> Correlation matrix of residuals:
#> department_stores clothing_stores jewelry_stores retail_trade
#> department_stores 1.0000 0.7075 0.5801 0.6477
#> clothing_stores 0.7075 1.0000 0.7779 0.6664
#> jewelry_stores 0.5801 0.7779 1.0000 0.6613
#> retail_trade 0.6477 0.6664 0.6613 1.0000
# AIC and BIC for VAR(1)
aic_var1 <- AIC(var1_model)
bic_var1 <- BIC(var1_model)
# AIC and BIC for VAR(p)
aic_varp <- AIC(varp_model)
bic_varp <- BIC(varp_model)
# Print the information criteria for comparison
cat("VAR(1) AIC:", aic_var1, "BIC:", bic_var1, "\n")
#> VAR(1) AIC: 20865.73 BIC: 20958.25
cat("VAR(p) AIC:", aic_varp, "BIC:", bic_varp, "\n")
#> VAR(p) AIC: 20293.36 BIC: 20570.31
# Decide based on the lower AIC and BIC
if (aic_var1 < aic_varp && bic_var1 < bic_varp) {
  cat("VAR(1) is preferred based on AIC and BIC.\n")
} else if (aic_varp < aic_var1 && bic_varp < bic_var1) {
  cat("VAR(p) is preferred based on AIC and BIC.\n")
} else {
  cat("The choice is not clear-cut. Further diagnostics and considerations may be necessary.\n")
}
#> VAR(p) is preferred based on AIC and BIC.
# One-month-ahead forecast for VAR(1) model
forecast_var1 <- predict(var1_model, n.ahead=1)
# One-month-ahead forecast for VAR(p) model
forecast_varp <- predict(varp_model, n.ahead=1)
# Printing the forecasts
print(forecast_var1)
#> $department_stores
#> fcst lower upper CI
#> department_stores.fcst 10012.62 9416.608 10608.62 596.0077
#>
#> $clothing_stores
#> fcst lower upper CI
#> clothing_stores.fcst 13900.56 12535.85 15265.27 1364.711
#>
#> $jewelry_stores
#> fcst lower upper CI
#> jewelry_stores.fcst 3167.979 2927.815 3408.142 240.1633
#>
#> $retail_trade
#> fcst lower upper CI
#> retail_trade.fcst 500323.3 489393.2 511253.3 10930.02
print(forecast_varp)
#> $department_stores
#> fcst lower upper CI
#> department_stores.fcst 10149.64 9673.721 10625.55 475.9152
#>
#> $clothing_stores
#> fcst lower upper CI
#> clothing_stores.fcst 14180.69 13066.71 15294.66 1113.977
#>
#> $jewelry_stores
#> fcst lower upper CI
#> jewelry_stores.fcst 3229.724 3039.76 3419.689 189.9647
#>
#> $retail_trade
#> fcst lower upper CI
#> retail_trade.fcst 505945.2 497621.8 514268.6 8323.425
# Plotting the forecasts
# For VAR(1)
plot(forecast_var1, main="One-Month-Ahead Forecast for VAR(1)")
# For VAR(p)
plot(forecast_varp, main="One-Month-Ahead Forecast for VAR(p)")
# Perform Granger causality test on the fitted VAR model
granger_test <- causality(varp_model)
#> Warning in causality(varp_model):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (department_stores) as cause variable.
# Print the results
print(granger_test)
#> $Granger
#>
#> Granger causality H0: department_stores do not Granger-cause
#> clothing_stores jewelry_stores retail_trade
#>
#> data: VAR object varp_model
#> F-Test = 3.9509, df1 = 12, df2 = 1312, p-value = 5.148e-06
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: department_stores and
#> clothing_stores jewelry_stores retail_trade
#>
#> data: VAR object varp_model
#> Chi-squared = 123.8, df = 3, p-value < 2.2e-16
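As the warning notes, causality() only tested department_stores as the cause. A minimal sketch to repeat the Granger test with each series in turn as the candidate cause:
# Sketch: Granger tests with each variable as the candidate cause
for (v in colnames(data_ts)) {
  res <- causality(varp_model, cause = v)
  cat(v, "Granger p-value:", res$Granger$p.value, "\n")
}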
# Create data matrix for BigVAR
data_matrix <- as.matrix(data_ts)
# Fit the sparse VAR model with LASSO regularization, lambda = 0.1, and lag-based sparsity
sparse_var_model <- BigVAR.fit(data_matrix, p = 16, lambda = 0.1, struct = "Lag")
# Print summary of the sparse VAR model
print(summary(sparse_var_model))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -11449.260 -0.118 0.000 -48.351 0.122 1032.755
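BigVAR.fit() returns only a coefficient array, so a forecast has to be assembled by hand. A minimal sketch, assuming the usual layout of k x (k*p + 1) coefficient matrices (intercept column first, then the lag-1 block, lag-2 block, and so on) stacked along a third dimension with one slice per lambda:
# Sketch: one-step-ahead forecast from the sparse VAR coefficients
B <- sparse_var_model[, , 1] # k x (k*p + 1) coefficient matrix for the single lambda
p <- 16
n <- nrow(data_matrix)
Z <- c(1, t(data_matrix[n:(n - p + 1), ])) # 1, y_T, y_(T-1), ..., y_(T-p+1)
one_step_forecast <- B %*% Z
one_step_forecast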
Created on 2024-04-10 with reprex v2.1.0
HW - 4
The following steps were performed:
Created on 2024-02-22 with reprex v2.1.0