Open gudimetlatejaswi opened 8 months ago
install.packages("tidyverse") #> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3' #> (as 'lib' is unspecified) #> package 'tidyverse' successfully unpacked and MD5 sums checked #> #> The downloaded binary packages are in #> C:\Users\999te\AppData\Local\Temp\RtmpSOufbc\downloaded_packages library(tidyverse) library(prophet) #> Loading required package: Rcpp #> Loading required package: rlang #> #> Attaching package: 'rlang' #> The following objects are masked from 'package:purrr': #> #> %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl, #> flatten_raw, invoke, splice # Read data from CSV file df <- read.csv("C:/Users/999te/Downloads/ICNSA.csv") # Rename columns for compatibility with Prophet df <- df %>% rename(ds = DATE, y = ICNSA) # Fit a prophet model m_prophet <- prophet(df) #> Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this. #> Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this. future_prophet <- make_future_dataframe(m_prophet, periods = 1) forecast_prophet <- predict(m_prophet, future_prophet) # Display prophet forecast cat("Prophet Forecast:\n") #> Prophet Forecast: tail(forecast_prophet[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')]) #> ds yhat yhat_lower yhat_upper #> 2974 2023-12-30 544996.4 223329.3 844870.0 #> 2975 2024-01-06 594020.4 289690.8 900591.5 #> 2976 2024-01-13 593485.3 289755.4 881218.5 #> 2977 2024-01-20 538122.9 226946.8 826686.9 #> 2978 2024-01-27 465529.1 181721.1 782285.9 #> 2979 2024-01-28 456659.9 138969.4 765446.7 future_predicted_value <- forecast_prophet$yhat[nrow(forecast_prophet)] last_forecast <- tail(forecast_prophet, 1) # Extracting the yhat value future_predicted_value <- last_forecast$yhat[1] # Specify the released forecast value released_forecast_value <- 261029 # Calculate the difference (yhat - released forecast value) predicted_forecast <- future_predicted_value - released_forecast_value cat("\nFuture Predicted Value (yhat - Released Forecast Value):", predicted_forecast, "\n") #> #> Future Predicted Value (yhat - Released Forecast Value): 195630.9 # Add a lag column to the data frame if 'y' column exists if ("y" %in% colnames(df)) { df$y_lag <- lag(df$y) # Remove rows with NAs introduced by lag df <- na.omit(df) } else { cat("Column 'y' not found in the dataframe.\n") } # Plot observed and predicted values from both Prophet and linear regression df_lm <- df %>% mutate(y_lm = predict(lm(y ~ y_lag, data = df), newdata = df)) library(ggplot2) library(dplyr) # Assuming df_lm is your dataframe and it has 'ds' for date, 'y' for actual and 'y_lm' for predicted # Convert 'ds' from POSIXct/POSIXlt to Date if it's not already df_lm$ds <- as.Date(df_lm$ds) # Plotting with ggplot2 ggplot(df_lm, aes(x = ds)) + geom_line(aes(y = y, color = "Actual"), linewidth = 1, group = 1) + # Add group = 1 geom_line(aes(y = y_lm, color = "Predicted"), linetype = "dashed", linewidth = 1, group = 1) + # Add group = 1 labs(title = "Actual vs. Predicted Values", y = "ICNSA", x = "Date", color = "Values") + scale_color_manual(values = c("Actual" = "black", "Predicted" = "red")) + theme_minimal()
Created on 2024-02-11 with reprex v2.1.0
- Predicted future forecast value is 195630.9
install.packages("forecast")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'forecast' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'forecast'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\999te\AppData\Local\R\win-library\4.3\00LOCK\forecast\libs\x64\forecast.dll
#> to
#> C:\Users\999te\AppData\Local\R\win-library\4.3\forecast\libs\x64\forecast.dll:
#> Permission denied
#> Warning: restored 'forecast'
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\Rtmp0YSBYB\downloaded_packages
install.packages("tseries")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'tseries' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'tseries'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\999te\AppData\Local\R\win-library\4.3\00LOCK\tseries\libs\x64\tseries.dll
#> to C:\Users\999te\AppData\Local\R\win-library\4.3\tseries\libs\x64\tseries.dll:
#> Permission denied
#> Warning: restored 'tseries'
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\Rtmp0YSBYB\downloaded_packages
install.packages("readr")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'readr' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'readr'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\999te\AppData\Local\R\win-library\4.3\00LOCK\readr\libs\x64\readr.dll
#> to C:\Users\999te\AppData\Local\R\win-library\4.3\readr\libs\x64\readr.dll:
#> Permission denied
#> Warning: restored 'readr'
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\Rtmp0YSBYB\downloaded_packages
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
library(readr)
# Read the dataset
# Ensure the path to your file is correct.
data <- read_csv("C:/Users/999te/Downloads/LRHUTTTTMXM156S.csv")
#> Rows: 443 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (1): LRHUTTTTMXM156S
#> date (1): DATE
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the DATE column to Date type
data$DATE <- as.Date(data$DATE, format="%Y-%m-%d")
# Convert data to a time series object
data_ts <- ts(data$LRHUTTTTMXM156S, start=c(1987,1), frequency=12)
# Visualize the time series
# This plot helps in understanding the overall trend and seasonality.
plot(data_ts, xlab="Date", ylab="Value", main="Time Series Plot")
# Perform stationarity tests
# Augmented Dickey-Fuller test to check if the series is stationary.
adf_result <- adf.test(data_ts)
print(adf_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: data_ts
#> Dickey-Fuller = -2.7026, Lag order = 7, p-value = 0.2807
#> alternative hypothesis: stationary
# Plot ACF and PACF
# ACF and PACF plots help in identifying potential AR(p) and MA(q) terms.
acf(data_ts, main="Autocorrelation Function (ACF)")
pacf(data_ts, main="Partial Autocorrelation Function (PACF)")
# Determine ARIMA order automatically
# auto.arima function selects the best fitting ARIMA model based on AIC.
auto_arima_model <- auto.arima(data_ts)
print(auto_arima_model)
#> Series: data_ts
#> ARIMA(2,1,1)(0,0,1)[12]
#>
#> Coefficients:
#> ar1 ar2 ma1 sma1
#> 0.4703 0.3270 -0.7792 -0.0969
#> s.e. 0.1157 0.0496 0.1144 0.0456
#>
#> sigma^2 = 0.0636: log likelihood = -16.4
#> AIC=42.8 AICc=42.93 BIC=63.25
# Fit the ARIMA model using the order suggested by auto.arima
arima_model <- Arima(data_ts, model=auto_arima_model)
# Plot diagnostics
# Diagnostics plots help in checking the residuals of the fitted ARIMA model.
tsdiag(arima_model)
# Calculate accuracy of the model on the training data
# Accuracy metrics provide insights into the model's performance.
model_accuracy <- accuracy(arima_model)
print(model_accuracy)
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.003916458 0.2507595 0.1831786 -0.3573295 4.958836 0.3147154
#> ACF1
#> Training set -0.02081849
# Forecast future values
# Adjust 'h' for the desired number of periods to forecast.
forecast_values <- forecast(arima_model, h=12)
print(forecast_values)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Dec 2023 2.751810 2.428620 3.075000 2.257533 3.246087
#> Jan 2024 2.782579 2.389722 3.175436 2.181757 3.383401
#> Feb 2024 2.789313 2.305669 3.272957 2.049643 3.528983
#> Mar 2024 2.789892 2.232509 3.347276 1.937448 3.642337
#> Apr 2024 2.766067 2.135940 3.396194 1.802371 3.729763
#> May 2024 2.759353 2.061374 3.457332 1.691887 3.826819
#> Jun 2024 2.787406 2.024228 3.550584 1.620226 3.954586
#> Jul 2024 2.762154 1.936644 3.587665 1.499645 4.024664
#> Aug 2024 2.777091 1.891623 3.662560 1.422885 4.131298
#> Sep 2024 2.783249 1.840056 3.726442 1.340760 4.225738
#> Oct 2024 2.784333 1.785439 3.783226 1.256657 4.312008
#> Nov 2024 2.772103 1.719390 3.824815 1.162118 4.382087
# Plot the forecast
# This plot visualizes the forecasted values along with confidence intervals.
plot(forecast_values)
Created on 2024-02-13 with reprex v2.1.0 -The model ARIMA(2,1,1)(0,0,1)[12] seems to be a reasonably good fit and the shaded area represents the confidence intervals for the forecasts. . -The increasing point forecast values and the confidence intervals both 80% and 95% suggest that the model expects a slight upward trend or stability in the future values within the forecast horizon.
install.packages("forecast")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'forecast' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpEXfDpe\downloaded_packages
install.packages("readr")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'readr' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpEXfDpe\downloaded_packages
install.packages("ggplot2")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'ggplot2' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpEXfDpe\downloaded_packages
install.packages("tseries")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'tseries' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpEXfDpe\downloaded_packages
library(forecast) # For ARIMA models and forecasting
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(readr) # For reading and handling CSV files
library(ggplot2) # For advanced plotting
library(tseries) # For time series analysis
# Load the ICNSA and covariate data
icnsa_data <- read_csv("C:/Users/999te/Downloads/CNP16OV.csv")
#> Rows: 913 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (1): CNP16OV
#> date (1): DATE
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
covariate_data <- read_csv("C:/Users/999te/Downloads/CUUR0000SA0R.csv")
#> Rows: 1333 Columns: 2
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (1): CUUR0000SA0R
#> date (1): DATE
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert 'DATE' columns to Date type for proper time series handling
icnsa_data$DATE <- as.Date(icnsa_data$DATE)
covariate_data$DATE <- as.Date(covariate_data$DATE)
# Merge the datasets on 'DATE' to align the time series data
merged_data <- merge(icnsa_data, covariate_data, by = "DATE", all = TRUE)
# Handle any missing values to ensure clean data for analysis
merged_data <- na.omit(merged_data)
# Perform Exploratory Data Analysis by plotting the time series
ggplot(merged_data, aes(x = DATE)) +
geom_line(aes(y = CNP16OV, colour = "ICNSA")) +
geom_line(aes(y = CUUR0000SA0R, colour = "Covariate")) +
labs(x = "Date", y = "Value", title = "ICNSA and Covariate Time Series") +
scale_colour_manual(values = c("ICNSA" = "blue", "Covariate" = "red")) +
theme_minimal()
icnsa_ts <- ts(icnsa_data$CNP16OV, frequency = 10)
# Plot ACF
acf(icnsa_ts, main = "ACF for ICNSA Data")
# Plot PACF
pacf(icnsa_ts, main = "PACF for ICNSA Data")
# Calculate and print the correlation between ICNSA and the covariate
correlation <- cor(merged_data$CNP16OV, merged_data$CUUR0000SA0R)
cat("Correlation between ICNSA and the covariate: ", correlation, "\n")
#> Correlation between ICNSA and the covariate: -0.9313273
# Fit an ARIMA model with the covariate included
model <- auto.arima(merged_data$CNP16OV, xreg = merged_data$CUUR0000SA0R)
# Display model summary for diagnostics
summary(model)
#> Series: merged_data$CNP16OV
#> Regression with ARIMA(0,2,1) errors
#>
#> Coefficients:
#> ma1 xreg
#> -0.9606 6.4894
#> s.e. 0.0095 6.6401
#>
#> sigma^2 = 22162: log likelihood = -5850.28
#> AIC=11706.55 AICc=11706.58 BIC=11721
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set 2.312179 148.5427 48.55455 0.002486003 0.02773032 0.2526473
#> ACF1
#> Training set 0.0005680853
# Manually plot diagnostics: residuals, ACF, and PACF
par(mfrow = c(3, 1)) # Arrange plots vertically
# Plot residuals
residuals_plot <- residuals(model)
plot(residuals_plot, main = "Residuals of the Model", ylab = "Residuals", xlab = "Time")
abline(h = 0, col = "red")
# Plot ACF of residuals
acf(residuals_plot, main = "ACF of Residuals")
# Plot PACF of residuals
pacf(residuals_plot, main = "PACF of Residuals")
par(mfrow = c(1, 1)) # Reset plotting layout
# Forecasting for the next 12 periods
forecast_horizon <- 12
future_covariate <- as.matrix(data.frame(CUUR0000SA0R = rep(mean(merged_data$CUUR0000SA0R, na.rm = TRUE), forecast_horizon)))
forecast_result <- forecast(model, xreg = future_covariate, h = forecast_horizon)
#> Warning in forecast.forecast_ARIMA(model, xreg = future_covariate, h =
#> forecast_horizon): xreg contains different column names from the xreg used in
#> training. Please check that the regressors are in the same order.
# Plot the forecast
plot(forecast_result, main = "Forecast of ICNSA")
title(xlab = "Time", ylab = "Forecast Value")
# Output the forecast results
print(forecast_result)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 914 268553.4 268362.6 268744.2 268261.6 268845.2
#> 915 268720.3 268445.1 268995.4 268299.4 269141.1
#> 916 268887.1 268543.5 269230.8 268361.6 269412.7
#> 917 269054.0 268649.5 269458.5 268435.4 269672.6
#> 918 269220.9 268760.1 269681.7 268516.1 269925.7
#> 919 269387.7 268873.4 269902.1 268601.2 270174.3
#> 920 269554.6 268988.8 270120.4 268689.3 270420.0
#> 921 269721.5 269105.5 270337.4 268779.5 270663.5
#> 922 269888.4 269223.3 270553.4 268871.2 270905.5
#> 923 270055.2 269341.7 270768.8 268964.0 271146.5
#> 924 270222.1 269460.6 270983.6 269057.5 271386.7
#> 925 270389.0 269579.9 271198.1 269151.6 271626.4
# mean forecast
forecast_mean <- mean(forecast_result$mean)
print(forecast_mean)
#> [1] 269471.2
Created on 2024-02-22 with reprex v2.1.0
install.packages("fredr")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'fredr' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpCEZaP3\downloaded_packages
install.packages("forecast")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'forecast' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpCEZaP3\downloaded_packages
install.packages("ggplot2")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'ggplot2' successfully unpacked and MD5 sums checked
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpCEZaP3\downloaded_packages
install.packages("dplyr")
#> Installing package into 'C:/Users/999te/AppData/Local/R/win-library/4.3'
#> (as 'lib' is unspecified)
#> package 'dplyr' successfully unpacked and MD5 sums checked
#> Warning: cannot remove prior installation of package 'dplyr'
#> Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
#> C:\Users\999te\AppData\Local\R\win-library\4.3\00LOCK\dplyr\libs\x64\dplyr.dll
#> to C:\Users\999te\AppData\Local\R\win-library\4.3\dplyr\libs\x64\dplyr.dll:
#> Permission denied
#> Warning: restored 'dplyr'
#>
#> The downloaded binary packages are in
#> C:\Users\999te\AppData\Local\Temp\RtmpCEZaP3\downloaded_packages
# Load necessary libraries
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(ggplot2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Set FRED API Key
fredr_set_key("1fc7fef6f34a921fad374d2810c04ce8")
# Loading ICNSA data
ICNSA_data <- fredr(series_id = "ICNSA", observation_start = as.Date("2019-01-01")) %>%
mutate(date = as.Date(date)) %>%
arrange(date)
# Adding a numeric representation of the date for spline fitting
ICNSA_data$date_numeric <- as.numeric(ICNSA_data$date)
ggplot(ICNSA_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA Claims Data",
x = "Date",
y = "value")
# Define the COVID period
covid_start <- as.Date("2020-01-30") # Empirical analysis decision
covid_end <- as.Date("2021-06-01") # Empirical analysis decision
# Highlighting the COVID period on the plot
ggplot(ICNSA_data, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(covid_start), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(covid_end), linetype = "dashed", color = "red") +
labs(title = "ICNSA Claims with COVID Period Highlighted", x = "Date", y = "Claims")
# Perform cubic spline imputation for COVID period
ICNSA_data$value_imputed <- with(ICNSA_data, ifelse(date >= covid_start & date <= covid_end, NA, value))
spline_fit <- smooth.spline(ICNSA_data$date_numeric[!is.na(ICNSA_data$value_imputed)], ICNSA_data$value_imputed[!is.na(ICNSA_data$value_imputed)], cv = TRUE)
ICNSA_data$value_imputed[is.na(ICNSA_data$value_imputed)] <- predict(spline_fit, ICNSA_data$date_numeric[is.na(ICNSA_data$value_imputed)])$y
# Discuss λ value chosen and overall fit
cat("Chosen λ value:", spline_fit$spar, "\n")
#> Chosen λ value: 0.3146467
# Convert to ts object for forecasting
ts_data <- ts(ICNSA_data$value_imputed, start = c(2019, 1), frequency = 52)
# Forecasting with Holt-Winters model
forecast_mul <- forecast(HoltWinters(ts_data, seasonal = "multiplicative"), h = 52)
#> Warning in HoltWinters(ts_data, seasonal = "multiplicative"): optimization
#> difficulties: ERROR: ABNORMAL_TERMINATION_IN_LNSRCH
forecast_add <- forecast(HoltWinters(ts_data, seasonal = "additive"), h = 52)
# Mean of forecast values for summarization
mean_forecast_mul <- round(forecast_mul$mean[1])
mean_forecast_add <- round(forecast_add$mean[1])
cat("Mean of Multiplicative Model Forecast:", mean_forecast_mul, "\n")
#> Mean of Multiplicative Model Forecast: 169754
cat("Mean of Additive Model Forecast:", mean_forecast_add, "\n")
#> Mean of Additive Model Forecast: 180838
# Plotting forecasts
plot(forecast_mul, main = "Multiplicative Holt-Winters Forecast")
plot(forecast_add, main = "Additive Holt-Winters Forecast")
Created on 2024-02-28 with reprex v2.1.0
if (!requireNamespace("fredr", quietly = TRUE)) install.packages("fredr")
if (!requireNamespace("forecast", quietly = TRUE)) install.packages("forecast")
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("KFAS", quietly = TRUE)) install.packages("KFAS")
if (!requireNamespace("bsts", quietly = TRUE)) install.packages("bsts")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
if (!requireNamespace("xts", quietly = TRUE)) install.packages("xts")
if (!requireNamespace("TTR", quietly = TRUE)) install.packages("TTR")
if (!requireNamespace("lubridate", quietly = TRUE)) install.packages("lubridate")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("dlm", quietly = TRUE)) install.packages("dlm")
if (!requireNamespace("tseries", quietly = TRUE)) install.packages("tseries")
if (!requireNamespace("urca", quietly = TRUE)) install.packages("urca")
if (!requireNamespace("readxl", quietly = TRUE)) install.packages("readxl")
if (!requireNamespace("readr", quietly = TRUE)) install.packages("readr")
# Load necessary libraries
library(fredr)
#> Warning: package 'fredr' was built under R version 4.3.3
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.3
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.3.3
#> Warning: package 'ggplot2' was built under R version 4.3.3
library(KFAS)
#> Warning: package 'KFAS' was built under R version 4.3.3
#> Please cite KFAS in publications by using:
#>
#> Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(bsts)
#> Warning: package 'bsts' was built under R version 4.3.3
#> Loading required package: BoomSpikeSlab
#> Warning: package 'BoomSpikeSlab' was built under R version 4.3.3
#> Loading required package: Boom
#> Warning: package 'Boom' was built under R version 4.3.3
#>
#> Attaching package: 'Boom'
#> The following object is masked from 'package:stats':
#>
#> rWishart
#>
#> Attaching package: 'BoomSpikeSlab'
#> The following object is masked from 'package:stats':
#>
#> knots
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#> Loading required package: xts
#> Warning: package 'xts' was built under R version 4.3.3
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
#>
#> Attaching package: 'bsts'
#> The following object is masked from 'package:BoomSpikeSlab':
#>
#> SuggestBurn
library(ggplot2)
library(xts)
library(TTR)
#> Warning: package 'TTR' was built under R version 4.3.3
library(lubridate)
library(dplyr)
library(dlm)
#> Warning: package 'dlm' was built under R version 4.3.3
#>
#> Attaching package: 'dlm'
#> The following object is masked from 'package:ggplot2':
#>
#> %+%
library(tseries)
library(urca)
library(readxl)
library(readr)
icnsa_data <- read.csv("C:/Users/999te/Downloads/CD12NRNJ.csv")
covariate_data <- read.csv("C:/Users/999te/Downloads/OECDNMERECDM.csv")
icnsa_data$DATE <- as.Date(icnsa_data$DATE)
covariate_data$DATE <- as.Date(covariate_data$DATE)
combined_data <- left_join(icnsa_data, covariate_data, by = "DATE")
# Handling missing values for the covariate
combined_data$OECDNMERECDM[is.na(combined_data$OECDNMERECDM)] <- mean(combined_data$OECDNMERECDM, na.rm = TRUE)
# Plotting ICNSA data
ggplot(combined_data, aes(x = DATE, y = CD12NRNJ)) +
geom_line() +
labs(title = "ICNSA Time Series", x = "Date", y = "ICNSA")
# Plotting Covariate data
ggplot(combined_data, aes(x = DATE, y = OECDNMERECDM)) +
geom_line() +
labs(title = "Covariate Time Series", x = "Date", y = "Covariate")
# ACF and PACF of ICNSA data
acf(combined_data$CD12NRNJ, main = "ACF of ICNSA")
pacf(combined_data$CD12NRNJ, main = "PACF of ICNSA")
# Define the COVID period
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-06-30")
# Impute missing values during COVID period
combined_data$CD12NRNJ <- ifelse(combined_data$DATE >= start_date & combined_data$DATE <= end_date,
mean(combined_data$CD12NRNJ, na.rm = TRUE),
combined_data$CD12NRNJ)
# Modeling with bsts - Simplified example
ss_model <- AddLocalLinearTrend(list(), combined_data$CD12NRNJ)
ss_model <- AddSeasonal(ss_model, combined_data$CD12NRNJ, nseasons = 12)
bsts_model <- bsts(combined_data$CD12NRNJ, state.specification = ss_model, niter = 500)
#> =-=-=-=-= Iteration 0 Thu Mar 7 05:12:24 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 50 Thu Mar 7 05:12:24 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 100 Thu Mar 7 05:12:25 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 150 Thu Mar 7 05:12:25 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 200 Thu Mar 7 05:12:26 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 250 Thu Mar 7 05:12:26 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 300 Thu Mar 7 05:12:27 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 350 Thu Mar 7 05:12:27 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 400 Thu Mar 7 05:12:27 2024
#> =-=-=-=-=
#> =-=-=-=-= Iteration 450 Thu Mar 7 05:12:28 2024
#> =-=-=-=-=
# Plotting the fitted model
plot(bsts_model)
# Predict the next 52 weeks
forecast_length <- 52
forecast_bsts <- predict(bsts_model, horizon = forecast_length, burn = 100)
# Directly accessing the forecast mean. If forecast_bsts$mean is not correctly shaped, ensure predict() outputs as expected.
# If predict() returns a list where mean forecasts are directly accessible, you may not need colMeans().
forecast_mean <- forecast_bsts$mean *1000000
# Print the forecasted mean values
if (exists("forecast_mean")) { # Check if the variable exists to avoid errors
print(forecast_mean)
# Calculate the overall mean of the forecasted values if necessary
overall_mean <- mean(forecast_mean)
print(overall_mean)
} else {
print("Forecasted values mean was not calculated correctly.")
}
#> [1] 378675.8 380912.8 381642.9 377207.2 378958.0 378320.6 378189.9 376069.5
#> [9] 377843.2 378743.9 377976.6 377838.3 378762.3 380705.2 381124.1 375983.8
#> [17] 377456.0 376830.1 377205.4 374890.3 377931.3 378758.0 377916.7 378771.4
#> [25] 377781.7 379787.0 380664.1 375103.9 376951.6 375634.1 375514.0 373866.9
#> [33] 375742.2 376205.8 375057.7 375698.8 375158.1 376947.1 377885.9 372519.0
#> [41] 374145.9 373042.5 373472.2 370674.0 373033.8 373405.3 371832.5 372295.2
#> [49] 371448.8 373722.3 374433.2 368883.3
#> [1] 376338.8
Created on 2024-03-07 with reprex v2.1.0
# Install and load necessary packages
if (!requireNamespace("readxl", quietly = TRUE)) install.packages("readxl")
if (!requireNamespace("zoo", quietly = TRUE)) install.packages("zoo")
if (!requireNamespace("vars", quietly = TRUE)) install.packages("vars")
if (!requireNamespace("forecast", quietly = TRUE)) install.packages("forecast")
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
if (!requireNamespace("tseries", quietly = TRUE)) install.packages("tseries")
if (!requireNamespace("urca", quietly = TRUE)) install.packages("urca")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("tidyr", quietly = TRUE)) install.packages("tidyr")
if (!requireNamespace("purrr", quietly = TRUE)) install.packages("purrr")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
library(readxl)
#> Warning: package 'readxl' was built under R version 4.3.3
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(vars)
#> Warning: package 'vars' was built under R version 4.3.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.3.3
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.3.3
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.3.3
#> Loading required package: lmtest
library(forecast)
#> Warning: package 'forecast' was built under R version 4.3.3
library(tseries)
#> Warning: package 'tseries' was built under R version 4.3.3
library(urca)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:MASS':
#>
#> select
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyr)
library(purrr)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.3.3
prepare_dataset <- function(file_path) {
# Load the dataset, skipping initial rows that might contain metadata
data <- read_excel(file_path, skip = 8)
# Assuming the first column contains years and needs no change
colnames(data)[1] <- "Year"
# Convert all other columns (months) to numeric, setting non-convertible values to NA
for (i in 2:ncol(data)) {
data[[i]] <- as.numeric(as.character(data[[i]]))
}
colnames(data)[-1] <- month.abb
# Reshape from wide to long format
data_long <- pivot_longer(data, cols = -Year, names_to = "Month", values_to = "Value")
# Convert 'Month' from abbreviation to numeric (1-12)
data_long$Month <- match(data_long$Month, month.abb)
# Create Date column
data_long <- data_long %>%
mutate(Date = as.Date(paste(Year, Month, 1, sep="-"))) %>%
select(Date, Value) %>%
drop_na() # Drop rows where conversion to numeric failed
# Create zoo object
ts_data <- zoo(data_long$Value, order.by = data_long$Date)
return(ts_data)
}
file_paths <- c(
CMSVS = "C:/Users/999te/Downloads/CMSVS.xlsx",
NXAVS = "C:/Users/999te/Downloads/NXAVS.xlsx",
NDEVS = "C:/Users/999te/Downloads/NDEVS.xlsx",
DEFVS = "C:/Users/999te/Downloads/DEFVS.xlsx"
)
# Apply the preparation function to each dataset
ts_data_list <- lapply(file_paths, prepare_dataset)
#> New names:
#> • `23727` -> `23727...11`
#> • `23727` -> `23727...12`
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> New names:
#> • `7835` -> `7835...2`
#> • `7835` -> `7835...9`
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
#> Warning in FUN(X[[i]], ...): NAs introduced by coercion
# Check one of the time series to ensure it looks correct
print(head(ts_data_list[[1]]))
#> 1993-01-01 1993-02-01 1993-03-01 1993-04-01 1993-05-01 1993-06-01
#> 23984 24978 24873 24898 25205 25037
names(ts_data_list) <- c("CMSVS", "NXAVS", "NDEVS", "DEFVS")
# Plot each time series
par(mfrow=c(2,2))
for (i in names(ts_data_list)) {
plot(ts_data_list[[i]], main=i, xlab="Date", ylab="Value", type='l')
}
# Convert each zoo object in ts_data_list to a data frame for ggplot
ts_data_frames <- lapply(names(ts_data_list), function(name) {
data.frame(Date = index(ts_data_list[[name]]),
Value = coredata(ts_data_list[[name]]),
Series = name)
})
# Combine all data frames into one for easier plotting with ggplot2
combined_df <- do.call(rbind, ts_data_frames)
# Create a plot for each time series
p <- ggplot(combined_df, aes(x = Date, y = Value, group = Series, color = Series)) +
geom_line() +
labs(title = "Time Series Data for All Datasets", x = "Date", y = "Value") +
theme_minimal() +
facet_wrap(~Series, scales = "free_y") +
theme(legend.position = "none")
# Print the plot
print(p)
# Conduct Augmented Dickey-Fuller Test to check for stationarity and print results
adf_results <- lapply(ts_data_list, function(ts) {
adf.test(ts, alternative = "stationary")
})
# Print ADF test results
print(adf_results)
#> $CMSVS
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts
#> Dickey-Fuller = -2.2515, Lag order = 7, p-value = 0.4709
#> alternative hypothesis: stationary
#>
#>
#> $NXAVS
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts
#> Dickey-Fuller = -3.7115, Lag order = 7, p-value = 0.02368
#> alternative hypothesis: stationary
#>
#>
#> $NDEVS
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts
#> Dickey-Fuller = -3.5404, Lag order = 7, p-value = 0.03891
#> alternative hypothesis: stationary
#>
#>
#> $DEFVS
#>
#> Augmented Dickey-Fuller Test
#>
#> data: ts
#> Dickey-Fuller = -2.3074, Lag order = 7, p-value = 0.4473
#> alternative hypothesis: stationary
# Combine all series into a single multivariate time series object
combined_ts <- do.call(merge, ts_data_list)
# Fit a VAR(1) model
var1_model <- VAR(combined_ts, p = 1, type = "const")
# Fit a VAR(2) model
var2_model <- VAR(combined_ts, p = 2, type = "const")
# Compare the models using AIC and BIC
aic_var1 <- AIC(var1_model)
bic_var1 <- BIC(var1_model)
aic_var2 <- AIC(var2_model)
bic_var2 <- BIC(var2_model)
cat("VAR(1) - AIC:", aic_var1, "; BIC:", bic_var1, "\n")
#> VAR(1) - AIC: 23841.23 ; BIC: 23919.66
cat("VAR(2) - AIC:", aic_var2, "; BIC:", bic_var2, "\n")
#> VAR(2) - AIC: 23654.56 ; BIC: 23795.64
# Decide which model is better based on AIC (or BIC if preferred)
preferred_model <- if(aic_var1 < aic_var2) {var1_model} else {var2_model}
preferred_p <- ifelse(aic_var1 < aic_var2, 1, 2)
cat("Preferred model based on AIC: VAR(", preferred_p, ")\n")
#> Preferred model based on AIC: VAR( 2 )
# Forecasting with VAR(1)
forecast_var1 <- predict(var1_model, n.ahead = 1)
print(forecast_var1$fcst)
#> $CMSVS
#> fcst lower upper CI
#> CMSVS.fcst 63040.94 61656.7 64425.19 1384.241
#>
#> $NXAVS
#> fcst lower upper CI
#> NXAVS.fcst 74509.42 72435.81 76583.03 2073.607
#>
#> $NDEVS
#> fcst lower upper CI
#> NDEVS.fcst 82320.88 79344.15 85297.6 2976.724
#>
#> $DEFVS
#> fcst lower upper CI
#> DEFVS.fcst 14122.95 13373.94 14871.96 749.0071
# Forecasting with VAR(2)
forecast_var2 <- predict(var2_model, n.ahead = 1)
print(forecast_var2$fcst)
#> $CMSVS
#> fcst lower upper CI
#> CMSVS.fcst 62954.96 61564.99 64344.94 1389.972
#>
#> $NXAVS
#> fcst lower upper CI
#> NXAVS.fcst 74570.5 72532.41 76608.59 2038.092
#>
#> $NDEVS
#> fcst lower upper CI
#> NDEVS.fcst 81247.04 78431.19 84062.89 2815.847
#>
#> $DEFVS
#> fcst lower upper CI
#> DEFVS.fcst 14193.68 13502.65 14884.72 691.035
# Test Granger causality without specifying 'cause' to test all combinations
granger_results <- causality(preferred_model)
#> Warning in causality(preferred_model):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (CMSVS) as cause variable.
# Print the Granger causality test results
print(granger_results)
#> $Granger
#>
#> Granger causality H0: CMSVS do not Granger-cause NXAVS NDEVS DEFVS
#>
#> data: VAR object preferred_model
#> F-Test = 3.8665, df1 = 6, df2 = 1452, p-value = 0.0007808
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: CMSVS and NXAVS NDEVS DEFVS
#>
#> data: VAR object preferred_model
#> Chi-squared = 70.826, df = 3, p-value = 2.887e-15
unlink("C:/Users/999te/AppData/Local/R/win-library/4.3/00LOCK", recursive = TRUE)
if (!requireNamespace("BigVAR", quietly = TRUE)) install.packages("BigVAR")
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.3.3
#> Loading required package: lattice
combined_ts_matrix <- as.matrix(combined_ts)
struct_type <- "BasicEN"
model_spec <- constructModel(combined_ts_matrix, p = 2, struct = struct_type, gran = c(10, 50))
# Check if model_spec was successfully created
if (!is.null(model_spec)) {
# Proceed with cross-validation to optimize model parameters
cv_results <- cv.BigVAR(model_spec)
# Assuming cv_results is successfully created, forecast
if (!is.null(cv_results)) {
forecast_results <- predict(cv_results, n.ahead = 1)
# Print the forecast results
print(forecast_results)
} else {
cat("Cross-validation failed. Ensure model_spec is valid.\n")
}
} else {
cat("Failed to construct model. Check struct_type and other parameters.\n")
}
#> Validation Stage: BasicEN | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 46% | |================================= | 47% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%[1] "Evaluation Stage"
#> [,1]
#> [1,] 59455.53
#> [2,] 72412.80
#> [3,] 80592.89
#> [4,] 11376.76
Created on 2024-04-11 with reprex v2.1.0
Assignment1