Open AlishaRuqshan opened 4 months ago
# HW2
# All packages
library(fredr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(dygraphs)
library(xts)
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(reprex)
fredr_set_key("052142bc981666b4ebcb1c8df98d006b")
# Reading Data
icnsa_data = fredr(series_id = "ICNSA")
# Date column conversion to date data type
icnsa_data$date <- as.Date(icnsa_data$date)
# Updataing dataframe to remove other columns and keep only date and value
updated_icnsa_df <- icnsa_data %>%
select(date, value) %>%
na.omit()
# Define the COVID-19 period
covid_start <- as.Date("2020-01-01")
covid_end <- as.Date("2022-12-31")
icnsa_no_covid <- subset(updated_icnsa_df, date < covid_start | date > covid_end)
# Autocorrelation Plot for dataframe
acf(icnsa_no_covid$value)
# Partial Autocorrelation Plot for dataframe
pacf(icnsa_no_covid$value)
str(icnsa_no_covid)
#> tibble [2,822 × 2] (S3: tbl_df/tbl/data.frame)
#> $ date : Date[1:2822], format: "1967-01-07" "1967-01-14" ...
#> $ value: num [1:2822] 346000 334000 277000 252000 274000 276000 247000 248000 326000 240000 ...
# Ensuring value column as a numeric datatype
icnsa_no_covid$value <- as.numeric(icnsa_no_covid$value)
# Creating a timeseries object with frequency 7 as weekly data
ts_updated_icnsa_df_no_covid <- ts(icnsa_no_covid$value, frequency = 7)
# Fitting an ARIMA model using auto.arima function. We can also specify p,q,d values using above acf and pcf plots.
arima_model_no_covid <- auto.arima(ts_updated_icnsa_df_no_covid)
summary(arima_model_no_covid)
#> Series: ts_updated_icnsa_df_no_covid
#> ARIMA(2,1,2)
#>
#> Coefficients:
#> ar1 ar2 ma1 ma2
#> 0.9555 -0.0637 -1.1942 0.2068
#> s.e. 0.1222 0.1030 0.1204 0.1179
#>
#> sigma^2 = 2.477e+09: log likelihood = -34510.67
#> AIC=69031.34 AICc=69031.36 BIC=69061.07
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -146.1736 49722.01 33579.3 -1.452661 9.090663 0.4276818
#> ACF1
#> Training set 0.0005209598
# Forcasting the value
forecast_values <- forecast(arima_model_no_covid, h = 1)
plot(forecast_values, main = "ARIMA Forecast",
xlab = "Date", ylab = "Number of Cases")
forecast_mean <- mean(forecast_values$mean, na.rm = TRUE)
print(forecast_mean)
#> [1] 235751.1
Created on 2024-02-09 with reprex v2.1.0
# Install the fredr package
library(reprex)
if (!requireNamespace("fredr", quietly = TRUE)) install.packages("fredr")
library(ggplot2)
# Load the fredr library
library(fredr)
# Set your FRED API key
fredr_set_key("f4e804771fd1a4241706e9c4498f0839") # Your FRED API key
# Fetch the most current Initial Claims (ICNSA) data
initial_claims <- fredr(series_id = "ICNSA", observation_start = as.Date("2022-01-01"))
# Date column conversion to date data type
icnsa_data$date <- as.Date(icnsa_data$date)
#> Error in eval(expr, envir, enclos): object 'icnsa_data' not found
# Viewing the data
head(initial_claims)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2022-01-01 ICNSA 314796 2024-02-22 2024-02-22
#> 2 2022-01-08 ICNSA 415162 2024-02-22 2024-02-22
#> 3 2022-01-15 ICNSA 337075 2024-02-22 2024-02-22
#> 4 2022-01-22 ICNSA 267637 2024-02-22 2024-02-22
#> 5 2022-01-29 ICNSA 258288 2024-02-22 2024-02-22
#> 6 2022-02-05 ICNSA 231452 2024-02-22 2024-02-22
#plot it!
ggplot(initial_claims, aes(x = date, y = value)) +
geom_line() +
labs(title = "Initial Claims Over Time", x = "Date", y = "Initial Claims (ICNSA)") +
theme_minimal()
# Fetch the most current Continued Claims (Not Seasonally Adjusted) data
continued_claims_nsadj <- fredr(series_id = "CCNSA", observation_start = as.Date("2020-01-01"))
# View the data
head(continued_claims_nsadj)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2020-01-04 CCNSA 2251286 2024-02-22 2024-02-22
#> 2 2020-01-11 CCNSA 2142711 2024-02-22 2024-02-22
#> 3 2020-01-18 CCNSA 2082416 2024-02-22 2024-02-22
#> 4 2020-01-25 CCNSA 2154866 2024-02-22 2024-02-22
#> 5 2020-02-01 CCNSA 2088693 2024-02-22 2024-02-22
#> 6 2020-02-08 CCNSA 2100473 2024-02-22 2024-02-22
# Assuming your dataframe is named continued_claims_nsadj and the date column is named "date"
continued_claims_nsadj$date <- as.Date(continued_claims_nsadj$date)
#plot it!
ggplot(continued_claims_nsadj, aes(x = date, y = value)) +
geom_line(color = "red") +
labs(title = "Continued Claims (Not Seasonally Adjusted) Over Time", x = "Date", y = "Continued Claims (CCNSA)") +
theme_minimal()
#empirical data analysis
# a) ADF test
# Install the tseries package if you haven't already
if (!requireNamespace("tseries", quietly = TRUE)) install.packages("tseries")
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
# Load the tseries library
library(tseries)
# ADF test for Initial Claims (ICNSA)
adf_test_icnsa <- adf.test(initial_claims$value, alternative = "stationary")
print(adf_test_icnsa)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: initial_claims$value
#> Dickey-Fuller = -3.2143, Lag order = 4, p-value = 0.08907
#> alternative hypothesis: stationary
# ADF test for Continued Claims - Not Seasonally Adjusted (CCNSA)
adf_test_ccnsa <- adf.test(continued_claims_nsadj$value, alternative = "stationary")
print(adf_test_ccnsa)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: continued_claims_nsadj$value
#> Dickey-Fuller = -3.3689, Lag order = 5, p-value = 0.061
#> alternative hypothesis: stationary
#b) decomposition
# Ensure your time series data is in a ts object with an appropriate frequency
# For weekly data, you might approximate the frequency as 52
icnsa_ts <- ts(initial_claims$value, frequency = 52)
ccnsa_ts <- ts(continued_claims_nsadj$value, frequency = 52)
# Decompose using STL for ICNSA
library(stats)
decomp_icnsa <- stl(icnsa_ts, s.window = "periodic", robust = TRUE)
plot(decomp_icnsa)
# Decompose using STL for CCNSA
decomp_ccnsa <- stl(ccnsa_ts, s.window = "periodic", robust = TRUE)
plot(decomp_ccnsa)
#c) ACF & PCF PLOTTING
# Load necessary library for ACF and PACF plotting
library(stats)
# ACF and PACF for ICNSA
# Autocorrelation Function (ACF)
acf(icnsa_ts, main = "ACF for ICNSA")
# Partial Autocorrelation Function (PACF)
pacf(icnsa_ts, main = "PACF for ICNSA")
# ACF and PACF for CCNSA
# Autocorrelation Function (ACF)
acf(ccnsa_ts, main = "ACF for CCNSA")
# Partial Autocorrelation Function (PACF)
pacf(ccnsa_ts, main = "PACF for CCNSA")
#####STEP2 MERGER OF THE two data sets choosen
# Ensure all date columns are in the correct date format
initial_claims$date <- as.Date(initial_claims$date)
continued_claims_nsadj$date <- as.Date(continued_claims_nsadj$date)
# Then merge the resulting data frame with continued_claims_nsadj
final_merged_data <- merge(initial_claims, continued_claims_nsadj, by = "date", all = TRUE)
# View the first few rows of the final merged dataset
head(final_merged_data)
#> date series_id.x value.x realtime_start.x realtime_end.x series_id.y
#> 1 2020-01-04 <NA> NA <NA> <NA> CCNSA
#> 2 2020-01-11 <NA> NA <NA> <NA> CCNSA
#> 3 2020-01-18 <NA> NA <NA> <NA> CCNSA
#> 4 2020-01-25 <NA> NA <NA> <NA> CCNSA
#> 5 2020-02-01 <NA> NA <NA> <NA> CCNSA
#> 6 2020-02-08 <NA> NA <NA> <NA> CCNSA
#> value.y realtime_start.y realtime_end.y
#> 1 2251286 2024-02-22 2024-02-22
#> 2 2142711 2024-02-22 2024-02-22
#> 3 2082416 2024-02-22 2024-02-22
#> 4 2154866 2024-02-22 2024-02-22
#> 5 2088693 2024-02-22 2024-02-22
#> 6 2100473 2024-02-22 2024-02-22
cleandata <- na.omit(final_merged_data)
cleandata
#> date series_id.x value.x realtime_start.x realtime_end.x series_id.y
#> 105 2022-01-01 ICNSA 314796 2024-02-22 2024-02-22 CCNSA
#> 106 2022-01-08 ICNSA 415162 2024-02-22 2024-02-22 CCNSA
#> 107 2022-01-15 ICNSA 337075 2024-02-22 2024-02-22 CCNSA
#> 108 2022-01-22 ICNSA 267637 2024-02-22 2024-02-22 CCNSA
#> 109 2022-01-29 ICNSA 258288 2024-02-22 2024-02-22 CCNSA
#> 110 2022-02-05 ICNSA 231452 2024-02-22 2024-02-22 CCNSA
#> 111 2022-02-12 ICNSA 240502 2024-02-22 2024-02-22 CCNSA
#> 112 2022-02-19 ICNSA 216574 2024-02-22 2024-02-22 CCNSA
#> 113 2022-02-26 ICNSA 196537 2024-02-22 2024-02-22 CCNSA
#> 114 2022-03-05 ICNSA 220486 2024-02-22 2024-02-22 CCNSA
#> 115 2022-03-12 ICNSA 204494 2024-02-22 2024-02-22 CCNSA
#> 116 2022-03-19 ICNSA 182581 2024-02-22 2024-02-22 CCNSA
#> 117 2022-03-26 ICNSA 197128 2024-02-22 2024-02-22 CCNSA
#> 118 2022-04-02 ICNSA 194671 2024-02-22 2024-02-22 CCNSA
#> 119 2022-04-09 ICNSA 223751 2024-02-22 2024-02-22 CCNSA
#> 120 2022-04-16 ICNSA 197219 2024-02-22 2024-02-22 CCNSA
#> 121 2022-04-23 ICNSA 202952 2024-02-22 2024-02-22 CCNSA
#> 122 2022-04-30 ICNSA 196422 2024-02-22 2024-02-22 CCNSA
#> 123 2022-05-07 ICNSA 185815 2024-02-22 2024-02-22 CCNSA
#> 124 2022-05-14 ICNSA 199631 2024-02-22 2024-02-22 CCNSA
#> 125 2022-05-21 ICNSA 187695 2024-02-22 2024-02-22 CCNSA
#> 126 2022-05-28 ICNSA 183446 2024-02-22 2024-02-22 CCNSA
#> 127 2022-06-04 ICNSA 186568 2024-02-22 2024-02-22 CCNSA
#> 128 2022-06-11 ICNSA 205890 2024-02-22 2024-02-22 CCNSA
#> 129 2022-06-18 ICNSA 206272 2024-02-22 2024-02-22 CCNSA
#> 130 2022-06-25 ICNSA 207061 2024-02-22 2024-02-22 CCNSA
#> 131 2022-07-02 ICNSA 218886 2024-02-22 2024-02-22 CCNSA
#> 132 2022-07-09 ICNSA 243485 2024-02-22 2024-02-22 CCNSA
#> 133 2022-07-16 ICNSA 237092 2024-02-22 2024-02-22 CCNSA
#> 134 2022-07-23 ICNSA 200929 2024-02-22 2024-02-22 CCNSA
#> 135 2022-07-30 ICNSA 196116 2024-02-22 2024-02-22 CCNSA
#> 136 2022-08-06 ICNSA 196308 2024-02-22 2024-02-22 CCNSA
#> 137 2022-08-13 ICNSA 187391 2024-02-22 2024-02-22 CCNSA
#> 138 2022-08-20 ICNSA 179220 2024-02-22 2024-02-22 CCNSA
#> 139 2022-08-27 ICNSA 173867 2024-02-22 2024-02-22 CCNSA
#> 140 2022-09-03 ICNSA 172835 2024-02-22 2024-02-22 CCNSA
#> 141 2022-09-10 ICNSA 152144 2024-02-22 2024-02-22 CCNSA
#> 142 2022-09-17 ICNSA 168701 2024-02-22 2024-02-22 CCNSA
#> 143 2022-09-24 ICNSA 153821 2024-02-22 2024-02-22 CCNSA
#> 144 2022-10-01 ICNSA 167378 2024-02-22 2024-02-22 CCNSA
#> 145 2022-10-08 ICNSA 198340 2024-02-22 2024-02-22 CCNSA
#> 146 2022-10-15 ICNSA 178773 2024-02-22 2024-02-22 CCNSA
#> 147 2022-10-22 ICNSA 184420 2024-02-22 2024-02-22 CCNSA
#> 148 2022-10-29 ICNSA 185973 2024-02-22 2024-02-22 CCNSA
#> 149 2022-11-05 ICNSA 206079 2024-02-22 2024-02-22 CCNSA
#> 150 2022-11-12 ICNSA 200237 2024-02-22 2024-02-22 CCNSA
#> 151 2022-11-19 ICNSA 248916 2024-02-22 2024-02-22 CCNSA
#> 152 2022-11-26 ICNSA 199323 2024-02-22 2024-02-22 CCNSA
#> 153 2022-12-03 ICNSA 287976 2024-02-22 2024-02-22 CCNSA
#> 154 2022-12-10 ICNSA 250038 2024-02-22 2024-02-22 CCNSA
#> 155 2022-12-17 ICNSA 248444 2024-02-22 2024-02-22 CCNSA
#> 156 2022-12-24 ICNSA 269877 2024-02-22 2024-02-22 CCNSA
#> 157 2022-12-31 ICNSA 278487 2024-02-22 2024-02-22 CCNSA
#> 158 2023-01-07 ICNSA 339883 2024-02-22 2024-02-22 CCNSA
#> 159 2023-01-14 ICNSA 288330 2024-02-22 2024-02-22 CCNSA
#> 160 2023-01-21 ICNSA 225228 2024-02-22 2024-02-22 CCNSA
#> 161 2023-01-28 ICNSA 225026 2024-02-22 2024-02-22 CCNSA
#> 162 2023-02-04 ICNSA 234007 2024-02-22 2024-02-22 CCNSA
#> 163 2023-02-11 ICNSA 225332 2024-02-22 2024-02-22 CCNSA
#> 164 2023-02-18 ICNSA 211007 2024-02-22 2024-02-22 CCNSA
#> 165 2023-02-25 ICNSA 202156 2024-02-22 2024-02-22 CCNSA
#> 166 2023-03-04 ICNSA 238840 2024-02-22 2024-02-22 CCNSA
#> 167 2023-03-11 ICNSA 218084 2024-02-22 2024-02-22 CCNSA
#> 168 2023-03-18 ICNSA 213003 2024-02-22 2024-02-22 CCNSA
#> 169 2023-03-25 ICNSA 224193 2024-02-22 2024-02-22 CCNSA
#> 170 2023-04-01 ICNSA 207120 2024-02-22 2024-02-22 CCNSA
#> 171 2023-04-08 ICNSA 235237 2024-02-22 2024-02-22 CCNSA
#> 172 2023-04-15 ICNSA 229319 2024-02-22 2024-02-22 CCNSA
#> 173 2023-04-22 ICNSA 225137 2024-02-22 2024-02-22 CCNSA
#> 174 2023-04-29 ICNSA 220115 2024-02-22 2024-02-22 CCNSA
#> 175 2023-05-06 ICNSA 204962 2024-02-22 2024-02-22 CCNSA
#> 176 2023-05-13 ICNSA 200738 2024-02-22 2024-02-22 CCNSA
#> 177 2023-05-20 ICNSA 202645 2024-02-22 2024-02-22 CCNSA
#> 178 2023-05-27 ICNSA 208772 2024-02-22 2024-02-22 CCNSA
#> 179 2023-06-03 ICNSA 220449 2024-02-22 2024-02-22 CCNSA
#> 180 2023-06-10 ICNSA 251384 2024-02-22 2024-02-22 CCNSA
#> 181 2023-06-17 ICNSA 250891 2024-02-22 2024-02-22 CCNSA
#> 182 2023-06-24 ICNSA 229718 2024-02-22 2024-02-22 CCNSA
#> 183 2023-07-01 ICNSA 251686 2024-02-22 2024-02-22 CCNSA
#> 184 2023-07-08 ICNSA 258252 2024-02-22 2024-02-22 CCNSA
#> 185 2023-07-15 ICNSA 258169 2024-02-22 2024-02-22 CCNSA
#> 186 2023-07-22 ICNSA 213497 2024-02-22 2024-02-22 CCNSA
#> 187 2023-07-29 ICNSA 205550 2024-02-22 2024-02-22 CCNSA
#> 188 2023-08-05 ICNSA 227917 2024-02-22 2024-02-22 CCNSA
#> 189 2023-08-12 ICNSA 213803 2024-02-22 2024-02-22 CCNSA
#> 190 2023-08-19 ICNSA 199437 2024-02-22 2024-02-22 CCNSA
#> 191 2023-08-26 ICNSA 193441 2024-02-22 2024-02-22 CCNSA
#> 192 2023-09-02 ICNSA 191353 2024-02-22 2024-02-22 CCNSA
#> 193 2023-09-09 ICNSA 175594 2024-02-22 2024-02-22 CCNSA
#> 194 2023-09-16 ICNSA 176586 2024-02-22 2024-02-22 CCNSA
#> 195 2023-09-23 ICNSA 175650 2024-02-22 2024-02-22 CCNSA
#> 196 2023-09-30 ICNSA 174701 2024-02-22 2024-02-22 CCNSA
#> 197 2023-10-07 ICNSA 199743 2024-02-22 2024-02-22 CCNSA
#> 198 2023-10-14 ICNSA 182394 2024-02-22 2024-02-22 CCNSA
#> 199 2023-10-21 ICNSA 193999 2024-02-22 2024-02-22 CCNSA
#> 200 2023-10-28 ICNSA 199306 2024-02-22 2024-02-22 CCNSA
#> 201 2023-11-04 ICNSA 214161 2024-02-22 2024-02-22 CCNSA
#> 202 2023-11-11 ICNSA 217438 2024-02-22 2024-02-22 CCNSA
#> 203 2023-11-18 ICNSA 240979 2024-02-22 2024-02-22 CCNSA
#> 204 2023-11-25 ICNSA 199750 2024-02-22 2024-02-22 CCNSA
#> 205 2023-12-02 ICNSA 294615 2024-02-22 2024-02-22 CCNSA
#> 206 2023-12-09 ICNSA 249090 2024-02-22 2024-02-22 CCNSA
#> 207 2023-12-16 ICNSA 241040 2024-02-22 2024-02-22 CCNSA
#> 208 2023-12-23 ICNSA 274840 2024-02-22 2024-02-22 CCNSA
#> 209 2023-12-30 ICNSA 269409 2024-02-22 2024-02-22 CCNSA
#> 210 2024-01-06 ICNSA 318906 2024-02-22 2024-02-22 CCNSA
#> 211 2024-01-13 ICNSA 291330 2024-02-22 2024-02-22 CCNSA
#> 212 2024-01-20 ICNSA 249947 2024-02-22 2024-02-22 CCNSA
#> 213 2024-01-27 ICNSA 263919 2024-02-22 2024-02-22 CCNSA
#> 214 2024-02-03 ICNSA 234729 2024-02-22 2024-02-22 CCNSA
#> value.y realtime_start.y realtime_end.y
#> 105 2061856 2024-02-22 2024-02-22
#> 106 2072681 2024-02-22 2024-02-22
#> 107 2009022 2024-02-22 2024-02-22
#> 108 2043402 2024-02-22 2024-02-22
#> 109 2005387 2024-02-22 2024-02-22
#> 110 1980368 2024-02-22 2024-02-22
#> 111 1918036 2024-02-22 2024-02-22
#> 112 1859328 2024-02-22 2024-02-22
#> 113 1917459 2024-02-22 2024-02-22
#> 114 1810381 2024-02-22 2024-02-22
#> 115 1727708 2024-02-22 2024-02-22
#> 116 1676374 2024-02-22 2024-02-22
#> 117 1654359 2024-02-22 2024-02-22
#> 118 1569797 2024-02-22 2024-02-22
#> 119 1467346 2024-02-22 2024-02-22
#> 120 1441824 2024-02-22 2024-02-22
#> 121 1403640 2024-02-22 2024-02-22
#> 122 1332072 2024-02-22 2024-02-22
#> 123 1283158 2024-02-22 2024-02-22
#> 124 1290047 2024-02-22 2024-02-22
#> 125 1256209 2024-02-22 2024-02-22
#> 126 1256450 2024-02-22 2024-02-22
#> 127 1270456 2024-02-22 2024-02-22
#> 128 1288323 2024-02-22 2024-02-22
#> 129 1302015 2024-02-22 2024-02-22
#> 130 1372008 2024-02-22 2024-02-22
#> 131 1327199 2024-02-22 2024-02-22
#> 132 1450142 2024-02-22 2024-02-22
#> 133 1443848 2024-02-22 2024-02-22
#> 134 1451592 2024-02-22 2024-02-22
#> 135 1457158 2024-02-22 2024-02-22
#> 136 1423854 2024-02-22 2024-02-22
#> 137 1414317 2024-02-22 2024-02-22
#> 138 1390837 2024-02-22 2024-02-22
#> 139 1365751 2024-02-22 2024-02-22
#> 140 1273057 2024-02-22 2024-02-22
#> 141 1278853 2024-02-22 2024-02-22
#> 142 1224496 2024-02-22 2024-02-22
#> 143 1233270 2024-02-22 2024-02-22
#> 144 1201432 2024-02-22 2024-02-22
#> 145 1199269 2024-02-22 2024-02-22
#> 146 1226861 2024-02-22 2024-02-22
#> 147 1239975 2024-02-22 2024-02-22
#> 148 1267368 2024-02-22 2024-02-22
#> 149 1230153 2024-02-22 2024-02-22
#> 150 1341329 2024-02-22 2024-02-22
#> 151 1261674 2024-02-22 2024-02-22
#> 152 1561449 2024-02-22 2024-02-22
#> 153 1503416 2024-02-22 2024-02-22
#> 154 1594527 2024-02-22 2024-02-22
#> 155 1577004 2024-02-22 2024-02-22
#> 156 1710626 2024-02-22 2024-02-22
#> 157 1870048 2024-02-22 2024-02-22
#> 158 1908639 2024-02-22 2024-02-22
#> 159 1863956 2024-02-22 2024-02-22
#> 160 1911203 2024-02-22 2024-02-22
#> 161 1925707 2024-02-22 2024-02-22
#> 162 1948437 2024-02-22 2024-02-22
#> 163 1928688 2024-02-22 2024-02-22
#> 164 1891796 2024-02-22 2024-02-22
#> 165 1970023 2024-02-22 2024-02-22
#> 166 1910046 2024-02-22 2024-02-22
#> 167 1878281 2024-02-22 2024-02-22
#> 168 1877314 2024-02-22 2024-02-22
#> 169 1844456 2024-02-22 2024-02-22
#> 170 1795471 2024-02-22 2024-02-22
#> 171 1788631 2024-02-22 2024-02-22
#> 172 1752764 2024-02-22 2024-02-22
#> 173 1689410 2024-02-22 2024-02-22
#> 174 1660410 2024-02-22 2024-02-22
#> 175 1612137 2024-02-22 2024-02-22
#> 176 1612163 2024-02-22 2024-02-22
#> 177 1609655 2024-02-22 2024-02-22
#> 178 1595088 2024-02-22 2024-02-22
#> 179 1649707 2024-02-22 2024-02-22
#> 180 1672995 2024-02-22 2024-02-22
#> 181 1674551 2024-02-22 2024-02-22
#> 182 1737349 2024-02-22 2024-02-22
#> 183 1724290 2024-02-22 2024-02-22
#> 184 1887580 2024-02-22 2024-02-22
#> 185 1835613 2024-02-22 2024-02-22
#> 186 1827375 2024-02-22 2024-02-22
#> 187 1809787 2024-02-22 2024-02-22
#> 188 1814134 2024-02-22 2024-02-22
#> 189 1801993 2024-02-22 2024-02-22
#> 190 1789881 2024-02-22 2024-02-22
#> 191 1747733 2024-02-22 2024-02-22
#> 192 1655858 2024-02-22 2024-02-22
#> 193 1645933 2024-02-22 2024-02-22
#> 194 1588281 2024-02-22 2024-02-22
#> 195 1585286 2024-02-22 2024-02-22
#> 196 1555186 2024-02-22 2024-02-22
#> 197 1542397 2024-02-22 2024-02-22
#> 198 1572658 2024-02-22 2024-02-22
#> 199 1574484 2024-02-22 2024-02-22
#> 200 1607728 2024-02-22 2024-02-22
#> 201 1578949 2024-02-22 2024-02-22
#> 202 1654728 2024-02-22 2024-02-22
#> 203 1555596 2024-02-22 2024-02-22
#> 204 1845080 2024-02-22 2024-02-22
#> 205 1767271 2024-02-22 2024-02-22
#> 206 1835426 2024-02-22 2024-02-22
#> 207 1836547 2024-02-22 2024-02-22
#> 208 1902098 2024-02-22 2024-02-22
#> 209 2104271 2024-02-22 2024-02-22
#> 210 2122548 2024-02-22 2024-02-22
#> 211 2053298 2024-02-22 2024-02-22
#> 212 2181429 2024-02-22 2024-02-22
#> 213 2130008 2024-02-22 2024-02-22
#> 214 2146550 2024-02-22 2024-02-22
# Calculate correlation between ICNSA and CCNSA values
correlation_coefficient <- cor(cleandata$value.x, cleandata$value.y)
# Print the correlation coefficient
print(correlation_coefficient)
#> [1] 0.6254788
#STEP 3 TRAIN MODEL using arima model
column_names_cleaned <- colnames(cleandata)
print(column_names_cleaned)
#> [1] "date" "series_id.x" "value.x" "realtime_start.x"
#> [5] "realtime_end.x" "series_id.y" "value.y" "realtime_start.y"
#> [9] "realtime_end.y"
# Convert ICNSA values to a time series object
icnsa_ts <- ts(cleandata$value.x, frequency = 52)
# Prepare CCNSA values for use as a regressor
ccnsa_regressor <- cleandata$value.y
# Ensure the forecast package is installed and loaded
if (!requireNamespace("forecast", quietly = TRUE)) install.packages("forecast")
library(forecast)
# Train the ARIMA model with CCNSA as a regressor
fit_arima <- auto.arima(icnsa_ts, xreg = ccnsa_regressor)
# Print the model summary
summary(fit_arima)
#> Series: icnsa_ts
#> Regression with ARIMA(2,0,0)(0,1,0)[52] errors
#>
#> Coefficients:
#> ar1 ar2 xreg
#> 0.5485 0.1551 0.0606
#> s.e. 0.1402 0.1721 0.0201
#>
#> sigma^2 = 189329888: log likelihood = -633.76
#> AIC=1275.53 AICc=1276.28 BIC=1283.77
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -905.732 9729.591 5475.743 -0.3606824 2.322205 0.286164
#> ACF1
#> Training set 0.009711292
# Placeholder for the next week's CCNSA value
# You need to replace this with the actual or estimated next week's CCNSA value
next_week_ccnsa <- tail(cleandata$value.y, 1)
# Forecast the next week's ICNSA value using the ARIMA model
# Note: Ensure the xreg parameter is provided with a new data frame matching the expected format
next_week_forecast <- forecast(fit_arima, xreg = matrix(next_week_ccnsa, ncol = 1), h = 1)
# Print the forecast results
print(next_week_forecast)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 3.115385 236458 218824.2 254091.8 209489.4 263426.6
Created on 2024-02-22 with reprex v2.1.0
"ARIMA(2,0,0): This part means we are using the last two observations to predict the next one. Its like saying, Based on how things went the last two weeks, heres our guess for next week. We didnt need to adjust or smooth out the data series (thats the d=0 part), because it's already stable or made stable. (0,1,0)[52]: This shows were also considering a yearly pattern that repeats every 52 weeks, but we are not using any fancy methods to predict this pattern just a basic step to acknowledge it exists.) xreg with coefficient 0.0606: Besides the main data, were also looking at another related factor (CCNSA), thinking it can affect our predictions. The positive number here means if the CCNSA goes up, our main data tends to go up a bit too.
'Statistical Significanc
The ARIMA(2,0,0)(0,1,0)[52] model with CCNSA
as an external factor is our
choice for making future predictions. Heres why it stands out:
Statistical Significance: The models parameters are reliably linked to
the series past values and the CCNSA
data, ensuring a solid foundation for predictions.
AIC, AICc, and BIC: These criteria suggest our model strikes a good balance between being detailed enough to capture the datas complexity and not overly so.
Error Measures: Indicators like ME, RMSE, MAE, and others show the model is quite accurate, outperforming simple guesses.
Seasonality: By accounting for yearly patterns with a 52-week cycle, the model aligns with the datas seasonal trends.
External Regressor (CCNSA
): The significant link between ICNSA
and
CCNSA
validates including CCNSA
for better predictions.
In short, this model is finely tuned to forecast ICNSA
values, leveraging both
historical data and external influences effectively.
yt=βzt + (1−ϕ1B−ϕ2B2)xt = (1−B52)wt β = 0.0606 ϕ1 = 0.5485 ϕ2 = 0.1551 yt=βzt + (1−ϕ1B−ϕ2B2)xt = (1−B52)wt yt=0.0606zt + (1−(0.5485)( 0.0606)−ϕ2(0.0606)2)xt = (1−(0.0606)52)wt
ASSIGNMENT 2 - FEB 28
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tidyverse)
library(lubridate)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
# Set FRED API key and read the ICNSA data
fredr_set_key("f4e804771fd1a4241706e9c4498f0839") # Replace with your actual API key
icnsa_data <- fredr(series_id = "ICNSA") %>%
mutate(date = as.Date(date)) %>%
arrange(date)
# Plotting the time series ICNSA
icnsa_data %>%
ggplot(aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA over Time", x = "Date", y = "ICNSA Value") +
theme_minimal()
# Define the COVID period and set those values to NA
covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")
# Create a vector that indicates whether each date is within the COVID period
is_covid_period <- icnsa_data$date >= covid_start & icnsa_data$date <= covid_end
# Replace 'value' with NA for the COVID period
icnsa_data$value[is_covid_period] <- NA
# Ensure data is ordered by date
icnsa_data <- icnsa_data %>% arrange(date)
# Subset data without NA values (excluding the COVID period)
icnsa_without_covid <- icnsa_data[!is.na(icnsa_data$value), ]
# Subset data with NA values (only the COVID period)
icnsa_with_covid <- icnsa_data[is.na(icnsa_data$value), ]
# Fitting spline on non-covid data with lambda=1
spline_fit <- smooth.spline(x = as.numeric(icnsa_without_covid$date), y = icnsa_without_covid$value,spar = 0.1)
#impute new values
imputed_values <- predict(spline_fit, x = as.numeric(icnsa_with_covid$date))
icnsa_with_covid$value <- imputed_values$y
#new values replacing covid values with imputed values
updated_icnsa_data <- bind_rows(icnsa_without_covid, icnsa_with_covid)
updated_icnsa_data <- updated_icnsa_data %>% arrange(date)
# Plot the graph with the original data and the smoothed data
ggplot(updated_icnsa_data, aes(x = date)) +
geom_line(aes(y = value), color = "blue") + # Original data
geom_line(data = icnsa_without_covid, aes(y = fitted(spline_fit)), color = "red") + # Smoothed data
labs(title = "ICNSA Data with Original and Smoothed Values", x = "Date", y = "ICNSA Values") +
theme_minimal()
# Fitting spline on non-covid data with lambda=0.5
spline_fit <- smooth.spline(x = as.numeric(icnsa_without_covid$date), y = icnsa_without_covid$value,spar = 0.5)
#impute new values
imputed_values <- predict(spline_fit, x = as.numeric(icnsa_with_covid$date))
icnsa_with_covid$value <- imputed_values$y
#new values replacing covid values with imputed values
updated_icnsa_data <- bind_rows(icnsa_without_covid, icnsa_with_covid) %>%
arrange(date)
anyNA(updated_icnsa_data$value)
#> [1] FALSE
# Plot the graph with the original data and the smoothed data
ggplot(updated_icnsa_data, aes(x = date)) +
geom_line(aes(y = value), color = "blue") + # Original data
geom_line(data = icnsa_without_covid, aes(y = fitted(spline_fit)), color = "red") + # Smoothed data
labs(title = "ICNSA Data with Original and Smoothed Values", x = "Date", y = "ICNSA Values") +
theme_minimal()
# Forecasting using Holt-Winters models for that let's create timeseries object as it is weekly data frequency = 52
icnsa_ts <- ts(updated_icnsa_data$value, frequency = 52)
# Additive Holt-Winters model
holt_additive_model <- HoltWinters(icnsa_ts, seasonal = "additive")
forecast_additive_model <- forecast(holt_additive_model, h = 1)
print(forecast_additive_model)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 163281.7 118912.4 207651.1 95424.73 231138.7
# Multiplicative Holt-Winters model
holt_multiplicative_model <- HoltWinters(icnsa_ts, seasonal = "multiplicative")
forecast_multiplicative_model <- forecast(holt_multiplicative_model, h = 1)
print(forecast_multiplicative_model)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 182854.1 138777.3 226931 115444.4 250263.9
Created on 2024-02-28 with reprex v2.1.0
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tidyverse)
library(lubridate)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(reprex)
# Set FRED API key and read the ICNSA data
fredr_set_key("f4e804771fd1a4241706e9c4498f0839")
icnsa_data <- fredr(series_id = "ICNSA") %>%
mutate(date = as.Date(date)) %>%
arrange(date)
# Plotting the time series ICNSA
icnsa_data %>%
ggplot(aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA over Time", x = "Date", y = "ICNSA Value") +
theme_minimal()
icnsa_data_org = icnsa_data
# Define the COVID period and set those values to NA
covid_start <- as.Date("2020-03-14")
covid_end <- as.Date("2021-01-02")
# Create a vector that indicates whether each date is within the COVID period
is_covid_period <- icnsa_data$date >= covid_start & icnsa_data$date <= covid_end
# Replace 'value' with NA for the COVID period
icnsa_data$value[is_covid_period] <- NA
# Ensure data is ordered by date
icnsa_data <- icnsa_data %>% arrange(date)
# We will use structTS() model to fit a best structural model to the given TS so that we can use KalmanSmooth technique to get NA values.
# But before that we need to decide on 'type' parameter of structTS(), for that we can perform some empirical analysis.
# Visual inspection
# Plot the original time series with a rolling mean for trend visualization
icnsa_data %>%
ggplot(aes(x = date, y = value)) +
geom_line(alpha = 0.5) +
geom_line(aes(y = rollapply(value, width = 52, FUN = median, na.pad = TRUE, align='right')), color = 'red') +
labs(title = "ICNSA over Time with Rolling Median", x = "Date", y = "ICNSA Value") +
theme_minimal()
#> Warning in rollapply.zoo(zoo(data), ...): na.pad argument is deprecated
#> Warning: Removed 51 rows containing missing values (`geom_line()`).
icnsa_data_without_na <- na.omit(icnsa_data)
# Convert to ts object
icnsa_ts_analysis <- ts(icnsa_data_without_na$value, frequency = 52)
# Seasonal decomposition using STL
decomposed <- stl(icnsa_ts_analysis, s.window = "periodic")
# Plot the decomposed components
plot(decomposed)
# ACF plot
acf(icnsa_ts_analysis)
# PACF plot
pacf(icnsa_ts_analysis)
# So from the above analysis specifically from ACF plot and trend component of decomposition there are indeed underlying trends in the data over time. So we can use type = "trend".
icnsa_ts_final <- ts(icnsa_data$value, start = c(start(icnsa_data$date)), frequency = 52)
# Using structTS
ss_model_icnsa <- StructTS(icnsa_ts_final, type = "trend")
summary(ss_model_icnsa)
#> Length Class Mode
#> coef 3 -none- numeric
#> loglik 1 -none- numeric
#> loglik0 1 -none- numeric
#> data 2982 ts numeric
#> residuals 2982 ts numeric
#> fitted 5964 mts numeric
#> call 3 -none- call
#> series 1 -none- character
#> code 1 -none- numeric
#> model 7 -none- list
#> model0 7 -none- list
#> xtsp 3 -none- numeric
# Let's use KalmanSmooth technique to obtain the smoothed state estimates to impute covid values
smoothed_icnsa <- tsSmooth(ss_model_icnsa)
# Recreate the index for the COVID period
covid_idx <- which(icnsa_data$date >= covid_start & icnsa_data$date <= covid_end)
# Update NA values with the smoothed estimates
icnsa_data$value[covid_idx] <- smoothed_icnsa[covid_idx]
ggplot() +
geom_line(data = icnsa_data_without_na, aes(x = date, y = value), color = "red") +
geom_line(data = icnsa_data, aes(x = date, y = value), color = "blue") +
labs(title = "ICNSA over Time with Original and Imputed Values", x = "Date", y = "ICNSA Value") +
theme_minimal()
# We have imputed covid period values, now we can fit a structural time series model with trend, seasonal and error using state space methodology.
icnsa_final_computed_ts <- ts(icnsa_data$value, start = c(start(icnsa_data$date)), frequency = 52)
# Fitting a structural time series model with both trend and seasonal components. For this we need to set type = 'BSM'. It is basic structural model which will automatically include a trend, seasonal, and error component in the state space model.
state_space_imputed_icnsa_model <- StructTS(icnsa_final_computed_ts, type = "BSM")
# Reading the IURSA data
iurnsa_data = fredr(series_id = "IURNSA")
column_names_iurnsa <- colnames(iurnsa_data)
print(column_names_iurnsa)
#> [1] "date" "series_id" "value" "realtime_start"
#> [5] "realtime_end"
head(iurnsa_data)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1971-01-02 IURNSA 5.2 2024-03-07 2024-03-07
#> 2 1971-01-09 IURNSA 5.3 2024-03-07 2024-03-07
#> 3 1971-01-16 IURNSA 5.3 2024-03-07 2024-03-07
#> 4 1971-01-23 IURNSA 5.2 2024-03-07 2024-03-07
#> 5 1971-01-30 IURNSA 5.2 2024-03-07 2024-03-07
#> 6 1971-02-06 IURNSA 5.2 2024-03-07 2024-03-07
# Combining the data for IURNSA and ICNSA
combined_data_covariate <- left_join(icnsa_data, iurnsa_data, by = "date", suffix = c(".icnsa", ".iurnsa"))
#combined_data_covariate <- na.omit(combined_data_covariate)
# Converting combined data to timeseries object
combined_data_covariate_ts <- ts(combined_data_covariate$value.iurnsa, start = c(year(min(combined_data_covariate$date)), 1), frequency = 52)
# Fitting arima model with auto arima.
arima_model_cov <- auto.arima(icnsa_final_computed_ts, xreg = combined_data_covariate_ts, seasonal = TRUE)
next_week_icnsa <- tail(iurnsa_data$value, 1)
print(next_week_icnsa)
#> [1] 1.4
# Forecast the next week's ICNSA value using the ARIMA model
next_week_forecast <- forecast(arima_model_cov, xreg = matrix(next_week_icnsa, ncol = 1), h = 1)
# Print the forecast results
print(next_week_forecast)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.34615 229178.6 185816.4 272540.8 162861.9 295495.4
Created on 2024-03-07 with reprex v2.1.0
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tidyverse)
library(lubridate)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(readxl)
library(dplyr)
library(lubridate)
library(xts)
#>
#> ######################### Warning from 'xts' package ##########################
#> # #
#> # The dplyr lag() function breaks how base R's lag() function is supposed to #
#> # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
#> # source() into this session won't work correctly. #
#> # #
#> # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
#> # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
#> # dplyr from breaking base R's lag() function. #
#> # #
#> # Code in packages is not affected. It's protected by R's namespace mechanism #
#> # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
#> # #
#> ###############################################################################
#>
#> Attaching package: 'xts'
#> The following objects are masked from 'package:dplyr':
#>
#> first, last
library(urca)
library(vars)
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: strucchange
#> Loading required package: sandwich
#>
#> Attaching package: 'strucchange'
#> The following object is masked from 'package:stringr':
#>
#> boundary
#> Loading required package: lmtest
library(BigVAR)
#> Loading required package: lattice
# New file paths
file_paths <- list(
"Computer_Electronics" = '/Users/alisharuqshan/Downloads/GitHub/Computer_Electronics.xlsx',
"Elec_comp_manufacturing" = '/Users/alisharuqshan/Downloads/GitHub/Elec_comp_manufacturing.xlsx',
"Computer_storage_device_manufacturing" = '/Users/alisharuqshan/Downloads/GitHub/Computer_storage_device_manufacturing.xlsx',
"Other" = '/Users/alisharuqshan/Downloads/GitHub/Other.xlsx'
)
read_and_process_excel_v2 <- function(file_path) { df <- read_excel(file_path) %>% mutate(Period = mdy(Period), Value = as.numeric(gsub(",", "", Value))) %>% na.omit()
df$Period <- as.Date(df$Period)
xts_object <- xts(x = df$Value, order.by = df$Period) return(xts_object) }
time_series_list <- list() for (name in names(file_paths)) { time_series_list[[name]] <- read_and_process_excel_v2(file_paths[[name]]) }
computer_electronics_ts <- time_series_list[["Computer_Electronics"]] elec_comp_manufacturing_ts <- time_series_list[["Elec_comp_manufacturing"]] computer_storage_device_manufacturing_ts <- time_series_list[["Computer_storage_device_manufacturing"]] other_ts <- time_series_list[["Other"]]
par(mfrow = c(1, 1))
plot(computer_electronics_ts, main = "Computer Electronics", xlab = "Date", ylab = "Value", col = "blue")
![](https://i.imgur.com/AiPFwrF.png)<!-- -->
``` r
plot(elec_comp_manufacturing_ts, main = "Electronic Computer Manufacturing", xlab = "Date", ylab = "Value", col = "red")
plot(computer_storage_device_manufacturing_ts, main = "Computer Storage Device Manufacturing", xlab = "Date", ylab = "Value", col = "green")
plot(other_ts, main = "Other", xlab = "Date", ylab = "Value", col = "purple")
# Reset to default plotting layout
par(mfrow = c(1, 1))
#Analysis
# 1. Correlation
# Merge all time series into a single xts object
merged_ts <- merge(computer_electronics_ts, elec_comp_manufacturing_ts, computer_storage_device_manufacturing_ts, other_ts, all = FALSE)
# Compute correlation matrix for the merged time series
correlation_matrix <- cor(merged_ts)
# Print the correlation matrix
print(correlation_matrix)
#> computer_electronics_ts
#> computer_electronics_ts 1.0000000
#> elec_comp_manufacturing_ts 0.3462417
#> computer_storage_device_manufacturing_ts 0.2326591
#> other_ts 0.4640022
#> elec_comp_manufacturing_ts
#> computer_electronics_ts 0.3462417
#> elec_comp_manufacturing_ts 1.0000000
#> computer_storage_device_manufacturing_ts 0.3970249
#> other_ts 0.5093446
#> computer_storage_device_manufacturing_ts
#> computer_electronics_ts 0.2326591
#> elec_comp_manufacturing_ts 0.3970249
#> computer_storage_device_manufacturing_ts 1.0000000
#> other_ts 0.7975097
#> other_ts
#> computer_electronics_ts 0.4640022
#> elec_comp_manufacturing_ts 0.5093446
#> computer_storage_device_manufacturing_ts 0.7975097
#> other_ts 1.0000000
# 2. CCF Plot
# Set up multiple plots in a grid
par(mfrow=c(length(time_series_list) - 1, 1))
# Define the reference time series
reference_ts <- as.numeric(computer_electronics_ts)
# Loop through each time series and plot its CCF with the reference series
for(i in 2:length(time_series_list)) {
# Extract the time series data
comparison_ts <- as.numeric(time_series_list[[i]])
# Define names_list within the loop
names_list <- names(time_series_list)
# Calculate the CCF
ccf_result <- ccf(reference_ts, comparison_ts, main = paste("CCF between", names_list[1], "and", names_list[i]))
# Plot the CCF
plot(ccf_result)
}
# 3. ACF and PACF plots
# Assuming your plotting window is large enough, or you're plotting to an external file
for(i in 1:length(time_series_list)) {
# Extract the time series data
ts_data <- as.numeric(time_series_list[[i]])
# Set up a new plot window for ACF
par(mfrow=c(1, 1))
acf_result <- acf(ts_data, main = paste("ACF for", names_list[i]))
# Set up a new plot window for PACF
par(mfrow=c(1, 1))
pacf_result <- pacf(ts_data, main = paste("PACF for", names_list[i]))
}
# 4. ADF test
# Performing ADF Test
adf_test <- function(ts_data, name) {
adf_result <- ur.df(ts_data, type = "trend", lags = 3)
summary(adf_result)
}
for (i in 1:length(time_series_list)) {
ts_data <- time_series_list[[i]]
adf_result <- adf_test(ts_data, names_list[i])
print(adf_result)
}
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1919.7 -220.1 5.1 224.8 1529.1
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 475.600416 201.969387 2.355 0.0190 *
#> z.lag.1 -0.009744 0.004033 -2.416 0.0162 *
#> tt -0.022014 0.200820 -0.110 0.9128
#> z.diff.lag1 0.302031 0.048942 6.171 1.76e-09 ***
#> z.diff.lag2 0.122270 0.051006 2.397 0.0170 *
#> z.diff.lag3 0.300490 0.049252 6.101 2.62e-09 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 427.1 on 376 degrees of freedom
#> Multiple R-squared: 0.3311, Adjusted R-squared: 0.3222
#> F-statistic: 37.22 on 5 and 376 DF, p-value: < 2.2e-16
#>
#>
#> Value of test-statistic is: -2.4163 2.0255 2.9508
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
#>
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -593.34 -47.50 5.70 48.02 460.85
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 25.590274 27.549291 0.929 0.35354
#> z.lag.1 -0.007249 0.004561 -1.589 0.11284
#> tt -0.051122 0.080107 -0.638 0.52375
#> z.diff.lag1 0.103269 0.049362 2.092 0.03710 *
#> z.diff.lag2 0.152573 0.048691 3.134 0.00186 **
#> z.diff.lag3 0.269676 0.049117 5.491 7.39e-08 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 111.4 on 376 degrees of freedom
#> Multiple R-squared: 0.1455, Adjusted R-squared: 0.1342
#> F-statistic: 12.81 on 5 and 376 DF, p-value: 1.64e-11
#>
#>
#> Value of test-statistic is: -1.5893 1.3545 1.6443
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
#>
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -429.92 -25.65 -2.17 24.14 253.82
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 56.45815 19.83251 2.847 0.004659 **
#> z.lag.1 -0.03200 0.01098 -2.915 0.003764 **
#> tt -0.09201 0.03750 -2.453 0.014608 *
#> z.diff.lag1 0.17199 0.05052 3.405 0.000734 ***
#> z.diff.lag2 -0.05015 0.05113 -0.981 0.327346
#> z.diff.lag3 0.18193 0.05072 3.587 0.000379 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 57.1 on 376 degrees of freedom
#> Multiple R-squared: 0.0715, Adjusted R-squared: 0.05915
#> F-statistic: 5.791 on 5 and 376 DF, p-value: 3.634e-05
#>
#>
#> Value of test-statistic is: -2.9155 3.0338 4.4042
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
#>
#>
#> ###############################################
#> # Augmented Dickey-Fuller Test Unit Root Test #
#> ###############################################
#>
#> Test regression trend
#>
#>
#> Call:
#> lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -170.65 -24.36 1.21 24.66 182.37
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 43.532493 16.833890 2.586 0.01008 *
#> z.lag.1 -0.014035 0.005557 -2.526 0.01196 *
#> tt -0.099489 0.037924 -2.623 0.00906 **
#> z.diff.lag1 0.009568 0.048399 0.198 0.84339
#> z.diff.lag2 0.028286 0.048345 0.585 0.55884
#> z.diff.lag3 0.323725 0.048369 6.693 7.98e-11 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 47.62 on 376 degrees of freedom
#> Multiple R-squared: 0.1255, Adjusted R-squared: 0.1139
#> F-statistic: 10.79 on 5 and 376 DF, p-value: 1.038e-09
#>
#>
#> Value of test-statistic is: -2.5257 2.5514 3.6749
#>
#> Critical values for test statistics:
#> 1pct 5pct 10pct
#> tau3 -3.98 -3.42 -3.13
#> phi2 6.15 4.71 4.05
#> phi3 8.34 6.30 5.36
##### Fitting VAR Model #####
# Fit VAR(1) model
combined_ts_series <- do.call(cbind, time_series_list)
var_model_simple <- VAR(combined_ts_series, p = 1, type = "const")
# AIC BIC Values
aic_var1 <- AIC(var_model_simple)
bic_var1 <- BIC(var_model_simple)
# Let's try VAR for different p values
var_model_p1 <- VAR(combined_ts_series, p = 1, type = "const")
var_model_p2 <- VAR(combined_ts_series, p = 2, type = "const")
var_model_p3 <- VAR(combined_ts_series, p = 3, type = "const")
# Calculate AIC and BIC for each model
aic_values <- c(AIC(var_model_p1), AIC(var_model_p2), AIC(var_model_p3))
bic_values <- c(BIC(var_model_p1), BIC(var_model_p2), BIC(var_model_p3))
# Print AIC and BIC values
print(aic_values)
#> [1] 18813.08 18683.99 18608.97
print(bic_values)
#> [1] 18892.14 18826.22 18814.27
# From AIC and BIC values we can say that VAR with p=3 is the best choice
var_model_final <- VAR(combined_ts_series, p = 3, type = "const")
# Get AIC and BIC for VAR(3) model
aic_varp <- AIC(var_model_final)
bic_varp <- BIC(var_model_final)
# Compare AIC and BIC
print(paste("AIC of VAR(1):", aic_var1, "BIC of VAR(1):", bic_var1))
#> [1] "AIC of VAR(1): 18813.078008483 BIC of VAR(1): 18892.1428751687"
print(paste("AIC of VAR(3):", aic_varp, "BIC of VAR(3):", bic_varp))
#> [1] "AIC of VAR(3): 18608.9678563367 BIC of VAR(3): 18814.2656757741"
# From above comparison we can say that VAR(3) is better that VAR(1)
# One month forecast
# Using selected model
forecast <- predict(var_model_final, n.ahead = 1)
# Forecasted value
print(forecast)
#> $Computer_Electronics
#> fcst lower upper CI
#> Computer_Electronics.fcst 55475.59 54613.35 56337.83 862.2423
#>
#> $Elec_comp_manufacturing
#> fcst lower upper CI
#> Elec_comp_manufacturing.fcst 1890.074 1666.251 2113.897 223.8228
#>
#> $Computer_storage_device_manufacturing
#> fcst lower upper CI
#> Computer_storage_device_manufacturing.fcst 594.3616 483.2455 705.4776 111.1161
#>
#> $Other
#> fcst lower upper CI
#> Other.fcst 998.7883 903.0867 1094.49 95.70155
# Granger test
granger_results_final <- causality(var_model_final)
#> Warning in causality(var_model_final):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (Computer_Electronics) as cause variable.
# Print Granger causality test results
print(granger_results_final)
#> $Granger
#>
#> Granger causality H0: Computer_Electronics do not Granger-cause
#> Elec_comp_manufacturing Computer_storage_device_manufacturing Other
#>
#> data: VAR object var_model_final
#> F-Test = 2.9609, df1 = 9, df2 = 1480, p-value = 0.001709
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: Computer_Electronics and
#> Elec_comp_manufacturing Computer_storage_device_manufacturing Other
#>
#> data: VAR object var_model_final
#> Chi-squared = 85.674, df = 3, p-value < 2.2e-16
# From the test result we can say that there is evidence of instantaneous causality between the "Computer_Electronics" variable and at least one of the other variables in the model ("Elec_comp_manufacturing", "Computer_storage_device_manufacturing", "Other").
# Sparse VAR model
# Convert the xts object to a matrix
combined_original_matrix <- as.matrix(combined_ts_series)
var_model_sparse <- BigVAR.fit(combined_original_matrix, p = 3, lambda = 0.1, struct = "Lag")
# Summary
print(summary(var_model_sparse))
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> -1.1258 -0.0285 0.0158 19.7869 0.1127 737.6810
Created on 2024-04-10 with reprex v2.1.0
Created on 2024-02-09 with reprex v2.1.0