Open spranithareddy opened 5 months ago
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
# Set your FRED API key
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")
# Retrieve the data for the specified series
dat <- fredr(series_id = "MSRSFL445")
dat <- na.omit(dat)
# Selecting time and value columns
dat <- dplyr::select(dat, time = date, value)
# Plot the time series
ts_plot(dat)
#> Error in ts_plot(dat): could not find function "ts_plot"
# Convert data to time series object
x <- ts(dat$value)
# Plot the time series
plot(x)
# Plot the differenced time series
plot(diff(x))
# Display ACF and PACF of differenced time series
tsdisplay(diff(x))
# Display ACF and PACF of log differenced time series
tsdisplay(diff(log(x)))
#> Warning in log(x): NaNs produced
# Fit a seasonal ARIMA model
fit <- Arima(dat$value, order = c(0, 1, 4), seasonal = list(order = c(0, 1, 1), period = 12))
# Print model summary
summary(fit)
#> Series: dat$value
#> ARIMA(0,1,4)(0,1,1)[12]
#>
#> Coefficients:
#> ma1 ma2 ma3 ma4 sma1
#> -0.2315 0.0511 -0.2840 0.1028 -1.0000
#> s.e. 0.1560 0.1448 0.1918 0.1867 0.2096
#>
#> sigma^2 = 38.95: log likelihood = -153.09
#> AIC=318.19 AICc=320.4 BIC=329.03
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE ACF1
#> Training set -0.387603 5.183105 2.923656 -3.403652 41.722 0.8177056 -0.07464527
# Plot original data
plot(dat$time, dat$value, type = "l", xlab = "Time", ylab = "Value", main = "Original Data")
# Overlay fitted values
lines(dat$time, fitted(fit), col = "red")
# Add legend
legend("topright", legend = c("Original Data", "Fitted Values"), col = c("black", "red"), lty = 1)
Created on 2024-02-14 with reprex v2.0.2
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")
icnsa_data <- fredr(series_id = "ICNSA")
cc_data <- fredr(series_id = "CCSA")
# Convert dates to Date class
icnsa_data$date <- as.Date(icnsa_data$date)
cc_data$date <- as.Date(cc_data$date)
# Merge datasets by date
data_merged <- merge(icnsa_data, cc_data, by = "date", all = TRUE)
data_clean <- na.omit(data_merged)
plot(data_clean$date, data_clean$value.x, type = "l", col = "blue", xlab = "Date", ylab = "ICNSA", main = "ICNSA and Unemployment Rate Over Time")
lines(data_clean$date, data_clean$value.y, type = "l", col = "red")
legend("topright", legend = c("ICNSA", "cc"), col = c("blue", "red"), lty = 1)
# Checking correlation
cor(data_clean$value.x, data_clean$value.y)
#> [1] 0.6881076
# ACF and PACF of ICNSA
acf(data_clean$value.x)
pacf(data_clean$value.x)
# ACF and PACF of cc
acf(data_clean$value.y)
pacf(data_clean$value.y)
# Assuming data_clean$value.x is ICNSA and data_clean$value.y is cc
model_fit <- auto.arima(data_clean$value.x, xreg = data_clean$value.y)
summary(model_fit)
#> Series: data_clean$value.x
#> Regression with ARIMA(0,1,4) errors
#>
#> Coefficients:
#> ma1 ma2 ma3 ma4 xreg
#> 0.3278 -0.1107 -0.2902 -0.1533 0.0546
#> s.e. 0.0203 0.0231 0.0273 0.0219 0.0082
#>
#> sigma^2 = 7.753e+09: log likelihood = -38129.81
#> AIC=76271.61 AICc=76271.64 BIC=76307.6
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -57.95108 87961.95 42639.68 -0.8766133 10.65509 1.130862
#> ACF1
#> Training set 0.01390792
checkresiduals(model_fit)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(0,1,4) errors
#> Q* = 105.38, df = 6, p-value < 2.2e-16
#>
#> Model df: 4. Total lags used: 10
forecast_length <- 12
future_cc <- rep(mean(data_clean$value.y), forecast_length)
forecast_result <- forecast(model_fit, xreg = future_cc, h = forecast_length)
plot(forecast_result)
forecast_result
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 2980 276385.4 163543.935 389226.9 103809.27 448961.5
#> 2981 275795.8 88221.659 463369.9 -11074.07 562665.6
#> 2982 283932.5 51453.124 516411.9 -71614.02 639479.0
#> 2983 290976.2 36052.605 545899.8 -98895.79 680848.2
#> 2984 290976.2 21522.238 560430.2 -121118.07 703070.5
#> 2985 290976.2 7736.308 574216.1 -142201.83 724154.2
#> 2986 290976.2 -5409.081 587361.5 -162305.97 744258.4
#> 2987 290976.2 -17995.697 599948.1 -181555.54 763507.9
#> 2988 290976.2 -30089.263 612041.7 -200051.05 782003.4
#> 2989 290976.2 -41743.546 623695.9 -217874.74 799827.1
#> 2990 290976.2 -53003.200 634955.6 -235094.89 817047.3
#> 2991 290976.2 -63905.789 645858.2 -251768.96 833721.4
single_forecast <- forecast_result$mean[1]
single_forecast
#> [1] 276385.4
Created on 2024-02-21 with reprex v2.0.2
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(splines)
library(tidyverse)
#> Warning: package 'dplyr' was built under R version 4.2.3
library(dplyr)
library(fredr)
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")
# Loading ICNSA data
ICNSA_data <- fredr(series_id = "ICNSA")
# Converting 'date' column to Date type
ICNSA_data$date <- as.Date(ICNSA_data$date)
# Plotting ICNSA time series to visualize the data
plot(ICNSA_data$date, ICNSA_data$value, type='l', xlab='Time', ylab='Value', main='ICNSA Time Series')
# Identifying start and end dates visually or through statistical analysis
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-05-01")
# Highlighting the identified start and end dates on the plot
abline(v = start_date, col = "red", lty = 2)
abline(v = end_date, col = "red", lty = 2)
# Printing the identified start and end dates
print(paste("Start Date:", start_date))
#> [1] "Start Date: 2020-03-01"
print(paste("End Date:", end_date))
#> [1] "End Date: 2021-05-01"
# Cubic spline methodology to impute values for the Covid period
covid_period <- ICNSA_data$date >= start_date & ICNSA_data$date <= end_date
non_covid_data <- ICNSA_data[!covid_period, ]
covid_data <- ICNSA_data[covid_period, ]
# Cubic spline methodology to impute values for the Covid period
covid_period <- ICNSA_data$date >= start_date & ICNSA_data$date <= end_date
non_covid_data <- ICNSA_data[!covid_period, ]
covid_data <- ICNSA_data[covid_period, ]
# Fitting cubic spline on non-Covid data
cubic_spline <- smooth.spline(x = non_covid_data$date, y = non_covid_data$value)
# Imputing values for Covid period
imputed_values <- predict(cubic_spline, newdata = covid_data$date)$y
# Subset imputed_values to match the length of covid_data$date
imputed_values <- imputed_values[1:length(covid_data$date)]
# Replacing Covid period values with imputed values
ICNSA_data$value[covid_period] <- imputed_values
# Plotting original and imputed data
plot(ICNSA_data$date, ICNSA_data$value, type='l', xlab='Time', ylab='Value', main='Original and Imputed ICNSA Data')
lines(covid_data$date, imputed_values, col='red', lty=2)
# Holt-Winters forecasting
icnsa_ts <- ts(ICNSA_data$value, frequency = 12)
# Multiplicative model
model_mul <- HoltWinters(icnsa_ts, seasonal = "multiplicative")
forecast_mul <- forecast(model_mul, h = 1)
# Additive model
model_add <- HoltWinters(icnsa_ts, seasonal = "additive")
#> Warning in HoltWinters(icnsa_ts, seasonal = "additive"): optimization
#> difficulties: ERROR: ABNORMAL_TERMINATION_IN_LNSRCH
forecast_add <- forecast(model_add, h = 1)
# Printing forecasts
print(forecast_mul)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Jun 249 207410.2 154134.3 260686 125931.8 288888.6
print(forecast_add)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Jun 249 206146.2 139374.4 272918 104027.6 308264.8
Created on 2024-02-28 with reprex v2.0.2
library(fredr)
library(ggplot2)
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(KFAS)
#> Please cite KFAS in publications by using:
#>
#> Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")
ICNSA_data <- fredr(series_id = "ICNSA", observation_start = as.Date("2000-01-01")) %>%
mutate(Date = as.Date(date), ICNSA = value) %>%
select(Date, ICNSA)
UNRATE_data <- fredr(series_id = "UNRATE", observation_start = as.Date("2000-01-01")) %>%
mutate(Date = as.Date(date), UNRATE = value) %>%
select(Date, UNRATE)
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-05-01")
non_covid_data <- ICNSA_data[!(ICNSA_data$Date >= start_date & ICNSA_data$Date <= end_date), ]
cubic_spline_fit <- smooth.spline(non_covid_data$Date, non_covid_data$ICNSA, spar=0.7)
all_dates <- ICNSA_data$Date
predictions <- predict(cubic_spline_fit, x = as.numeric(all_dates))
ICNSA_data$Cubic_Spline_Imputed <- predictions$y
data_merged <- merge(ICNSA_data, UNRATE_data, by = "Date", all.x = TRUE) %>%
na.locf()
ggplot(data_merged, aes(x = Date, y = ICNSA)) +
geom_line() + labs(title = "ICNSA Time Series", x = "Date", y = "Initial Claims") +
geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), color = "red", linetype = "dashed") +
geom_vline(xintercept = as.numeric(as.Date("2021-05-01")), color = "red", linetype = "dashed")
model_spec <- SSModel(ICNSA ~ SSMtrend(degree = 2, Q = list(matrix(1), matrix(1))) +
SSMseasonal(period = 52, sea.type = "dummy"), H = matrix(1), data = data_merged)
fit <- fitSSM(model_spec, inits = rep(0.1, 2))
summary(fit)
#> Length Class Mode
#> optim.out 5 -none- list
#> model 14 SSModel list
imputed <- KFS(fit$model)
data_merged$Imputed_ICNSA <- imputed$alphahat[,"level"]
ggplot(data_merged, aes(x = Date)) +
geom_line(aes(y = ICNSA), color = "grey", linetype="solid") + # Original data
geom_line(aes(y = Cubic_Spline_Imputed), color = "green", linetype="dotted") + # Cubic spline imputed data
geom_line(aes(y = Imputed_ICNSA), color = "blue", linetype="dashed") + # State-space model imputed data
labs(title = "ICNSA: Original vs. Spline Imputed vs. State-space Imputed", x = "Date", y = "Initial Claims") +
theme_minimal()
ggplot(data_merged, aes(x = Date)) +
geom_line(aes(y = ICNSA), color = "grey") +
geom_line(aes(y = Imputed_ICNSA), color = "blue") +
labs(title = "Comparison of Original and Imputed ICNSA Data", x = "Date", y = "Initial Claims") +
geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), color = "red", linetype = "dashed") +
geom_vline(xintercept = as.numeric(as.Date("2021-05-01")), color = "red", linetype = "dashed") +
theme_minimal()
forecast_results <- predict(fit$model, n.ahead = 1)
forecast_results
#> Time Series:
#> Start = 1262
#> End = 1262
#> Frequency = 1
#> fit
#> [1,] 178171.2
Created on 2024-03-07 with reprex v2.0.2
Created on 2024-01-31 with reprex v2.1.0