jlivsey / UB-sping24-time-series

3 stars 1 forks source link

Pranitha Reddy Samala #10

Open spranithareddy opened 5 months ago

spranithareddy commented 5 months ago
library(fredr)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)

fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")

icnsa <- fredr(series_id = "ICNSA")

last_observation <- tail(icnsa, 1)

forecast_unemployment <- last_observation$value

cat("Forecast for Thursday's unemployment figure:", forecast_unemployment, "\n")
#> Forecast for Thursday's unemployment figure: 248955

covid_years <- c(2020, 2021)

icnsa_covid <- icnsa %>%
  mutate(date = as_date(date)) %>%
  filter(year(date) %in% covid_years)

ggplot(icnsa_covid, aes(x = date, y = value)) +
  geom_line() +
  geom_vline(xintercept = as.numeric(as_date("2020-03-13")), linetype = "dashed", color = "red") +  
  geom_vline(xintercept = as.numeric(as_date("2021-03-11")), linetype = "dashed", color = "red") +  
  labs(title = "Unemployment Figures During COVID Years",
       x = "Date",
       y = "Unemployment Claims")

Created on 2024-01-31 with reprex v2.1.0

spranithareddy commented 4 months ago
library(fredr)
  library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
# Set your FRED API key
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")

# Retrieve the data for the specified series
dat <- fredr(series_id = "MSRSFL445")
dat <- na.omit(dat)

# Selecting time and value columns
dat <- dplyr::select(dat, time = date, value)

# Plot the time series
ts_plot(dat)
#> Error in ts_plot(dat): could not find function "ts_plot"

# Convert data to time series object
x <- ts(dat$value)

# Plot the time series
plot(x)


# Plot the differenced time series
plot(diff(x))


# Display ACF and PACF of differenced time series
tsdisplay(diff(x))


# Display ACF and PACF of log differenced time series
tsdisplay(diff(log(x)))
#> Warning in log(x): NaNs produced


# Fit a seasonal ARIMA model
fit <- Arima(dat$value, order = c(0, 1, 4), seasonal = list(order = c(0, 1, 1), period = 12))

# Print model summary
summary(fit)
#> Series: dat$value 
#> ARIMA(0,1,4)(0,1,1)[12] 
#> 
#> Coefficients:
#>           ma1     ma2      ma3     ma4     sma1
#>       -0.2315  0.0511  -0.2840  0.1028  -1.0000
#> s.e.   0.1560  0.1448   0.1918  0.1867   0.2096
#> 
#> sigma^2 = 38.95:  log likelihood = -153.09
#> AIC=318.19   AICc=320.4   BIC=329.03
#> 
#> Training set error measures:
#>                     ME     RMSE      MAE       MPE   MAPE      MASE        ACF1
#> Training set -0.387603 5.183105 2.923656 -3.403652 41.722 0.8177056 -0.07464527

# Plot original data
plot(dat$time, dat$value, type = "l", xlab = "Time", ylab = "Value", main = "Original Data")

# Overlay fitted values
lines(dat$time, fitted(fit), col = "red")

# Add legend
legend("topright", legend = c("Original Data", "Fitted Values"), col = c("black", "red"), lty = 1)

Created on 2024-02-14 with reprex v2.0.2

spranithareddy commented 4 months ago
library(reprex)
library(fredr)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo

fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")

icnsa_data <- fredr(series_id = "ICNSA")

cc_data <- fredr(series_id = "CCSA")                    

# Convert dates to Date class
icnsa_data$date <- as.Date(icnsa_data$date)
cc_data$date <- as.Date(cc_data$date)

# Merge datasets by date
data_merged <- merge(icnsa_data, cc_data, by = "date", all = TRUE)

data_clean <- na.omit(data_merged)

plot(data_clean$date, data_clean$value.x, type = "l", col = "blue", xlab = "Date", ylab = "ICNSA", main = "ICNSA and Unemployment Rate Over Time")
lines(data_clean$date, data_clean$value.y, type = "l", col = "red")
legend("topright", legend = c("ICNSA", "cc"), col = c("blue", "red"), lty = 1)


# Checking correlation
cor(data_clean$value.x, data_clean$value.y)
#> [1] 0.6881076

# ACF and PACF of ICNSA
acf(data_clean$value.x)

pacf(data_clean$value.x)


# ACF and PACF of cc
acf(data_clean$value.y)

pacf(data_clean$value.y)


# Assuming data_clean$value.x is ICNSA and data_clean$value.y is cc
model_fit <- auto.arima(data_clean$value.x, xreg = data_clean$value.y)

summary(model_fit)
#> Series: data_clean$value.x 
#> Regression with ARIMA(0,1,4) errors 
#> 
#> Coefficients:
#>          ma1      ma2      ma3      ma4    xreg
#>       0.3278  -0.1107  -0.2902  -0.1533  0.0546
#> s.e.  0.0203   0.0231   0.0273   0.0219  0.0082
#> 
#> sigma^2 = 7.753e+09:  log likelihood = -38129.81
#> AIC=76271.61   AICc=76271.64   BIC=76307.6
#> 
#> Training set error measures:
#>                     ME     RMSE      MAE        MPE     MAPE     MASE
#> Training set -57.95108 87961.95 42639.68 -0.8766133 10.65509 1.130862
#>                    ACF1
#> Training set 0.01390792

checkresiduals(model_fit)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(0,1,4) errors
#> Q* = 105.38, df = 6, p-value < 2.2e-16
#> 
#> Model df: 4.   Total lags used: 10

forecast_length <- 12 
future_cc <- rep(mean(data_clean$value.y), forecast_length)  
forecast_result <- forecast(model_fit, xreg = future_cc, h = forecast_length)
plot(forecast_result)

forecast_result
#>      Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
#> 2980       276385.4 163543.935 389226.9  103809.27 448961.5
#> 2981       275795.8  88221.659 463369.9  -11074.07 562665.6
#> 2982       283932.5  51453.124 516411.9  -71614.02 639479.0
#> 2983       290976.2  36052.605 545899.8  -98895.79 680848.2
#> 2984       290976.2  21522.238 560430.2 -121118.07 703070.5
#> 2985       290976.2   7736.308 574216.1 -142201.83 724154.2
#> 2986       290976.2  -5409.081 587361.5 -162305.97 744258.4
#> 2987       290976.2 -17995.697 599948.1 -181555.54 763507.9
#> 2988       290976.2 -30089.263 612041.7 -200051.05 782003.4
#> 2989       290976.2 -41743.546 623695.9 -217874.74 799827.1
#> 2990       290976.2 -53003.200 634955.6 -235094.89 817047.3
#> 2991       290976.2 -63905.789 645858.2 -251768.96 833721.4
single_forecast <- forecast_result$mean[1]
single_forecast
#> [1] 276385.4

Created on 2024-02-21 with reprex v2.0.2

spranithareddy commented 4 months ago
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(splines)
library(tidyverse)
#> Warning: package 'dplyr' was built under R version 4.2.3
library(dplyr)
library(fredr)

fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd")

# Loading ICNSA data
ICNSA_data <- fredr(series_id = "ICNSA")

# Converting 'date' column to Date type
ICNSA_data$date <- as.Date(ICNSA_data$date)

# Plotting ICNSA time series to visualize the data
plot(ICNSA_data$date, ICNSA_data$value, type='l', xlab='Time', ylab='Value', main='ICNSA Time Series')

# Identifying start and end dates visually or through statistical analysis
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-05-01")

# Highlighting the identified start and end dates on the plot
abline(v = start_date, col = "red", lty = 2)
abline(v = end_date, col = "red", lty = 2)


# Printing the identified start and end dates
print(paste("Start Date:", start_date))
#> [1] "Start Date: 2020-03-01"
print(paste("End Date:", end_date))
#> [1] "End Date: 2021-05-01"

# Cubic spline methodology to impute values for the Covid period
covid_period <- ICNSA_data$date >= start_date & ICNSA_data$date <= end_date
non_covid_data <- ICNSA_data[!covid_period, ]
covid_data <- ICNSA_data[covid_period, ]

# Cubic spline methodology to impute values for the Covid period
covid_period <- ICNSA_data$date >= start_date & ICNSA_data$date <= end_date
non_covid_data <- ICNSA_data[!covid_period, ]
covid_data <- ICNSA_data[covid_period, ]

# Fitting cubic spline on non-Covid data
cubic_spline <- smooth.spline(x = non_covid_data$date, y = non_covid_data$value)

# Imputing values for Covid period
imputed_values <- predict(cubic_spline, newdata = covid_data$date)$y

# Subset imputed_values to match the length of covid_data$date
imputed_values <- imputed_values[1:length(covid_data$date)]

# Replacing Covid period values with imputed values
ICNSA_data$value[covid_period] <- imputed_values

# Plotting original and imputed data
plot(ICNSA_data$date, ICNSA_data$value, type='l', xlab='Time', ylab='Value', main='Original and Imputed ICNSA Data')
lines(covid_data$date, imputed_values, col='red', lty=2)


# Holt-Winters forecasting
icnsa_ts <- ts(ICNSA_data$value, frequency = 12)

# Multiplicative model
model_mul <- HoltWinters(icnsa_ts, seasonal = "multiplicative")
forecast_mul <- forecast(model_mul, h = 1)

# Additive model
model_add <- HoltWinters(icnsa_ts, seasonal = "additive")
#> Warning in HoltWinters(icnsa_ts, seasonal = "additive"): optimization
#> difficulties: ERROR: ABNORMAL_TERMINATION_IN_LNSRCH
forecast_add <- forecast(model_add, h = 1)

# Printing forecasts
print(forecast_mul)
#>         Point Forecast    Lo 80  Hi 80    Lo 95    Hi 95
#> Jun 249       207410.2 154134.3 260686 125931.8 288888.6
print(forecast_add)
#>         Point Forecast    Lo 80  Hi 80    Lo 95    Hi 95
#> Jun 249       206146.2 139374.4 272918 104027.6 308264.8

Created on 2024-02-28 with reprex v2.0.2

spranithareddy commented 4 months ago
library(fredr)
library(ggplot2)
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(KFAS)
#> Please cite KFAS in publications by using: 
#> 
#>   Jouni Helske (2017). KFAS: Exponential Family State Space Models in R. Journal of Statistical Software, 78(10), 1-39. doi:10.18637/jss.v078.i10.
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
fredr_set_key("0f0e54b3a924da49f1f99bf3c764ecdd") 

ICNSA_data <- fredr(series_id = "ICNSA", observation_start = as.Date("2000-01-01")) %>%
  mutate(Date = as.Date(date), ICNSA = value) %>%
  select(Date, ICNSA)

UNRATE_data <- fredr(series_id = "UNRATE", observation_start = as.Date("2000-01-01")) %>%
  mutate(Date = as.Date(date), UNRATE = value) %>%
  select(Date, UNRATE)

start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-05-01")

non_covid_data <- ICNSA_data[!(ICNSA_data$Date >= start_date & ICNSA_data$Date <= end_date), ]

cubic_spline_fit <- smooth.spline(non_covid_data$Date, non_covid_data$ICNSA, spar=0.7)

all_dates <- ICNSA_data$Date
predictions <- predict(cubic_spline_fit, x = as.numeric(all_dates))

ICNSA_data$Cubic_Spline_Imputed <- predictions$y

data_merged <- merge(ICNSA_data, UNRATE_data, by = "Date", all.x = TRUE) %>%
  na.locf()

ggplot(data_merged, aes(x = Date, y = ICNSA)) +
  geom_line() + labs(title = "ICNSA Time Series", x = "Date", y = "Initial Claims") +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), color = "red", linetype = "dashed") +
  geom_vline(xintercept = as.numeric(as.Date("2021-05-01")), color = "red", linetype = "dashed")


model_spec <- SSModel(ICNSA ~ SSMtrend(degree = 2, Q = list(matrix(1), matrix(1))) + 
                        SSMseasonal(period = 52, sea.type = "dummy"), H = matrix(1), data = data_merged)

fit <- fitSSM(model_spec, inits = rep(0.1, 2))

summary(fit)
#>           Length Class   Mode
#> optim.out  5     -none-  list
#> model     14     SSModel list

imputed <- KFS(fit$model)
data_merged$Imputed_ICNSA <- imputed$alphahat[,"level"]

ggplot(data_merged, aes(x = Date)) +
  geom_line(aes(y = ICNSA), color = "grey", linetype="solid") + # Original data
  geom_line(aes(y = Cubic_Spline_Imputed), color = "green", linetype="dotted") + # Cubic spline imputed data
  geom_line(aes(y = Imputed_ICNSA), color = "blue", linetype="dashed") + # State-space model imputed data
  labs(title = "ICNSA: Original vs. Spline Imputed vs. State-space Imputed", x = "Date", y = "Initial Claims") +
  theme_minimal()


ggplot(data_merged, aes(x = Date)) +
  geom_line(aes(y = ICNSA), color = "grey") +
  geom_line(aes(y = Imputed_ICNSA), color = "blue") +
  labs(title = "Comparison of Original and Imputed ICNSA Data", x = "Date", y = "Initial Claims") +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-01")), color = "red", linetype = "dashed") +
  geom_vline(xintercept = as.numeric(as.Date("2021-05-01")), color = "red", linetype = "dashed") +
  theme_minimal()


forecast_results <- predict(fit$model, n.ahead = 1)
forecast_results
#> Time Series:
#> Start = 1262 
#> End = 1262 
#> Frequency = 1 
#>           fit
#> [1,] 178171.2

Created on 2024-03-07 with reprex v2.0.2

spranithareddy commented 2 months ago

hw4 2.pdf