Open devanshpratapsingh opened 5 months ago
Forecasted value: 252,678 Actual value: 261,029
@devanshpratapsingh The values chosen for order were in the range of (0,3) and a grid search was performed to find the best order in that range. But ACF plot of ICNSA data has non-decaying lag values till lag 12 or 13 which makes p value 12 - not in the range of (0,3)?
@vgunturi Thankyou for pointing this out, the acf plot for this particular data has highest autocorrelation at lag 12 or 13 hence my model should have p value as 12 or 13. But I didn't used acf or pacf plots to determine p,q,d orders, I used grid search, I wasn't clear and thought the grid search already considers both lag 12 and lag 13 values for p-value so I fet it is not an issue. I rectified it later!
library(reprex)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
library(urca)
library(tidyverse)
library(readxl)
library(forecast)
fredr_set_key("360481124fc765b815de2697f1bf8d62")
icnsa <- fredr(series_id = "ICNSA")
icnsa$date <- as.Date(icnsa$date)
ggplot(icnsa, aes(x = date, y = value)) + geom_line() + labs(title = "Insurance Claim Over Time",x = "Year",y = "Number")
str(icnsa)
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
lower_limit <- 100
upper_limit <- 200
mean_value <- mean(icnsa$value)
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
# Filter the data within the range defined by the blue lines
icnsa_filtered <- icnsa %>%
filter(date >= min(date) & date <= max(date), value >= lower_limit & value <= upper_limit)
#> Error in icnsa %>% filter(date >= min(date) & date <= max(date), value >= : could not find function "%>%"
# Calculate mean and standard deviation for the filtered data
mean_value_filtered <- mean(icnsa_filtered$value)
#> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found
sd_value_filtered <- sd(icnsa_filtered$value)
#> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found
# Plot for line representing the mean value and blue lines for the lower and upper limits of the standard deviation
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
geom_hline(yintercept = mean_value, linetype = "dashed", color = "red") +
geom_segment(aes(x = min(date), y = lower_limit, xend = max(date), yend = lower_limit), color = "blue") +
geom_segment(aes(x = min(date), y = upper_limit, xend = max(date), yend = upper_limit), color = "blue") +
geom_hline(yintercept = mean_value_filtered, linetype = "dashed", color = "green") + # Add mean line for filtered data
labs(title = "Insurance Claim Over Time", x = "Year", y = "Number")
#> Error in ggplot(icnsa, aes(x = date, y = value)): could not find function "ggplot"
# Filter the data within the range defined by the blue lines
icnsa_filtered <- icnsa %>%
filter(date >= min(date) & date <= max(date), value >= lower_limit & value <= upper_limit)
#> Error in icnsa %>% filter(date >= min(date) & date <= max(date), value >= : could not find function "%>%"
# Mean and standard deviation for the filtered data
mean_value_filtered <- mean(icnsa_filtered$value)
#> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found
sd_value_filtered <- sd(icnsa_filtered$value)
#> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found
print(paste("Mean value between the blue lines:", mean_value_filtered))
#> Error in eval(expr, envir, enclos): object 'mean_value_filtered' not found
# All points above the higher limit of the standard deviation
above_limit_points <- icnsa %>%
filter(value > upper_limit)
#> Error in icnsa %>% filter(value > upper_limit): could not find function "%>%"
# Print the points
print(above_limit_points)
#> Error in eval(expr, envir, enclos): object 'above_limit_points' not found
# Calculate Z-score
icnsa <- icnsa %>%
mutate(z_score = abs(value - mean(value)) / sd(value))
#> Error in icnsa %>% mutate(z_score = abs(value - mean(value))/sd(value)): could not find function "%>%"
# Set threshold for anomaly detection (e.g., 3 standard deviations)
threshold <- 3
# Identify anomalies
anomalies <- icnsa %>%
filter(z_score > threshold)
#> Error in icnsa %>% filter(z_score > threshold): could not find function "%>%"
# Print detected anomalies
print(anomalies)
#> Error in eval(expr, envir, enclos): object 'anomalies' not found
library(ggplot2)
# Plot the data with anomalies highlighted
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
geom_point(data = anomalies, aes(x = date, y = value), color = "red", size = 2) +
labs(title = "Insurance Claim Over Time", x = "Year", y = "Number")
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
start_date <- as.Date("2020-01-01")
end_date <- as.Date("2021-01-01")
# Filter the data for the specified range
filtered_icnsa <- icnsa %>%
filter(date >= start_date & date <= end_date)
#> Error in icnsa %>% filter(date >= start_date & date <= end_date): could not find function "%>%"
# Plot the filtered data with the specified range on the x-axis
ggplot(filtered_icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "Insurance Claim Over Time", x = "Date", y = "Number") +
scale_x_date(date_labels = "%b %Y")
#> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot"
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-12-30")
# Filter the data for the specified range
filtered_icnsa <- icnsa %>%
filter(date >= start_date & date <= end_date)
#> Error in icnsa %>% filter(date >= start_date & date <= end_date): could not find function "%>%"
# Plot the filtered data with the specified range on the x-axis
ggplot(filtered_icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "Insurance Claim Over Time", x = "Date", y = "Number") +
scale_x_date(date_labels = "%b %Y")
#> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot"
# Defining the range
start_date_start <- as.Date("2020-02-01")
end_date_start <- as.Date("2020-03-20")
# Filter the data for the specified range
filtered_icnsa <- icnsa %>%
filter(date >= start_date_start & date <= end_date_start)
#> Error in icnsa %>% filter(date >= start_date_start & date <= end_date_start): could not find function "%>%"
# Calculate the differences in value and date
diff_value <- diff(filtered_icnsa$value)
#> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found
diff_date <- as.numeric(diff(filtered_icnsa$date))
#> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found
# Calculate the slopes
slopes <- diff_value / diff_date
#> Error in eval(expr, envir, enclos): object 'diff_value' not found
# Find the index of the segment with the maximum slope
max_slope_index <- which.max(slopes)
#> Error in eval(expr, envir, enclos): object 'slopes' not found
# Get the corresponding start and end dates for the segment with the highest slope
highest_slope_start_date <- filtered_icnsa$date[max_slope_index]
#> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found
highest_slope_end_date <- filtered_icnsa$date[max_slope_index + 1]
#> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found
# Plot the data with the specified range and slope highlighted
ggplot(filtered_icnsa, aes(x = date, y = value)) +
geom_line() +
geom_segment(aes(x = filtered_icnsa$date[max_slope_index],
y = filtered_icnsa$value[max_slope_index],
xend = filtered_icnsa$date[max_slope_index + 1],
yend = filtered_icnsa$value[max_slope_index + 1]),
color = "red", size = 1) +
labs(title = "Insurance Claim Over Time", x = "Date", y = "Number")
#> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot"
# Print the value of the x-axis where the slope starts
print(paste("X-axis value where slope starts:", highest_slope_start_date))
#> Error in eval(expr, envir, enclos): object 'highest_slope_start_date' not found
# Define the start and end dates
start_date_end <- as.Date("2021-01-01")
end_date_end <- as.Date("2021-12-31")
# Define the specific value (y-value)
specific_value <- 349995
# Filter the data for the specified date range
filtered_data <- icnsa_filtered %>%
filter(date >= start_date_end & date <= end_date_end)
#> Error in icnsa_filtered %>% filter(date >= start_date_end & date <= end_date_end): could not find function "%>%"
# Plot the filtered data with a horizontal line at the specific value
ggplot(filtered_data, aes(x = date, y = value)) +
geom_line(color = "blue") + # Adding color to the line
geom_hline(yintercept = specific_value, linetype = "dashed", color = "green") +
labs(title = "Data with Horizontal Line", x = "Date", y = "Value")
#> Error in ggplot(filtered_data, aes(x = date, y = value)): could not find function "ggplot"
start_date <- as.Date("2020-03-07")
end_date <- as.Date("2021-07-31")
# Plotting Covid period data with vertical lines to visualize the covid region
plot(icnsa$date, icnsa$value, type = "l",
main = "Claims Marking the Covid period",
xlab = "Year",
ylab = "Number")
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
abline(v = as.numeric(start_date), col = "red", lty = 2)
#> Error in eval(expr, envir, enclos): object 'start_date' not found
abline(v = as.numeric(end_date), col = "red", lty = 2)
#> Error in eval(expr, envir, enclos): object 'end_date' not found
# Define non-Covid and Covid periods
non_covid_period <- icnsa[icnsa$date < start_date | icnsa$date > end_date, ]
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
covid_period <- icnsa[icnsa$date >= start_date & icnsa$date <= end_date, ]
#> Error in eval(expr, envir, enclos): object 'icnsa' not found
# Initialize an empty list to store updated data frames for each spar value
updated_data_list <- list()
# Initialize an empty data frame to store lambda and MSE values
lambda_mse_df <- data.frame(lambda = numeric(), MSE = numeric())
spar_values <- seq(0.1, 1.0, by = 0.1)
# Implementing cubic spline with different values of spar
lambda_values <- seq(0.1, 1.0, by = 0.1)
for (lambda in lambda_values) {
# Fit cubic spline to non-Covid period with current spar value
spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = lambda)
# Impute new values using spline fit
imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y
# Updating Covid period data with imputed values
covid_period$value <- imputed_values
# Combining non-Covid and imputed values
updated_icnsa_data <- rbind(non_covid_period, covid_period)
updated_icnsa_data <- updated_icnsa_data %>% arrange(date)
# Store updated data frame in the list
updated_data_list[[as.character(lambda)]] <- updated_icnsa_data
# Plotting updated data
plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2,
main = paste("Comparison of Time Series (spar =", lambda, ")"), xlab = "Year", ylab = "Number")
# Add original data for comparison
lines(icnsa$date, icnsa$value, col = "blue", lwd = 2)
# Add legend
legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2)
}
#> Error in eval(expr, envir, enclos): object 'non_covid_period' not found
# Print the table of lambda and MSE values
print(lambda_mse_df)
#> [1] lambda MSE
#> <0 rows> (or 0-length row.names)
# Initialize empty vectors to store AIC and BIC values
aic_values <- numeric()
bic_values <- numeric()
# Calculate AIC and BIC for the current model
residuals <- covid_period$value - imputed_values
#> Error in eval(expr, envir, enclos): object 'covid_period' not found
rss <- sum(residuals^2)
#> Error in residuals^2: non-numeric argument to binary operator
n <- length(covid_period$value)
#> Error in eval(expr, envir, enclos): object 'covid_period' not found
k <- 3 # Number of parameters in the model (intercept, lambda, and degree of freedom)
aic_values <- c(aic_values, n * log(rss/n) + 2 * k)
#> Error in eval(expr, envir, enclos): object 'n' not found
bic_values <- c(bic_values, n * log(rss/n) + k * log(n))
#> Error in eval(expr, envir, enclos): object 'n' not found
# Find the index of the minimum AIC and BIC values
min_aic_index <- which.min(aic_values)
min_bic_index <- which.min(bic_values)
# Get the corresponding lambda values
best_lambda_aic <- lambda_values[min_aic_index]
#> Error in eval(expr, envir, enclos): object 'lambda_values' not found
best_lambda_bic <- lambda_values[min_bic_index]
#> Error in eval(expr, envir, enclos): object 'lambda_values' not found
# Print the lambda values corresponding to the minimum AIC and BIC
cat("Lambda value corresponding to minimum AIC:", best_lambda_aic, "\n")
#> Error in eval(expr, envir, enclos): object 'best_lambda_aic' not found
cat("Lambda value corresponding to minimum BIC:", best_lambda_bic, "\n")
#> Error in eval(expr, envir, enclos): object 'best_lambda_bic' not found
#Choosing λ = 0.1 achieves a delicate equilibrium between smoothing and preserving data trends. While lower values such as 0.1 minimize RMSE, they raise concerns of overfitting to noise. Conversely, higher values like 1.0 tend to oversmooth, sacrificing data nuances. λ = 0.3 effectively tackles overfitting while still capturing essential trends, making it a judicious choice for interpolation.
# Fit cubic spline to non-Covid period with spar value of 0.3
spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = 0.1)
#> Error in eval(expr, envir, enclos): object 'non_covid_period' not found
# Impute new values using spline fit
imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y
#> Error in eval(expr, envir, enclos): object 'spline_fit_covid' not found
# Updating Covid period data with imputed values
covid_period$value <- imputed_values
#> Error in eval(expr, envir, enclos): object 'imputed_values' not found
# Combining non-Covid and imputed values
updated_icnsa_data <- rbind(non_covid_period, covid_period)
#> Error in eval(expr, envir, enclos): object 'non_covid_period' not found
updated_icnsa_data <- updated_icnsa_data %>% arrange(date)
#> Error in updated_icnsa_data %>% arrange(date): could not find function "%>%"
# Plotting original and updated data together
ggplot() +
geom_line(data = icnsa, aes(x = date, y = value), color = "red") +
geom_line(data = updated_icnsa_data, aes(x = date, y = value), color = "blue") +
labs(title = "Comparison of Original and Updated Data", x = "Year", y = "Number")
#> Error in ggplot(): could not find function "ggplot"
icnsa_ts <- ts(updated_icnsa_data$value, frequency = 52)
#> Error in eval(expr, envir, enclos): object 'updated_icnsa_data' not found
#Applied Holt-Winters models both additive and multiplicative
hw_add <- HoltWinters(icnsa_ts, seasonal = "additive")
#> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found
hw_mult <- HoltWinters(icnsa_ts, seasonal = "multiplicative")
#> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found
#Forecasted next value using both models
forecast_add <- forecast::forecast(hw_add, h = 1)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#> Error in eval(expr, envir, enclos): object 'hw_add' not found
forecast_mult <- forecast::forecast(hw_mult, h = 1)
#> Error in eval(expr, envir, enclos): object 'hw_mult' not found
point_forecast_add <- forecast_add$mean
#> Error in eval(expr, envir, enclos): object 'forecast_add' not found
point_forecast_mult <- forecast_mult$mean
#> Error in eval(expr, envir, enclos): object 'forecast_mult' not found
#Printed Point Forecast value of both model
cat("Forecasted Unemployment using Holt-Winters additive Model:", point_forecast_add, "\n")
#> Error in eval(expr, envir, enclos): object 'point_forecast_add' not found
cat("Forecasted Unemployment using Holt-Winters Multiplicative Model:", point_forecast_mult, "\n")
#> Error in eval(expr, envir, enclos): object 'point_forecast_mult' not found
Created on 2024-02-28 with reprex v2.1.0
HW2 Python implementation: https://github.com/devanshpratapsingh/TimeSeriesAnalysis/blob/main/HW2/HW2_TSA.ipynb
03/07/2023 forecast: https://github.com/devanshpratapsingh/TimeSeriesAnalysis/blob/main/HW4.pdf (190698.2)
library(reprex) library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(fredr) library(ggplot2) library(forecast) #> Registered S3 method overwritten by 'quantmod': #> method from #> as.zoo.data.frame zoo library(tseries) library(urca) library(tidyverse) library(readxl) library(forecast) fredr_set_key("360481124fc765b815de2697f1bf8d62") icnsa <- fredr(series_id = "ICNSA") icnsa$date <- as.Date(icnsa$date) ggplot(icnsa, aes(x = date, y = value)) + geom_line() + labs(title = "Insurance Claim Over Time",x = "Year",y = "Number")
str(icnsa) #> Error in eval(expr, envir, enclos): object 'icnsa' not found
lower_limit <- 100 upper_limit <- 200 mean_value <- mean(icnsa$value) #> Error in eval(expr, envir, enclos): object 'icnsa' not found # Filter the data within the range defined by the blue lines icnsa_filtered <- icnsa %>% filter(date >= min(date) & date <= max(date), value >= lower_limit & value <= upper_limit) #> Error in icnsa %>% filter(date >= min(date) & date <= max(date), value >= : could not find function "%>%" # Calculate mean and standard deviation for the filtered data mean_value_filtered <- mean(icnsa_filtered$value) #> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found sd_value_filtered <- sd(icnsa_filtered$value) #> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found # Plot for line representing the mean value and blue lines for the lower and upper limits of the standard deviation ggplot(icnsa, aes(x = date, y = value)) + geom_line() + geom_hline(yintercept = mean_value, linetype = "dashed", color = "red") + geom_segment(aes(x = min(date), y = lower_limit, xend = max(date), yend = lower_limit), color = "blue") + geom_segment(aes(x = min(date), y = upper_limit, xend = max(date), yend = upper_limit), color = "blue") + geom_hline(yintercept = mean_value_filtered, linetype = "dashed", color = "green") + # Add mean line for filtered data labs(title = "Insurance Claim Over Time", x = "Year", y = "Number") #> Error in ggplot(icnsa, aes(x = date, y = value)): could not find function "ggplot"
# Filter the data within the range defined by the blue lines icnsa_filtered <- icnsa %>% filter(date >= min(date) & date <= max(date), value >= lower_limit & value <= upper_limit) #> Error in icnsa %>% filter(date >= min(date) & date <= max(date), value >= : could not find function "%>%" # Mean and standard deviation for the filtered data mean_value_filtered <- mean(icnsa_filtered$value) #> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found sd_value_filtered <- sd(icnsa_filtered$value) #> Error in eval(expr, envir, enclos): object 'icnsa_filtered' not found print(paste("Mean value between the blue lines:", mean_value_filtered)) #> Error in eval(expr, envir, enclos): object 'mean_value_filtered' not found
# All points above the higher limit of the standard deviation above_limit_points <- icnsa %>% filter(value > upper_limit) #> Error in icnsa %>% filter(value > upper_limit): could not find function "%>%" # Print the points print(above_limit_points) #> Error in eval(expr, envir, enclos): object 'above_limit_points' not found
# Calculate Z-score icnsa <- icnsa %>% mutate(z_score = abs(value - mean(value)) / sd(value)) #> Error in icnsa %>% mutate(z_score = abs(value - mean(value))/sd(value)): could not find function "%>%" # Set threshold for anomaly detection (e.g., 3 standard deviations) threshold <- 3 # Identify anomalies anomalies <- icnsa %>% filter(z_score > threshold) #> Error in icnsa %>% filter(z_score > threshold): could not find function "%>%" # Print detected anomalies print(anomalies) #> Error in eval(expr, envir, enclos): object 'anomalies' not found
library(ggplot2) # Plot the data with anomalies highlighted ggplot(icnsa, aes(x = date, y = value)) + geom_line() + geom_point(data = anomalies, aes(x = date, y = value), color = "red", size = 2) + labs(title = "Insurance Claim Over Time", x = "Year", y = "Number") #> Error in eval(expr, envir, enclos): object 'icnsa' not found
start_date <- as.Date("2020-01-01") end_date <- as.Date("2021-01-01") # Filter the data for the specified range filtered_icnsa <- icnsa %>% filter(date >= start_date & date <= end_date) #> Error in icnsa %>% filter(date >= start_date & date <= end_date): could not find function "%>%" # Plot the filtered data with the specified range on the x-axis ggplot(filtered_icnsa, aes(x = date, y = value)) + geom_line() + labs(title = "Insurance Claim Over Time", x = "Date", y = "Number") + scale_x_date(date_labels = "%b %Y") #> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot"
start_date <- as.Date("2020-03-01") end_date <- as.Date("2021-12-30") # Filter the data for the specified range filtered_icnsa <- icnsa %>% filter(date >= start_date & date <= end_date) #> Error in icnsa %>% filter(date >= start_date & date <= end_date): could not find function "%>%" # Plot the filtered data with the specified range on the x-axis ggplot(filtered_icnsa, aes(x = date, y = value)) + geom_line() + labs(title = "Insurance Claim Over Time", x = "Date", y = "Number") + scale_x_date(date_labels = "%b %Y") #> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot"
# Defining the range start_date_start <- as.Date("2020-02-01") end_date_start <- as.Date("2020-03-20") # Filter the data for the specified range filtered_icnsa <- icnsa %>% filter(date >= start_date_start & date <= end_date_start) #> Error in icnsa %>% filter(date >= start_date_start & date <= end_date_start): could not find function "%>%" # Calculate the differences in value and date diff_value <- diff(filtered_icnsa$value) #> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found diff_date <- as.numeric(diff(filtered_icnsa$date)) #> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found # Calculate the slopes slopes <- diff_value / diff_date #> Error in eval(expr, envir, enclos): object 'diff_value' not found # Find the index of the segment with the maximum slope max_slope_index <- which.max(slopes) #> Error in eval(expr, envir, enclos): object 'slopes' not found # Get the corresponding start and end dates for the segment with the highest slope highest_slope_start_date <- filtered_icnsa$date[max_slope_index] #> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found highest_slope_end_date <- filtered_icnsa$date[max_slope_index + 1] #> Error in eval(expr, envir, enclos): object 'filtered_icnsa' not found # Plot the data with the specified range and slope highlighted ggplot(filtered_icnsa, aes(x = date, y = value)) + geom_line() + geom_segment(aes(x = filtered_icnsa$date[max_slope_index], y = filtered_icnsa$value[max_slope_index], xend = filtered_icnsa$date[max_slope_index + 1], yend = filtered_icnsa$value[max_slope_index + 1]), color = "red", size = 1) + labs(title = "Insurance Claim Over Time", x = "Date", y = "Number") #> Error in ggplot(filtered_icnsa, aes(x = date, y = value)): could not find function "ggplot" # Print the value of the x-axis where the slope starts print(paste("X-axis value where slope starts:", highest_slope_start_date)) #> Error in eval(expr, envir, enclos): object 'highest_slope_start_date' not found
# Define the start and end dates start_date_end <- as.Date("2021-01-01") end_date_end <- as.Date("2021-12-31") # Define the specific value (y-value) specific_value <- 349995 # Filter the data for the specified date range filtered_data <- icnsa_filtered %>% filter(date >= start_date_end & date <= end_date_end) #> Error in icnsa_filtered %>% filter(date >= start_date_end & date <= end_date_end): could not find function "%>%" # Plot the filtered data with a horizontal line at the specific value ggplot(filtered_data, aes(x = date, y = value)) + geom_line(color = "blue") + # Adding color to the line geom_hline(yintercept = specific_value, linetype = "dashed", color = "green") + labs(title = "Data with Horizontal Line", x = "Date", y = "Value") #> Error in ggplot(filtered_data, aes(x = date, y = value)): could not find function "ggplot"
start_date <- as.Date("2020-03-07") end_date <- as.Date("2021-07-31")
# Plotting Covid period data with vertical lines to visualize the covid region plot(icnsa$date, icnsa$value, type = "l", main = "Claims Marking the Covid period", xlab = "Year", ylab = "Number") #> Error in eval(expr, envir, enclos): object 'icnsa' not found abline(v = as.numeric(start_date), col = "red", lty = 2) #> Error in eval(expr, envir, enclos): object 'start_date' not found abline(v = as.numeric(end_date), col = "red", lty = 2) #> Error in eval(expr, envir, enclos): object 'end_date' not found
# Define non-Covid and Covid periods non_covid_period <- icnsa[icnsa$date < start_date | icnsa$date > end_date, ] #> Error in eval(expr, envir, enclos): object 'icnsa' not found covid_period <- icnsa[icnsa$date >= start_date & icnsa$date <= end_date, ] #> Error in eval(expr, envir, enclos): object 'icnsa' not found # Initialize an empty list to store updated data frames for each spar value updated_data_list <- list() # Initialize an empty data frame to store lambda and MSE values lambda_mse_df <- data.frame(lambda = numeric(), MSE = numeric()) spar_values <- seq(0.1, 1.0, by = 0.1) # Implementing cubic spline with different values of spar lambda_values <- seq(0.1, 1.0, by = 0.1) for (lambda in lambda_values) { # Fit cubic spline to non-Covid period with current spar value spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = lambda) # Impute new values using spline fit imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y # Updating Covid period data with imputed values covid_period$value <- imputed_values # Combining non-Covid and imputed values updated_icnsa_data <- rbind(non_covid_period, covid_period) updated_icnsa_data <- updated_icnsa_data %>% arrange(date) # Store updated data frame in the list updated_data_list[[as.character(lambda)]] <- updated_icnsa_data # Plotting updated data plot(updated_icnsa_data$date, updated_icnsa_data$value, type = "l", col = "red", lwd = 2, main = paste("Comparison of Time Series (spar =", lambda, ")"), xlab = "Year", ylab = "Number") # Add original data for comparison lines(icnsa$date, icnsa$value, col = "blue", lwd = 2) # Add legend legend("topright", legend = c("Updated", "Original"), col = c("red", "blue"), lty = 1, lwd = 2) } #> Error in eval(expr, envir, enclos): object 'non_covid_period' not found # Print the table of lambda and MSE values print(lambda_mse_df) #> [1] lambda MSE #> <0 rows> (or 0-length row.names)
# Initialize empty vectors to store AIC and BIC values aic_values <- numeric() bic_values <- numeric() # Calculate AIC and BIC for the current model residuals <- covid_period$value - imputed_values #> Error in eval(expr, envir, enclos): object 'covid_period' not found rss <- sum(residuals^2) #> Error in residuals^2: non-numeric argument to binary operator n <- length(covid_period$value) #> Error in eval(expr, envir, enclos): object 'covid_period' not found k <- 3 # Number of parameters in the model (intercept, lambda, and degree of freedom) aic_values <- c(aic_values, n * log(rss/n) + 2 * k) #> Error in eval(expr, envir, enclos): object 'n' not found bic_values <- c(bic_values, n * log(rss/n) + k * log(n)) #> Error in eval(expr, envir, enclos): object 'n' not found # Find the index of the minimum AIC and BIC values min_aic_index <- which.min(aic_values) min_bic_index <- which.min(bic_values) # Get the corresponding lambda values best_lambda_aic <- lambda_values[min_aic_index] #> Error in eval(expr, envir, enclos): object 'lambda_values' not found best_lambda_bic <- lambda_values[min_bic_index] #> Error in eval(expr, envir, enclos): object 'lambda_values' not found # Print the lambda values corresponding to the minimum AIC and BIC cat("Lambda value corresponding to minimum AIC:", best_lambda_aic, "\n") #> Error in eval(expr, envir, enclos): object 'best_lambda_aic' not found cat("Lambda value corresponding to minimum BIC:", best_lambda_bic, "\n") #> Error in eval(expr, envir, enclos): object 'best_lambda_bic' not found
#Choosing λ = 0.1 achieves a delicate equilibrium between smoothing and preserving data trends. While lower values such as 0.1 minimize RMSE, they raise concerns of overfitting to noise. Conversely, higher values like 1.0 tend to oversmooth, sacrificing data nuances. λ = 0.3 effectively tackles overfitting while still capturing essential trends, making it a judicious choice for interpolation.
# Fit cubic spline to non-Covid period with spar value of 0.3 spline_fit_covid <- smooth.spline(x = as.numeric(non_covid_period$date), y = non_covid_period$value, spar = 0.1) #> Error in eval(expr, envir, enclos): object 'non_covid_period' not found # Impute new values using spline fit imputed_values <- predict(spline_fit_covid, x = as.numeric(covid_period$date))$y #> Error in eval(expr, envir, enclos): object 'spline_fit_covid' not found # Updating Covid period data with imputed values covid_period$value <- imputed_values #> Error in eval(expr, envir, enclos): object 'imputed_values' not found # Combining non-Covid and imputed values updated_icnsa_data <- rbind(non_covid_period, covid_period) #> Error in eval(expr, envir, enclos): object 'non_covid_period' not found updated_icnsa_data <- updated_icnsa_data %>% arrange(date) #> Error in updated_icnsa_data %>% arrange(date): could not find function "%>%" # Plotting original and updated data together ggplot() + geom_line(data = icnsa, aes(x = date, y = value), color = "red") + geom_line(data = updated_icnsa_data, aes(x = date, y = value), color = "blue") + labs(title = "Comparison of Original and Updated Data", x = "Year", y = "Number") #> Error in ggplot(): could not find function "ggplot"
icnsa_ts <- ts(updated_icnsa_data$value, frequency = 52) #> Error in eval(expr, envir, enclos): object 'updated_icnsa_data' not found #Applied Holt-Winters models both additive and multiplicative hw_add <- HoltWinters(icnsa_ts, seasonal = "additive") #> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found hw_mult <- HoltWinters(icnsa_ts, seasonal = "multiplicative") #> Error in eval(expr, envir, enclos): object 'icnsa_ts' not found #Forecasted next value using both models forecast_add <- forecast::forecast(hw_add, h = 1) #> Registered S3 method overwritten by 'quantmod': #> method from #> as.zoo.data.frame zoo #> Error in eval(expr, envir, enclos): object 'hw_add' not found forecast_mult <- forecast::forecast(hw_mult, h = 1) #> Error in eval(expr, envir, enclos): object 'hw_mult' not found point_forecast_add <- forecast_add$mean #> Error in eval(expr, envir, enclos): object 'forecast_add' not found point_forecast_mult <- forecast_mult$mean #> Error in eval(expr, envir, enclos): object 'forecast_mult' not found #Printed Point Forecast value of both model cat("Forecasted Unemployment using Holt-Winters additive Model:", point_forecast_add, "\n") #> Error in eval(expr, envir, enclos): object 'point_forecast_add' not found cat("Forecasted Unemployment using Holt-Winters Multiplicative Model:", point_forecast_mult, "\n") #> Error in eval(expr, envir, enclos): object 'point_forecast_mult' not found
Created on 2024-02-28 with reprex v2.1.0
https://github.com/devanshpratapsingh/TimeSeriesAnalysis/blob/main/HW2/HW2.pdf
`` https://github.com/devanshpratapsingh/TimeSeriesAnalysis/blob/main/HW1/TSA_HW1.ipynb