Open Saloni-Sarbhai opened 4 months ago
# Load necessary library
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'dplyr' was built under R version 4.2.3
#> Warning: package 'forcats' was built under R version 4.2.3
library(lubridate)
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa <- fredr(series_id = "ICNSA")
# Display the first few rows of the data
head(icnsa_data)
#> Error in head(icnsa_data): object 'icnsa_data' not found
icnsa_data$DATE <- as.Date(icnsa_data$DATE)
#> Error in as.Date(icnsa_data$DATE): object 'icnsa_data' not found
summary(icnsa_data)
#> Error in summary(icnsa_data): object 'icnsa_data' not found
unemployment_data <- fredr(series_id = "UNRATE")
# Display the first few rows of the data
head(unemployment_data)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1948-01-01 UNRATE 3.4 2024-02-22 2024-02-22
#> 2 1948-02-01 UNRATE 3.8 2024-02-22 2024-02-22
#> 3 1948-03-01 UNRATE 4 2024-02-22 2024-02-22
#> 4 1948-04-01 UNRATE 3.9 2024-02-22 2024-02-22
#> 5 1948-05-01 UNRATE 3.5 2024-02-22 2024-02-22
#> 6 1948-06-01 UNRATE 3.6 2024-02-22 2024-02-22
unemployment_data$date <- as.Date(unemployment_data$date)
summary(unemployment_data)
#> date series_id value realtime_start
#> Min. :1948-01-01 Length:913 Min. : 2.500 Min. :2024-02-22
#> 1st Qu.:1967-01-01 Class :character 1st Qu.: 4.400 1st Qu.:2024-02-22
#> Median :1986-01-01 Mode :character Median : 5.500 Median :2024-02-22
#> Mean :1985-12-31 Mean : 5.703 Mean :2024-02-22
#> 3rd Qu.:2005-01-01 3rd Qu.: 6.700 3rd Qu.:2024-02-22
#> Max. :2024-01-01 Max. :14.800 Max. :2024-02-22
#> realtime_end
#> Min. :2024-02-22
#> 1st Qu.:2024-02-22
#> Median :2024-02-22
#> Mean :2024-02-22
#> 3rd Qu.:2024-02-22
#> Max. :2024-02-22
library(ggplot2)
# Create the ggplot object
plot <- ggplot(data = icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA", x = "Year", y = "Number")
# Display the plot
print(plot)
library(ggplot2)
# Create the ggplot object and store it in a variable
plot <- ggplot(data = unemployment_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "Unemployed", x = "Year", y = "Number")
# Display the plot
print(plot)
library(dplyr)
# Mutate to extract Year and Month
icnsa <- icnsa %>%
mutate(Year = lubridate::year(date),
Month = lubridate::month(date))
icnsa_monthly <- icnsa %>%
group_by(Month, Year) %>%
summarise(ICNSA = mean(value, na.rm = TRUE), .groups = "drop")
unemployment_mdata <- unemployment_data %>%
mutate(Year = lubridate::year(date),
Month = lubridate::month(date))
# Merge datasets by Year and Month, retaining all observations
merged_data <- full_join(icnsa_monthly, unemployment_mdata, by = c("Year", "Month"))
# Remove rows with missing values
merged_data <- na.omit(merged_data)
correlation <- cor(merged_data$ICNSA, merged_data$value)
cat("correlation is:",correlation)
#> correlation is: 0.5061069
icsna_ts <- ts(merged_data$ICNSA, frequency = 12, start = c(min(merged_data$Year), 1))
adf_result_icsna <- adf.test(icsna_ts)
#> Warning in adf.test(icsna_ts): p-value smaller than printed p-value
print(adf_result_icsna)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: icsna_ts
#> Dickey-Fuller = -7.8661, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary
covariate_ts <- ts(merged_data$value, frequency = 12, start = c(min(merged_data$Year), 1))
# Perform Augmented Dickey-Fuller test for covariate
adf_result_covariate <- adf.test(covariate_ts)
#> Warning in adf.test(covariate_ts): p-value smaller than printed p-value
print(adf_result_covariate)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: covariate_ts
#> Dickey-Fuller = -8.6262, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary
final_model <- auto.arima(y = icsna_ts, xreg = covariate_ts)
summary(final_model)
#> Series: icsna_ts
#> Regression with ARIMA(1,1,2) errors
#>
#> Coefficients:
#> ar1 ma1 ma2 xreg
#> 0.6515 -1.3357 0.3446 98138.785
#> s.e. 0.0787 0.0954 0.0930 6912.027
#>
#> sigma^2 = 3.744e+10: log likelihood = -9296.19
#> AIC=18602.39 AICc=18602.48 BIC=18625.03
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -4721.292 192795.5 83964.92 -3.101819 20.8337 0.6004364
#> ACF1
#> Training set -0.0004868408
checkresiduals(final_model)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(1,1,2) errors
#> Q* = 25.514, df = 21, p-value = 0.2256
#>
#> Model df: 3. Total lags used: 24
# Generate forecast for the next time point
forecast_values <- forecast(final_model, xreg = covariate_ts, h = 1)
# Extract the mean of the forecasted values for the next time point
single_forecast <- as.numeric(forecast_values$mean[1])
# Print the next predicted value
cat("Next predicted value is", single_forecast, "\n")
#> Next predicted value is 245415.5
Created on 2024-02-22 with reprex v2.0.2
HW 2
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(ggplot2)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
library(urca)
#> Warning: package 'urca' was built under R version 4.2.3
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'forcats' was built under R version 4.2.3
library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
library(zoo)
#> Warning: package 'zoo' was built under R version 4.2.3
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa_data <- fredr(series_id = "ICNSA")
head(icnsa_data)
#> # A tibble: 6 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 1967-01-07 ICNSA 346000 2024-02-28 2024-02-28
#> 2 1967-01-14 ICNSA 334000 2024-02-28 2024-02-28
#> 3 1967-01-21 ICNSA 277000 2024-02-28 2024-02-28
#> 4 1967-01-28 ICNSA 252000 2024-02-28 2024-02-28
#> 5 1967-02-04 ICNSA 274000 2024-02-28 2024-02-28
#> 6 1967-02-11 ICNSA 276000 2024-02-28 2024-02-28
icnsa_data$date <- as.Date(icnsa_data$date)
icnsa_data %>%
ggplot(aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims Original ",
x = "Year",
y = "Number")
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2022-01-31")
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "blue") +
geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "blue") +
labs(title = "Claims Marking the Covid period ",
x = "Year",
y = "Number")
#> Error in ggplot(icnsa, aes(x = date, y = value)): object 'icnsa' not found
icnsa_data <- icnsa_data %>%
mutate(value = ifelse(date >= start_date & date <= end_date, NA, value))
covid_period_data <- icnsa_data %>%
filter(date >= start_date & date <= end_date)
print(covid_period_data)
#> # A tibble: 100 × 5
#> date series_id value realtime_start realtime_end
#> <date> <chr> <dbl> <date> <date>
#> 1 2020-03-07 ICNSA NA 2024-02-28 2024-02-28
#> 2 2020-03-14 ICNSA NA 2024-02-28 2024-02-28
#> 3 2020-03-21 ICNSA NA 2024-02-28 2024-02-28
#> 4 2020-03-28 ICNSA NA 2024-02-28 2024-02-28
#> 5 2020-04-04 ICNSA NA 2024-02-28 2024-02-28
#> 6 2020-04-11 ICNSA NA 2024-02-28 2024-02-28
#> 7 2020-04-18 ICNSA NA 2024-02-28 2024-02-28
#> 8 2020-04-25 ICNSA NA 2024-02-28 2024-02-28
#> 9 2020-05-02 ICNSA NA 2024-02-28 2024-02-28
#> 10 2020-05-09 ICNSA NA 2024-02-28 2024-02-28
#> # ℹ 90 more rows
icnsa_data %>%
ggplot(aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "blue") +
geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "blue") +
labs(title = "ICNSA Marking Covid Period ",
x = "Year",
y = "Number")
icnsa_data$value <- na.spline(icnsa_data$value)
ggplot(icnsa_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "ICNSA over Time (Imputed Values for COVID Period)", x = "Date", y = "ICNSA Value") +
theme_minimal()
icnsa_non_covid <- icnsa_data %>%
filter(date < start_date | date > end_date)
spar_values <- c(0.1, 0.5, 1)
par(mfrow=c(3, 1))
for(spar in spar_values) {
fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=spar)
predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))
plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", spar),
xlab="Date", ylab="ICNSA Values", col="blue")
lines(icnsa_data$date, predicted_values$y, col="red")
legend("topleft", legend=c("Imputed", "Smoothed"), col=c("blue", "red"), lty=1)
}
fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=0.5)
predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))
plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", "0.5"),
xlab="Date", ylab="ICNSA Values", col="pink")
lines(icnsa_data$date, predicted_values$y, col="green")
legend("topleft", legend=c("Imputed", "Smoothed"), col=c("pink", "green"), lty=1)
ts_data <- ts(icnsa_data$value, frequency = 52)
hwm_multi <- HoltWinters(ts_data, seasonal = "multiplicative")
forecast_hwm_multi <- forecast(hwm_multi, h = 1)
print(forecast_hwm_multi)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 185833.4 145563.8 226103 124246.3 247420.5
forecast_hwm_multi_mean <- mean(forecast_hwm_multi$mean, na.rm = TRUE)
print(forecast_hwm_multi_mean)
#> [1] 185833.4
hwm_additive <- HoltWinters(ts_data, seasonal = "additive")
forecast_hwm_additive <- forecast(hwm_additive, h = 1)
print(forecast_hwm_additive)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 58.32692 202211 161563.8 242858.2 140046.5 264375.5
forecast_hwm_additive_mean <- mean(forecast_hwm_additive$mean, na.rm = TRUE)
print(forecast_hwm_additive_mean)
#> [1] 202211
Created on 2024-02-28 with reprex v2.0.2
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(imputeTS)
#> Warning: package 'imputeTS' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa_data <- fredr(series_id = "ICNSA")
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2022-01-31")
# Filter data for Covid period
covid_icnsa_data <- icnsa_data %>%
filter(date >= start_date & date <= end_date)
# Create a copy for Kalman filtering
covid_kalman_icnsa_data <- covid_icnsa_data
non_covid_icnsa_data <- anti_join(icnsa_data, covid_icnsa_data, by = "date")
non_covid_kalman_icnsa_data <- non_covid_icnsa_data
# Assign NA values to Covid period
covid_icnsa_data$value <- NA
covid_kalman_icnsa_data$value <- NA
# Smooth spline to compare with state-space missing value methodology
smooth_spline_fit <- smooth.spline(x = non_covid_icnsa_data$date, y = non_covid_icnsa_data$value, lambda = 0.6)
impute_val <- predict(smooth_spline_fit, x = as.numeric(covid_icnsa_data$date))
covid_icnsa_data$value <- impute_val$y
# Combine Covid and non-Covid data
complete_icnsa_data <- rbind(non_covid_icnsa_data, covid_icnsa_data) %>% arrange(date)
# Impute null values using Kalman filtering (state-space missing value methodology)
complete_kalman_icnsa_data <- rbind(non_covid_kalman_icnsa_data, covid_kalman_icnsa_data) %>% arrange(date)
complete_kalman_icnsa_data$value <- na_kalman(complete_kalman_icnsa_data$value, model = "StructTS", smooth = TRUE)
# Create time series object
icnsa_ts <- ts(complete_kalman_icnsa_data$value, frequency = 52)
# Fit structural time series model
model <- StructTS(icnsa_ts, type = "BSM")
# Forecast future values using the fitted model
pred <- forecast(model, h = 1)
nyiclaim_data <- fredr(series_id = "NYICLAIMS")
nyiclaim_ts <- ts(nyiclaim_data$value, frequency = 52)
adjusted_icnsa <- window(icnsa_ts, start = start(nyiclaim_ts), end = end(nyiclaim_ts))
structts_model <- StructTS(adjusted_icnsa)
nyiclaim_tail_value <- tail(nyiclaim_ts, n = 1)
pred <- forecast(structts_model, newxreg = nyiclaim_tail_value, h = 1)
# Print the forecasted mean value
print(pred$mean)
#> Time Series:
#> Start = c(39, 10)
#> End = c(39, 10)
#> Frequency = 52
#> [1] 387285.8
Created on 2024-03-07 with reprex v2.0.2
Homework 4
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
library(ggplot2)
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(vars)
#> Warning: package 'vars' was built under R version 4.2.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.2.3
#> Loading required package: zoo
#> Warning: package 'zoo' was built under R version 4.2.3
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.2.3
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.2.3
#> Loading required package: lmtest
#> Warning: package 'lmtest' was built under R version 4.2.3
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.2.3
#> Loading required package: lattice
series_1 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet1.xlsx")
series_2 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet2.xlsx")
series_3 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet3.xlsx")
series_4 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet4.xlsx")
summary(series_1)
#> Period Value
#> Min. :1992-01-01 00:00:00.00 Length:396
#> 1st Qu.:2000-03-24 06:00:00.00 Class :character
#> Median :2008-06-16 00:00:00.00 Mode :character
#> Mean :2008-06-16 02:10:54.54
#> 3rd Qu.:2016-09-08 12:00:00.00
#> Max. :2024-12-01 00:00:00.00
summary(series_2)
#> Period Value
#> Min. :1992-01-01 00:00:00.00 Length:396
#> 1st Qu.:2000-03-24 06:00:00.00 Class :character
#> Median :2008-06-16 00:00:00.00 Mode :character
#> Mean :2008-06-16 02:10:54.54
#> 3rd Qu.:2016-09-08 12:00:00.00
#> Max. :2024-12-01 00:00:00.00
summary(series_3)
#> Period Value
#> Min. :1992-01-01 00:00:00.00 Length:396
#> 1st Qu.:2000-03-24 06:00:00.00 Class :character
#> Median :2008-06-16 00:00:00.00 Mode :character
#> Mean :2008-06-16 02:10:54.54
#> 3rd Qu.:2016-09-08 12:00:00.00
#> Max. :2024-12-01 00:00:00.00
summary(series_4)
#> Period Value
#> Min. :1992-01-01 00:00:00.00 Length:396
#> 1st Qu.:2000-03-24 06:00:00.00 Class :character
#> Median :2008-06-16 00:00:00.00 Mode :character
#> Mean :2008-06-16 02:10:54.54
#> 3rd Qu.:2016-09-08 12:00:00.00
#> Max. :2024-12-01 00:00:00.00
head(series_1)
#> # A tibble: 6 × 2
#> Period Value
#> <dttm> <chr>
#> 1 1992-01-01 00:00:00 2,27,721
#> 2 1992-02-01 00:00:00 2,28,860
#> 3 1992-03-01 00:00:00 2,38,604
#> 4 1992-04-01 00:00:00 2,39,877
#> 5 1992-05-01 00:00:00 2,43,732
#> 6 1992-06-01 00:00:00 2,45,693
head(series_2)
#> # A tibble: 6 × 2
#> Period Value
#> <dttm> <chr>
#> 1 1992-01-01 00:00:00 1,95,546
#> 2 1992-02-01 00:00:00 1,94,551
#> 3 1992-03-01 00:00:00 2,02,999
#> 4 1992-04-01 00:00:00 2,03,862
#> 5 1992-05-01 00:00:00 2,06,982
#> 6 1992-06-01 00:00:00 2,08,867
head(series_3)
#> # A tibble: 6 × 2
#> Period Value
#> <dttm> <chr>
#> 1 1992-01-01 00:00:00 2,18,731
#> 2 1992-02-01 00:00:00 2,19,614
#> 3 1992-03-01 00:00:00 2,29,686
#> 4 1992-04-01 00:00:00 2,30,568
#> 5 1992-05-01 00:00:00 2,35,135
#> 6 1992-06-01 00:00:00 2,36,595
head(series_4)
#> # A tibble: 6 × 2
#> Period Value
#> <dttm> <chr>
#> 1 1992-01-01 00:00:00 85081.0
#> 2 1992-02-01 00:00:00 85693.0
#> 3 1992-03-01 00:00:00 88670.0
#> 4 1992-04-01 00:00:00 89900.0
#> 5 1992-05-01 00:00:00 90226.0
#> 6 1992-06-01 00:00:00 91023.0
str(series_1$Value)
#> chr [1:396] "2,27,721" "2,28,860" "2,38,604" "2,39,877" "2,43,732" ...
series_1$Value <- as.numeric(gsub(",", "", series_1$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_1$Value))
#> [1] 10
series_1$Value[is.na(series_1$Value)] <- mean(series_1$Value, na.rm = TRUE)
str(series_1$Value)
#> num [1:396] 227721 228860 238604 239877 243732 ...
str(series_2$Value)
#> chr [1:396] "1,95,546" "1,94,551" "2,02,999" "2,03,862" "2,06,982" ...
series_2$Value <- as.numeric(gsub(",", "", series_2$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_2$Value))
#> [1] 10
series_2$Value[is.na(series_2$Value)] <- mean(series_2$Value, na.rm = TRUE)
str(series_2$Value)
#> num [1:396] 195546 194551 202999 203862 206982 ...
str(series_3$Value)
#> chr [1:396] "2,18,731" "2,19,614" "2,29,686" "2,30,568" "2,35,135" ...
series_3$Value <- as.numeric(gsub(",", "", series_3$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_3$Value))
#> [1] 10
series_3$Value[is.na(series_3$Value)] <- mean(series_3$Value, na.rm = TRUE)
str(series_3$Value)
#> num [1:396] 218731 219614 229686 230568 235135 ...
str(series_4$Value)
#> chr [1:396] "85081.0" "85693.0" "88670.0" "89900.0" "90226.0" "91023.0" ...
series_4$Value <- as.numeric(gsub(",", "", series_4$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_4$Value))
#> [1] 10
series_4$Value[is.na(series_4$Value)] <- mean(series_4$Value, na.rm = TRUE)
str(series_4$Value)
#> num [1:396] 85081 85693 88670 89900 90226 ...
str(series_1$Period)
#> POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_1$Period <- as.Date(series_1$Period)
str(series_1$Period)
#> Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
str(series_2$Period)
#> POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_2$Period <- as.Date(series_2$Period)
str(series_2$Period)
#> Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
str(series_3$Period)
#> POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_3$Period <- as.Date(series_3$Period)
str(series_3$Period)
#> Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
str(series_4$Period)
#> POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_4$Period <- as.Date(series_4$Period)
str(series_4$Period)
#> Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
plot(series_1$Period, series_1$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")
plot(series_2$Period, series_2$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")
plot(series_3$Period, series_3$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")
plot(series_4$Period, series_4$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")
series_1_ts <- ts(series_1$Value, frequency = 12)
series_2_ts <- ts(series_2$Value, frequency = 12)
series_3_ts <- ts(series_3$Value, frequency = 12)
series_4_ts <- ts(series_4$Value, frequency = 12)
series_1_decomp <- stl(series_1_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_1_decomp$time.series[, "trend"]
seasonal <- series_1_decomp$time.series[, "seasonal"]
remainder <- series_1_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_1_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)
series_2_decomp <- stl(series_2_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_2_decomp$time.series[, "trend"]
seasonal <- series_2_decomp$time.series[, "seasonal"]
remainder <- series_2_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_2_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)
series_3_decomp <- stl(series_3_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_3_decomp$time.series[, "trend"]
seasonal <- series_3_decomp$time.series[, "seasonal"]
remainder <- series_3_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_3_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)
series_4_decomp <- stl(series_4_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_4_decomp$time.series[, "trend"]
seasonal <- series_4_decomp$time.series[, "seasonal"]
remainder <- series_4_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_4_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)
acf(series_1$Value, main = "Autocorrelation Function (ACF)")
pacf(series_1$Value, main = "Partial Autocorrelation Function (PACF)")
acf(series_2$Value, main = "Autocorrelation Function (ACF)")
pacf(series_2$Value, main = "Partial Autocorrelation Function (PACF)")
acf(series_3$Value, main = "Autocorrelation Function (ACF)")
pacf(series_3$Value, main = "Partial Autocorrelation Function (PACF)")
acf(series_4$Value, main = "Autocorrelation Function (ACF)")
pacf(series_4$Value, main = "Partial Autocorrelation Function (PACF)")
# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_1_ts)
print(adf_test_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: series_1_ts
#> Dickey-Fuller = -3.3479, Lag order = 7, p-value = 0.06297
#> alternative hypothesis: stationary
adf_test_result <- adf.test(series_2_ts)
print(adf_test_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: series_2_ts
#> Dickey-Fuller = -3.1657, Lag order = 7, p-value = 0.09394
#> alternative hypothesis: stationary
# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_3_ts)
print(adf_test_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: series_3_ts
#> Dickey-Fuller = -3.4025, Lag order = 7, p-value = 0.0537
#> alternative hypothesis: stationary
# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_4_ts)
print(adf_test_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: series_4_ts
#> Dickey-Fuller = -3.5854, Lag order = 7, p-value = 0.0345
#> alternative hypothesis: stationary
data_combined <- cbind(series_1_ts, series_2_ts, series_3_ts, series_4_ts)
var_model1 <- VAR(data_combined, p = 1)
var_model2 <- VAR(data_combined, p = 2)
summary(var_model1)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: series_1_ts, series_2_ts, series_3_ts, series_4_ts
#> Deterministic variables: const
#> Sample size: 395
#> Log Likelihood: -14127.372
#> Roots of the characteristic polynomial:
#> 0.9896 0.9459 0.9423 0.8988
#> Call:
#> VAR(y = data_combined, p = 1)
#>
#>
#> Estimation results for equation series_1_ts:
#> ============================================
#> series_1_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 2.746e-01 4.394e-01 0.625 0.532378
#> series_2_ts.l1 -1.433e-02 1.016e-01 -0.141 0.887915
#> series_3_ts.l1 8.841e-01 4.770e-01 1.854 0.064545 .
#> series_4_ts.l1 -4.761e-01 1.574e-01 -3.025 0.002655 **
#> const 1.825e+04 4.925e+03 3.705 0.000242 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 11110 on 390 degrees of freedom
#> Multiple R-Squared: 0.9844, Adjusted R-squared: 0.9843
#> F-statistic: 6166 on 4 and 390 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_2_ts:
#> ============================================
#> series_2_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -5.229e-01 3.532e-01 -1.480 0.139580
#> series_2_ts.l1 9.333e-01 8.167e-02 11.429 < 2e-16 ***
#> series_3_ts.l1 6.974e-01 3.835e-01 1.819 0.069714 .
#> series_4_ts.l1 -3.845e-01 1.265e-01 -3.038 0.002539 **
#> const 1.477e+04 3.959e+03 3.731 0.000219 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 8934 on 390 degrees of freedom
#> Multiple R-Squared: 0.9861, Adjusted R-squared: 0.986
#> F-statistic: 6934 on 4 and 390 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_3_ts:
#> ============================================
#> series_3_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -6.505e-01 4.290e-01 -1.516 0.130254
#> series_2_ts.l1 -1.432e-02 9.919e-02 -0.144 0.885284
#> series_3_ts.l1 1.803e+00 4.657e-01 3.871 0.000127 ***
#> series_4_ts.l1 -4.600e-01 1.537e-01 -2.993 0.002942 **
#> const 1.778e+04 4.809e+03 3.697 0.000250 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 10850 on 390 degrees of freedom
#> Multiple R-Squared: 0.9843, Adjusted R-squared: 0.9842
#> F-statistic: 6129 on 4 and 390 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_4_ts:
#> ============================================
#> series_4_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -0.40479 0.14503 -2.791 0.00551 **
#> series_2_ts.l1 -0.02234 0.03353 -0.666 0.50575
#> series_3_ts.l1 0.50033 0.15744 3.178 0.00160 **
#> series_4_ts.l1 0.76611 0.05196 14.744 < 2e-16 ***
#> const 7729.08175 1625.67129 4.754 2.81e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 3668 on 390 degrees of freedom
#> Multiple R-Squared: 0.9799, Adjusted R-squared: 0.9797
#> F-statistic: 4764 on 4 and 390 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts 123484188 96667878 120475608 37819614
#> series_2_ts 96667878 79809630 94260185 28442083
#> series_3_ts 120475608 94260185 117725540 36855737
#> series_4_ts 37819614 28442083 36855737 13454952
#>
#> Correlation matrix of residuals:
#> series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts 1.0000 0.9738 0.9992 0.9278
#> series_2_ts 0.9738 1.0000 0.9724 0.8679
#> series_3_ts 0.9992 0.9724 1.0000 0.9260
#> series_4_ts 0.9278 0.8679 0.9260 1.0000
summary(var_model2)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: series_1_ts, series_2_ts, series_3_ts, series_4_ts
#> Deterministic variables: const
#> Sample size: 394
#> Log Likelihood: -14060.359
#> Roots of the characteristic polynomial:
#> 0.9913 0.9499 0.9499 0.9148 0.2941 0.2285 0.1137 0.09701
#> Call:
#> VAR(y = data_combined, p = 2)
#>
#>
#> Estimation results for equation series_1_ts:
#> ============================================
#> series_1_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -0.5242 1.3616 -0.385 0.70050
#> series_2_ts.l1 0.2258 0.3058 0.738 0.46066
#> series_3_ts.l1 1.7463 1.3154 1.328 0.18512
#> series_4_ts.l1 -1.0391 0.4548 -2.285 0.02287 *
#> series_1_ts.l2 0.9589 1.3809 0.694 0.48786
#> series_2_ts.l2 -0.2388 0.3029 -0.788 0.43092
#> series_3_ts.l2 -1.0523 1.3405 -0.785 0.43292
#> series_4_ts.l2 0.6475 0.4434 1.460 0.14498
#> const 15865.7764 5110.6876 3.104 0.00205 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 11100 on 385 degrees of freedom
#> Multiple R-Squared: 0.9845, Adjusted R-squared: 0.9842
#> F-statistic: 3062 on 8 and 385 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_2_ts:
#> ============================================
#> series_2_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -1.4203 1.0953 -1.297 0.19551
#> series_2_ts.l1 1.2244 0.2460 4.978 9.71e-07 ***
#> series_3_ts.l1 1.5077 1.0582 1.425 0.15501
#> series_4_ts.l1 -0.6167 0.3658 -1.686 0.09268 .
#> series_1_ts.l2 1.0411 1.1108 0.937 0.34924
#> series_2_ts.l2 -0.2921 0.2436 -1.199 0.23133
#> series_3_ts.l2 -0.9750 1.0783 -0.904 0.36643
#> series_4_ts.l2 0.2939 0.3567 0.824 0.41049
#> const 13104.3752 4111.1417 3.188 0.00155 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 8925 on 385 degrees of freedom
#> Multiple R-Squared: 0.9862, Adjusted R-squared: 0.9859
#> F-statistic: 3439 on 8 and 385 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_3_ts:
#> ============================================
#> series_3_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -1.1503 1.3298 -0.865 0.38754
#> series_2_ts.l1 0.2007 0.2986 0.672 0.50196
#> series_3_ts.l1 2.3842 1.2846 1.856 0.06422 .
#> series_4_ts.l1 -1.0350 0.4441 -2.330 0.02030 *
#> series_1_ts.l2 0.6392 1.3486 0.474 0.63578
#> series_2_ts.l2 -0.2142 0.2958 -0.724 0.46945
#> series_3_ts.l2 -0.7480 1.3091 -0.571 0.56804
#> series_4_ts.l2 0.6525 0.4330 1.507 0.13263
#> const 15617.3320 4991.0244 3.129 0.00189 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 10840 on 385 degrees of freedom
#> Multiple R-Squared: 0.9844, Adjusted R-squared: 0.9841
#> F-statistic: 3041 on 8 and 385 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation series_4_ts:
#> ============================================
#> series_4_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> series_1_ts.l1 -0.45616 0.44639 -1.022 0.30748
#> series_2_ts.l1 0.05236 0.10024 0.522 0.60174
#> series_3_ts.l1 0.61657 0.43124 1.430 0.15360
#> series_4_ts.l1 0.40941 0.14909 2.746 0.00632 **
#> series_1_ts.l2 0.09371 0.45270 0.207 0.83612
#> series_2_ts.l2 -0.07897 0.09929 -0.795 0.42694
#> series_3_ts.l2 -0.16375 0.43944 -0.373 0.70963
#> series_4_ts.l2 0.38386 0.14536 2.641 0.00861 **
#> const 6918.65776 1675.45331 4.129 4.46e-05 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 3637 on 385 degrees of freedom
#> Multiple R-Squared: 0.9803, Adjusted R-squared: 0.9799
#> F-statistic: 2393 on 8 and 385 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts 123106938 96453845 120136901 37487454
#> series_2_ts 96453845 79661550 94081957 28254909
#> series_3_ts 120136901 94081957 117409501 36535456
#> series_4_ts 37487454 28254909 36535456 13230873
#>
#> Correlation matrix of residuals:
#> series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts 1.0000 0.9740 0.9993 0.9289
#> series_2_ts 0.9740 1.0000 0.9728 0.8703
#> series_3_ts 0.9993 0.9728 1.0000 0.9270
#> series_4_ts 0.9289 0.8703 0.9270 1.0000
forecast_1 <- predict(var_model1, n.ahead = 1)
forecast_2 <- predict(var_model2, n.ahead = 1)
forecasted_value_var1 <- forecast_1$fcst[1]$series_1_ts[1]
print(forecasted_value_var1)
#> [1] 403354.7
forecasted_value_var2 <- forecast_2$fcst[1]$series_1_ts[1]
print(forecasted_value_var2)
#> [1] 403326.7
causality(var_model1)
#> Warning in causality(var_model1):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (series_1_ts) as cause variable.
#> $Granger
#>
#> Granger causality H0: series_1_ts do not Granger-cause series_2_ts
#> series_3_ts series_4_ts
#>
#> data: VAR object var_model1
#> F-Test = 6.0081, df1 = 3, df2 = 1560, p-value = 0.0004554
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: series_1_ts and series_2_ts
#> series_3_ts series_4_ts
#>
#> data: VAR object var_model1
#> Chi-squared = 197.36, df = 3, p-value < 2.2e-16
causality(var_model2)
#> Warning in causality(var_model2):
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (series_1_ts) as cause variable.
#> $Granger
#>
#> Granger causality H0: series_1_ts do not Granger-cause series_2_ts
#> series_3_ts series_4_ts
#>
#> data: VAR object var_model2
#> F-Test = 3.5488, df1 = 6, df2 = 1540, p-value = 0.001707
#>
#>
#> $Instant
#>
#> H0: No instantaneous causality between: series_1_ts and series_2_ts
#> series_3_ts series_4_ts
#>
#> data: VAR object var_model2
#> Chi-squared = 196.87, df = 3, p-value < 2.2e-16
bigvar_matrix <- cbind(series_1_ts, series_2_ts, series_3_ts, series_4_ts)
sparse_var_fit <- BigVAR.fit(Y=bigvar_matrix,struct='Basic',p=2,lambda=1)[,,1]
print(sparse_var_fit)
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 6493.742 0.4128257 0.28983109 0.3954918 0.03836217 0.001780558
#> [2,] 14955.197 0.2288468 0.61577392 0.2115015 -0.15968421 -0.113973639
#> [3,] 5031.704 0.3957032 0.26937302 0.3992144 0.07037622 -0.009087329
#> [4,] 13846.534 0.1074036 -0.05116922 0.1594062 0.28899938 -0.010718874
#> [,7] [,8] [,9]
#> [1,] -0.04855471 -0.0093409980 -0.06925488
#> [2,] 0.26899770 -0.1283131254 -0.22089028
#> [3,] -0.06323737 0.0002354252 -0.03661735
#> [4,] -0.14614741 0.0447493532 0.25200802
summary(sparse_var_fit)
#> V1 V2 V3 V4
#> Min. : 5032 Min. :0.1074 Min. :-0.05117 Min. :0.1594
#> 1st Qu.: 6128 1st Qu.:0.1985 1st Qu.: 0.18924 1st Qu.:0.1985
#> Median :10170 Median :0.3123 Median : 0.27960 Median :0.3035
#> Mean :10082 Mean :0.2862 Mean : 0.28095 Mean :0.2914
#> 3rd Qu.:14124 3rd Qu.:0.4000 3rd Qu.: 0.37132 3rd Qu.:0.3964
#> Max. :14955 Max. :0.4128 Max. : 0.61577 Max. :0.3992
#> V5 V6 V7 V8
#> Min. :-0.15968 Min. :-0.113974 Min. :-0.146147 Min. :-0.128313
#> 1st Qu.:-0.01115 1st Qu.:-0.036533 1st Qu.:-0.083965 1st Qu.:-0.039084
#> Median : 0.05437 Median :-0.009903 Median :-0.055896 Median :-0.004553
#> Mean : 0.05951 Mean :-0.033000 Mean : 0.002765 Mean :-0.023167
#> 3rd Qu.: 0.12503 3rd Qu.:-0.006370 3rd Qu.: 0.030833 3rd Qu.: 0.011364
#> Max. : 0.28900 Max. : 0.001781 Max. : 0.268998 Max. : 0.044749
#> V9
#> Min. :-0.22089
#> 1st Qu.:-0.10716
#> Median :-0.05294
#> Mean :-0.01869
#> 3rd Qu.: 0.03554
#> Max. : 0.25201
Created on 2024-04-10 with reprex v2.0.2
Created on 2024-02-09 with reprex v2.0.2