jlivsey / UB-sping24-time-series

3 stars 1 forks source link

Saloni Sarbhai #26

Open Saloni-Sarbhai opened 4 months ago

Saloni-Sarbhai commented 4 months ago
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'dplyr' was built under R version 4.2.3
#> Warning: package 'forcats' was built under R version 4.2.3
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3

# Set FRED API key
fredr_set_key('4e2b405e7ea0d612c659d24c185134a1')

# Reading data directly with fredr
new_data <- fredr(series_id = 'ICNSA') %>%
  mutate(date = as.Date(date)) # Converting DATE column type from string to date

# Basic summary statistics
new_summary <- new_data$value %>%
  summary()

# Plotting Time-Series data
new_plot <- new_data %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Time Series Plot of New Data",
       x = "Date",
       y = "ICNSA")

# Print summary statistics
print(new_summary)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  133000  265852  326938  365060  406816 6161268

# Print the plot
print(new_plot)


# Autocorrelation Plot
acf(new_data$value)


# Partial Autocorrelation Plot
pacf(new_data$value)


ts_data <- ts(new_data$value, frequency = 7)

# Fitting an ARIMA model
new_arima_model <- arima(ts_data, order = c(2,1,1))

# Forecasting for the next week
#new_forecast_week <- predict(new_arima_model, n.ahead = 2)
#print(new_forecast_week$pred)

new_last_date <- tail(new_data$date, 1)
new_target_date <- as.Date("2024-02-10")
print(new_last_date)
#> [1] "2024-02-03"
#> [1] "2024-01-27"
new_num_periods <- as.numeric(difftime(new_target_date, new_last_date, units = "weeks"))

# Forecasting for a targeted date
new_forecast_target <- predict(new_arima_model, n.ahead = new_num_periods)
print(new_forecast_target$pred)
#> Time Series:
#> Start = c(426, 5) 
#> End = c(426, 5) 
#> Frequency = 7 
#> [1] 219575.4

Created on 2024-02-09 with reprex v2.0.2

Saloni-Sarbhai commented 4 months ago
# Load necessary library
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'dplyr' was built under R version 4.2.3
#> Warning: package 'forcats' was built under R version 4.2.3
library(lubridate)
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3

fredr_set_key("619954596c335c6cd3c4fc1f7a346118")

icnsa <- fredr(series_id = "ICNSA") 

# Display the first few rows of the data
head(icnsa_data)
#> Error in head(icnsa_data): object 'icnsa_data' not found
icnsa_data$DATE <- as.Date(icnsa_data$DATE)
#> Error in as.Date(icnsa_data$DATE): object 'icnsa_data' not found
summary(icnsa_data)
#> Error in summary(icnsa_data): object 'icnsa_data' not found

unemployment_data <- fredr(series_id = "UNRATE")

# Display the first few rows of the data
head(unemployment_data)
#> # A tibble: 6 × 5
#>   date       series_id value realtime_start realtime_end
#>   <date>     <chr>     <dbl> <date>         <date>      
#> 1 1948-01-01 UNRATE      3.4 2024-02-22     2024-02-22  
#> 2 1948-02-01 UNRATE      3.8 2024-02-22     2024-02-22  
#> 3 1948-03-01 UNRATE      4   2024-02-22     2024-02-22  
#> 4 1948-04-01 UNRATE      3.9 2024-02-22     2024-02-22  
#> 5 1948-05-01 UNRATE      3.5 2024-02-22     2024-02-22  
#> 6 1948-06-01 UNRATE      3.6 2024-02-22     2024-02-22

unemployment_data$date <- as.Date(unemployment_data$date)
summary(unemployment_data)
#>       date             series_id             value        realtime_start      
#>  Min.   :1948-01-01   Length:913         Min.   : 2.500   Min.   :2024-02-22  
#>  1st Qu.:1967-01-01   Class :character   1st Qu.: 4.400   1st Qu.:2024-02-22  
#>  Median :1986-01-01   Mode  :character   Median : 5.500   Median :2024-02-22  
#>  Mean   :1985-12-31                      Mean   : 5.703   Mean   :2024-02-22  
#>  3rd Qu.:2005-01-01                      3rd Qu.: 6.700   3rd Qu.:2024-02-22  
#>  Max.   :2024-01-01                      Max.   :14.800   Max.   :2024-02-22  
#>   realtime_end       
#>  Min.   :2024-02-22  
#>  1st Qu.:2024-02-22  
#>  Median :2024-02-22  
#>  Mean   :2024-02-22  
#>  3rd Qu.:2024-02-22  
#>  Max.   :2024-02-22

library(ggplot2)

# Create the ggplot object
plot <- ggplot(data = icnsa, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "ICNSA", x = "Year", y = "Number")

# Display the plot
print(plot)


library(ggplot2)

# Create the ggplot object and store it in a variable
plot <- ggplot(data = unemployment_data, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Unemployed", x = "Year", y = "Number")

# Display the plot
print(plot)


library(dplyr)

# Mutate to extract Year and Month
icnsa <- icnsa %>%
  mutate(Year = lubridate::year(date),
         Month = lubridate::month(date))

icnsa_monthly <- icnsa %>%
  group_by(Month, Year) %>%
  summarise(ICNSA = mean(value, na.rm = TRUE), .groups = "drop")

unemployment_mdata <- unemployment_data %>%
  mutate(Year = lubridate::year(date),
         Month = lubridate::month(date))

# Merge datasets by Year and Month, retaining all observations
merged_data <- full_join(icnsa_monthly, unemployment_mdata, by = c("Year", "Month"))

# Remove rows with missing values
merged_data <- na.omit(merged_data)

correlation <- cor(merged_data$ICNSA, merged_data$value)

cat("correlation is:",correlation)
#> correlation is: 0.5061069

icsna_ts <- ts(merged_data$ICNSA, frequency = 12, start = c(min(merged_data$Year), 1))
adf_result_icsna <- adf.test(icsna_ts)
#> Warning in adf.test(icsna_ts): p-value smaller than printed p-value
print(adf_result_icsna)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  icsna_ts
#> Dickey-Fuller = -7.8661, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary

covariate_ts <- ts(merged_data$value, frequency = 12, start = c(min(merged_data$Year), 1))

# Perform Augmented Dickey-Fuller test for covariate
adf_result_covariate <- adf.test(covariate_ts)
#> Warning in adf.test(covariate_ts): p-value smaller than printed p-value
print(adf_result_covariate)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  covariate_ts
#> Dickey-Fuller = -8.6262, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary

final_model <- auto.arima(y = icsna_ts, xreg = covariate_ts)
summary(final_model)
#> Series: icsna_ts 
#> Regression with ARIMA(1,1,2) errors 
#> 
#> Coefficients:
#>          ar1      ma1     ma2       xreg
#>       0.6515  -1.3357  0.3446  98138.785
#> s.e.  0.0787   0.0954  0.0930   6912.027
#> 
#> sigma^2 = 3.744e+10:  log likelihood = -9296.19
#> AIC=18602.39   AICc=18602.48   BIC=18625.03
#> 
#> Training set error measures:
#>                     ME     RMSE      MAE       MPE    MAPE      MASE
#> Training set -4721.292 192795.5 83964.92 -3.101819 20.8337 0.6004364
#>                       ACF1
#> Training set -0.0004868408

checkresiduals(final_model)

#> 
#>  Ljung-Box test
#> 
#> data:  Residuals from Regression with ARIMA(1,1,2) errors
#> Q* = 25.514, df = 21, p-value = 0.2256
#> 
#> Model df: 3.   Total lags used: 24

# Generate forecast for the next time point
forecast_values <- forecast(final_model, xreg = covariate_ts, h = 1)

# Extract the mean of the forecasted values for the next time point
single_forecast <- as.numeric(forecast_values$mean[1])

# Print the next predicted value
cat("Next predicted value is", single_forecast, "\n")
#> Next predicted value is 245415.5

Created on 2024-02-22 with reprex v2.0.2

Saloni-Sarbhai commented 4 months ago

HW 2

library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(ggplot2)
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
library(urca)
#> Warning: package 'urca' was built under R version 4.2.3
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.2.3
#> Warning: package 'tibble' was built under R version 4.2.3
#> Warning: package 'forcats' was built under R version 4.2.3
library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
library(magrittr)
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
#> The following object is masked from 'package:tidyr':
#> 
#>     extract
library(zoo)
#> Warning: package 'zoo' was built under R version 4.2.3
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric

fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa_data <- fredr(series_id = "ICNSA")
head(icnsa_data)
#> # A tibble: 6 × 5
#>   date       series_id  value realtime_start realtime_end
#>   <date>     <chr>      <dbl> <date>         <date>      
#> 1 1967-01-07 ICNSA     346000 2024-02-28     2024-02-28  
#> 2 1967-01-14 ICNSA     334000 2024-02-28     2024-02-28  
#> 3 1967-01-21 ICNSA     277000 2024-02-28     2024-02-28  
#> 4 1967-01-28 ICNSA     252000 2024-02-28     2024-02-28  
#> 5 1967-02-04 ICNSA     274000 2024-02-28     2024-02-28  
#> 6 1967-02-11 ICNSA     276000 2024-02-28     2024-02-28
icnsa_data$date <- as.Date(icnsa_data$date)
icnsa_data %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  labs(title = "Claims Original ",
       x = "Year",
       y = "Number")

start_date <- as.Date("2020-03-01")
end_date <- as.Date("2022-01-31")
ggplot(icnsa, aes(x = date, y = value)) +
  geom_line() +
  geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "blue") +
  geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "blue") +
  labs(title = "Claims Marking the Covid period ",
       x = "Year",
       y = "Number")
#> Error in ggplot(icnsa, aes(x = date, y = value)): object 'icnsa' not found
icnsa_data <- icnsa_data %>%
  mutate(value = ifelse(date >= start_date & date <= end_date, NA, value))
covid_period_data <- icnsa_data %>%
  filter(date >= start_date & date <= end_date)
print(covid_period_data)
#> # A tibble: 100 × 5
#>    date       series_id value realtime_start realtime_end
#>    <date>     <chr>     <dbl> <date>         <date>      
#>  1 2020-03-07 ICNSA        NA 2024-02-28     2024-02-28  
#>  2 2020-03-14 ICNSA        NA 2024-02-28     2024-02-28  
#>  3 2020-03-21 ICNSA        NA 2024-02-28     2024-02-28  
#>  4 2020-03-28 ICNSA        NA 2024-02-28     2024-02-28  
#>  5 2020-04-04 ICNSA        NA 2024-02-28     2024-02-28  
#>  6 2020-04-11 ICNSA        NA 2024-02-28     2024-02-28  
#>  7 2020-04-18 ICNSA        NA 2024-02-28     2024-02-28  
#>  8 2020-04-25 ICNSA        NA 2024-02-28     2024-02-28  
#>  9 2020-05-02 ICNSA        NA 2024-02-28     2024-02-28  
#> 10 2020-05-09 ICNSA        NA 2024-02-28     2024-02-28  
#> # ℹ 90 more rows
icnsa_data %>%
  ggplot(aes(x = date, y = value)) +
  geom_line() +
  geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "blue") +
  geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "blue") +
  labs(title = "ICNSA Marking Covid Period ",
       x = "Year",
       y = "Number")

icnsa_data$value <- na.spline(icnsa_data$value)
ggplot(icnsa_data, aes(x = date, y = value)) +
  geom_line() +
  labs(title = "ICNSA over Time (Imputed Values for COVID Period)", x = "Date", y = "ICNSA Value") +
  theme_minimal()

icnsa_non_covid <- icnsa_data %>%
  filter(date < start_date | date > end_date)
spar_values <- c(0.1, 0.5, 1)
par(mfrow=c(3, 1)) 
for(spar in spar_values) {
  fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=spar)

  predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))

  plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", spar),
       xlab="Date", ylab="ICNSA Values", col="blue")
  lines(icnsa_data$date, predicted_values$y, col="red")
  legend("topleft", legend=c("Imputed", "Smoothed"), col=c("blue", "red"), lty=1)
}

fit_spline <- smooth.spline(as.numeric(icnsa_non_covid$date), icnsa_non_covid$value, spar=0.5)

predicted_values <- predict(fit_spline, as.numeric(icnsa_data$date))

plot(icnsa_data$date, icnsa_data$value, type='l', main=paste("Spar =", "0.5"),
     xlab="Date", ylab="ICNSA Values", col="pink")
lines(icnsa_data$date, predicted_values$y, col="green")
legend("topleft", legend=c("Imputed", "Smoothed"), col=c("pink", "green"), lty=1)

ts_data <- ts(icnsa_data$value, frequency = 52)

hwm_multi <- HoltWinters(ts_data, seasonal = "multiplicative")
forecast_hwm_multi <- forecast(hwm_multi, h = 1)
print(forecast_hwm_multi)
#>          Point Forecast    Lo 80  Hi 80    Lo 95    Hi 95
#> 58.32692       185833.4 145563.8 226103 124246.3 247420.5

forecast_hwm_multi_mean <- mean(forecast_hwm_multi$mean, na.rm = TRUE)
print(forecast_hwm_multi_mean)
#> [1] 185833.4

hwm_additive <- HoltWinters(ts_data, seasonal = "additive")
forecast_hwm_additive <- forecast(hwm_additive, h = 1)
print(forecast_hwm_additive)
#>          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
#> 58.32692         202211 161563.8 242858.2 140046.5 264375.5

forecast_hwm_additive_mean <- mean(forecast_hwm_additive$mean, na.rm = TRUE)
print(forecast_hwm_additive_mean)
#> [1] 202211

Created on 2024-02-28 with reprex v2.0.2

Saloni-Sarbhai commented 3 months ago
library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(fredr)
#> Warning: package 'fredr' was built under R version 4.2.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(imputeTS)
#> Warning: package 'imputeTS' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(forecast)
#> Warning: package 'forecast' was built under R version 4.2.3

fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa_data <- fredr(series_id = "ICNSA")

start_date <- as.Date("2020-03-01")
end_date <- as.Date("2022-01-31")

# Filter data for Covid period
covid_icnsa_data <- icnsa_data %>%
  filter(date >= start_date & date <= end_date)

# Create a copy for Kalman filtering
covid_kalman_icnsa_data <- covid_icnsa_data
non_covid_icnsa_data <- anti_join(icnsa_data, covid_icnsa_data, by = "date")
non_covid_kalman_icnsa_data <- non_covid_icnsa_data

# Assign NA values to Covid period
covid_icnsa_data$value <- NA
covid_kalman_icnsa_data$value <- NA

# Smooth spline to compare with state-space missing value methodology
smooth_spline_fit <- smooth.spline(x = non_covid_icnsa_data$date, y = non_covid_icnsa_data$value, lambda = 0.6)
impute_val <- predict(smooth_spline_fit, x = as.numeric(covid_icnsa_data$date))
covid_icnsa_data$value <- impute_val$y

# Combine Covid and non-Covid data
complete_icnsa_data <- rbind(non_covid_icnsa_data, covid_icnsa_data) %>% arrange(date)

# Impute null values using Kalman filtering (state-space missing value methodology)
complete_kalman_icnsa_data <- rbind(non_covid_kalman_icnsa_data, covid_kalman_icnsa_data) %>% arrange(date)
complete_kalman_icnsa_data$value <- na_kalman(complete_kalman_icnsa_data$value, model = "StructTS", smooth = TRUE)

# Create time series object
icnsa_ts <- ts(complete_kalman_icnsa_data$value, frequency = 52)

# Fit structural time series model
model <- StructTS(icnsa_ts, type = "BSM")

# Forecast future values using the fitted model
pred <- forecast(model, h = 1)

nyiclaim_data <- fredr(series_id = "NYICLAIMS")
nyiclaim_ts <- ts(nyiclaim_data$value, frequency = 52)

adjusted_icnsa <- window(icnsa_ts, start = start(nyiclaim_ts), end = end(nyiclaim_ts))

structts_model <- StructTS(adjusted_icnsa)

nyiclaim_tail_value <- tail(nyiclaim_ts, n = 1)

pred <- forecast(structts_model, newxreg = nyiclaim_tail_value, h = 1)

# Print the forecasted mean value
print(pred$mean)
#> Time Series:
#> Start = c(39, 10) 
#> End = c(39, 10) 
#> Frequency = 52 
#> [1] 387285.8

Created on 2024-03-07 with reprex v2.0.2

Saloni-Sarbhai commented 2 months ago

Homework 4

library(reprex)
#> Warning: package 'reprex' was built under R version 4.2.3
library(readxl)
#> Warning: package 'readxl' was built under R version 4.2.3
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(ggplot2)
library(tseries)
#> Warning: package 'tseries' was built under R version 4.2.3
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
library(vars)
#> Warning: package 'vars' was built under R version 4.2.3
#> Loading required package: MASS
#> Loading required package: strucchange
#> Warning: package 'strucchange' was built under R version 4.2.3
#> Loading required package: zoo
#> Warning: package 'zoo' was built under R version 4.2.3
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
#> Loading required package: sandwich
#> Warning: package 'sandwich' was built under R version 4.2.3
#> Loading required package: urca
#> Warning: package 'urca' was built under R version 4.2.3
#> Loading required package: lmtest
#> Warning: package 'lmtest' was built under R version 4.2.3
library(BigVAR)
#> Warning: package 'BigVAR' was built under R version 4.2.3
#> Loading required package: lattice

series_1 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet1.xlsx")
series_2 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet2.xlsx")
series_3 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet3.xlsx")
series_4 <- read_excel("C:\\Users\\salon\\Downloads\\Assignment4_ts\\Sheet4.xlsx")

summary(series_1)
#>      Period                          Value          
#>  Min.   :1992-01-01 00:00:00.00   Length:396        
#>  1st Qu.:2000-03-24 06:00:00.00   Class :character  
#>  Median :2008-06-16 00:00:00.00   Mode  :character  
#>  Mean   :2008-06-16 02:10:54.54                     
#>  3rd Qu.:2016-09-08 12:00:00.00                     
#>  Max.   :2024-12-01 00:00:00.00
summary(series_2)
#>      Period                          Value          
#>  Min.   :1992-01-01 00:00:00.00   Length:396        
#>  1st Qu.:2000-03-24 06:00:00.00   Class :character  
#>  Median :2008-06-16 00:00:00.00   Mode  :character  
#>  Mean   :2008-06-16 02:10:54.54                     
#>  3rd Qu.:2016-09-08 12:00:00.00                     
#>  Max.   :2024-12-01 00:00:00.00
summary(series_3)
#>      Period                          Value          
#>  Min.   :1992-01-01 00:00:00.00   Length:396        
#>  1st Qu.:2000-03-24 06:00:00.00   Class :character  
#>  Median :2008-06-16 00:00:00.00   Mode  :character  
#>  Mean   :2008-06-16 02:10:54.54                     
#>  3rd Qu.:2016-09-08 12:00:00.00                     
#>  Max.   :2024-12-01 00:00:00.00
summary(series_4)
#>      Period                          Value          
#>  Min.   :1992-01-01 00:00:00.00   Length:396        
#>  1st Qu.:2000-03-24 06:00:00.00   Class :character  
#>  Median :2008-06-16 00:00:00.00   Mode  :character  
#>  Mean   :2008-06-16 02:10:54.54                     
#>  3rd Qu.:2016-09-08 12:00:00.00                     
#>  Max.   :2024-12-01 00:00:00.00

head(series_1)
#> # A tibble: 6 × 2
#>   Period              Value   
#>   <dttm>              <chr>   
#> 1 1992-01-01 00:00:00 2,27,721
#> 2 1992-02-01 00:00:00 2,28,860
#> 3 1992-03-01 00:00:00 2,38,604
#> 4 1992-04-01 00:00:00 2,39,877
#> 5 1992-05-01 00:00:00 2,43,732
#> 6 1992-06-01 00:00:00 2,45,693
head(series_2)
#> # A tibble: 6 × 2
#>   Period              Value   
#>   <dttm>              <chr>   
#> 1 1992-01-01 00:00:00 1,95,546
#> 2 1992-02-01 00:00:00 1,94,551
#> 3 1992-03-01 00:00:00 2,02,999
#> 4 1992-04-01 00:00:00 2,03,862
#> 5 1992-05-01 00:00:00 2,06,982
#> 6 1992-06-01 00:00:00 2,08,867
head(series_3)
#> # A tibble: 6 × 2
#>   Period              Value   
#>   <dttm>              <chr>   
#> 1 1992-01-01 00:00:00 2,18,731
#> 2 1992-02-01 00:00:00 2,19,614
#> 3 1992-03-01 00:00:00 2,29,686
#> 4 1992-04-01 00:00:00 2,30,568
#> 5 1992-05-01 00:00:00 2,35,135
#> 6 1992-06-01 00:00:00 2,36,595
head(series_4)
#> # A tibble: 6 × 2
#>   Period              Value  
#>   <dttm>              <chr>  
#> 1 1992-01-01 00:00:00 85081.0
#> 2 1992-02-01 00:00:00 85693.0
#> 3 1992-03-01 00:00:00 88670.0
#> 4 1992-04-01 00:00:00 89900.0
#> 5 1992-05-01 00:00:00 90226.0
#> 6 1992-06-01 00:00:00 91023.0

str(series_1$Value)
#>  chr [1:396] "2,27,721" "2,28,860" "2,38,604" "2,39,877" "2,43,732" ...

series_1$Value <- as.numeric(gsub(",", "", series_1$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_1$Value)) 
#> [1] 10
series_1$Value[is.na(series_1$Value)] <- mean(series_1$Value, na.rm = TRUE) 
str(series_1$Value)
#>  num [1:396] 227721 228860 238604 239877 243732 ...

str(series_2$Value)
#>  chr [1:396] "1,95,546" "1,94,551" "2,02,999" "2,03,862" "2,06,982" ...

series_2$Value <- as.numeric(gsub(",", "", series_2$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_2$Value)) 
#> [1] 10
series_2$Value[is.na(series_2$Value)] <- mean(series_2$Value, na.rm = TRUE) 
str(series_2$Value)
#>  num [1:396] 195546 194551 202999 203862 206982 ...

str(series_3$Value)
#>  chr [1:396] "2,18,731" "2,19,614" "2,29,686" "2,30,568" "2,35,135" ...

series_3$Value <- as.numeric(gsub(",", "", series_3$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_3$Value)) 
#> [1] 10
series_3$Value[is.na(series_3$Value)] <- mean(series_3$Value, na.rm = TRUE) 
str(series_3$Value)
#>  num [1:396] 218731 219614 229686 230568 235135 ...

str(series_4$Value)
#>  chr [1:396] "85081.0" "85693.0" "88670.0" "89900.0" "90226.0" "91023.0" ...

series_4$Value <- as.numeric(gsub(",", "", series_4$Value))
#> Warning: NAs introduced by coercion
sum(is.na(series_4$Value)) 
#> [1] 10
series_4$Value[is.na(series_4$Value)] <- mean(series_4$Value, na.rm = TRUE) 
str(series_4$Value)
#>  num [1:396] 85081 85693 88670 89900 90226 ...

str(series_1$Period)
#>  POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_1$Period <- as.Date(series_1$Period)
str(series_1$Period)
#>  Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...

str(series_2$Period)
#>  POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_2$Period <- as.Date(series_2$Period)
str(series_2$Period)
#>  Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...

str(series_3$Period)
#>  POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_3$Period <- as.Date(series_3$Period)
str(series_3$Period)
#>  Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...

str(series_4$Period)
#>  POSIXct[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...
series_4$Period <- as.Date(series_4$Period)
str(series_4$Period)
#>  Date[1:396], format: "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-01" ...

plot(series_1$Period, series_1$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")

plot(series_2$Period, series_2$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")

plot(series_3$Period, series_3$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")

plot(series_4$Period, series_4$Value, type = "l", xlab = "Date", ylab = "Value", main = "Time Series Plot")


series_1_ts <- ts(series_1$Value, frequency = 12)
series_2_ts <- ts(series_2$Value, frequency = 12)
series_3_ts <- ts(series_3$Value, frequency = 12)
series_4_ts <- ts(series_4$Value, frequency = 12)

series_1_decomp <- stl(series_1_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_1_decomp$time.series[, "trend"]
seasonal <- series_1_decomp$time.series[, "seasonal"]
remainder <- series_1_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_1_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
       col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)


series_2_decomp <- stl(series_2_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_2_decomp$time.series[, "trend"]
seasonal <- series_2_decomp$time.series[, "seasonal"]
remainder <- series_2_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_2_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
       col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)


series_3_decomp <- stl(series_3_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_3_decomp$time.series[, "trend"]
seasonal <- series_3_decomp$time.series[, "seasonal"]
remainder <- series_3_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_3_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
       col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)


series_4_decomp <- stl(series_4_ts, s.window="periodic")
# Extract components from stl decomposition
trend <- series_4_decomp$time.series[, "trend"]
seasonal <- series_4_decomp$time.series[, "seasonal"]
remainder <- series_4_decomp$time.series[, "remainder"]
# Plot original time series and its components using base R plotting
plot(series_4_ts, type = "l", main = "Seasonal Decomposition with STL")
lines(trend, col = "blue")
lines(seasonal, col = "red")
lines(remainder, col = "green")
legend("topright", legend = c("Original", "Trend", "Seasonal", "Remainder"),
       col = c("black", "blue", "red", "green"), lty = 1, cex = 0.8)


acf(series_1$Value, main = "Autocorrelation Function (ACF)")

pacf(series_1$Value, main = "Partial Autocorrelation Function (PACF)")


acf(series_2$Value, main = "Autocorrelation Function (ACF)")

pacf(series_2$Value, main = "Partial Autocorrelation Function (PACF)")


acf(series_3$Value, main = "Autocorrelation Function (ACF)")

pacf(series_3$Value, main = "Partial Autocorrelation Function (PACF)")

acf(series_4$Value, main = "Autocorrelation Function (ACF)")

pacf(series_4$Value, main = "Partial Autocorrelation Function (PACF)")


# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_1_ts)
print(adf_test_result)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  series_1_ts
#> Dickey-Fuller = -3.3479, Lag order = 7, p-value = 0.06297
#> alternative hypothesis: stationary

adf_test_result <- adf.test(series_2_ts)
print(adf_test_result)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  series_2_ts
#> Dickey-Fuller = -3.1657, Lag order = 7, p-value = 0.09394
#> alternative hypothesis: stationary

# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_3_ts)
print(adf_test_result)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  series_3_ts
#> Dickey-Fuller = -3.4025, Lag order = 7, p-value = 0.0537
#> alternative hypothesis: stationary

# Perform the ADF test on series_1_ts
adf_test_result <- adf.test(series_4_ts)
print(adf_test_result)
#> 
#>  Augmented Dickey-Fuller Test
#> 
#> data:  series_4_ts
#> Dickey-Fuller = -3.5854, Lag order = 7, p-value = 0.0345
#> alternative hypothesis: stationary

data_combined <- cbind(series_1_ts, series_2_ts, series_3_ts, series_4_ts)

var_model1 <- VAR(data_combined, p = 1)

var_model2 <- VAR(data_combined, p = 2)
summary(var_model1)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: series_1_ts, series_2_ts, series_3_ts, series_4_ts 
#> Deterministic variables: const 
#> Sample size: 395 
#> Log Likelihood: -14127.372 
#> Roots of the characteristic polynomial:
#> 0.9896 0.9459 0.9423 0.8988
#> Call:
#> VAR(y = data_combined, p = 1)
#> 
#> 
#> Estimation results for equation series_1_ts: 
#> ============================================ 
#> series_1_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1  2.746e-01  4.394e-01   0.625 0.532378    
#> series_2_ts.l1 -1.433e-02  1.016e-01  -0.141 0.887915    
#> series_3_ts.l1  8.841e-01  4.770e-01   1.854 0.064545 .  
#> series_4_ts.l1 -4.761e-01  1.574e-01  -3.025 0.002655 ** 
#> const           1.825e+04  4.925e+03   3.705 0.000242 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 11110 on 390 degrees of freedom
#> Multiple R-Squared: 0.9844,  Adjusted R-squared: 0.9843 
#> F-statistic:  6166 on 4 and 390 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_2_ts: 
#> ============================================ 
#> series_2_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1 -5.229e-01  3.532e-01  -1.480 0.139580    
#> series_2_ts.l1  9.333e-01  8.167e-02  11.429  < 2e-16 ***
#> series_3_ts.l1  6.974e-01  3.835e-01   1.819 0.069714 .  
#> series_4_ts.l1 -3.845e-01  1.265e-01  -3.038 0.002539 ** 
#> const           1.477e+04  3.959e+03   3.731 0.000219 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 8934 on 390 degrees of freedom
#> Multiple R-Squared: 0.9861,  Adjusted R-squared: 0.986 
#> F-statistic:  6934 on 4 and 390 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_3_ts: 
#> ============================================ 
#> series_3_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1 -6.505e-01  4.290e-01  -1.516 0.130254    
#> series_2_ts.l1 -1.432e-02  9.919e-02  -0.144 0.885284    
#> series_3_ts.l1  1.803e+00  4.657e-01   3.871 0.000127 ***
#> series_4_ts.l1 -4.600e-01  1.537e-01  -2.993 0.002942 ** 
#> const           1.778e+04  4.809e+03   3.697 0.000250 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 10850 on 390 degrees of freedom
#> Multiple R-Squared: 0.9843,  Adjusted R-squared: 0.9842 
#> F-statistic:  6129 on 4 and 390 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_4_ts: 
#> ============================================ 
#> series_4_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1   -0.40479    0.14503  -2.791  0.00551 ** 
#> series_2_ts.l1   -0.02234    0.03353  -0.666  0.50575    
#> series_3_ts.l1    0.50033    0.15744   3.178  0.00160 ** 
#> series_4_ts.l1    0.76611    0.05196  14.744  < 2e-16 ***
#> const          7729.08175 1625.67129   4.754 2.81e-06 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 3668 on 390 degrees of freedom
#> Multiple R-Squared: 0.9799,  Adjusted R-squared: 0.9797 
#> F-statistic:  4764 on 4 and 390 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>             series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts   123484188    96667878   120475608    37819614
#> series_2_ts    96667878    79809630    94260185    28442083
#> series_3_ts   120475608    94260185   117725540    36855737
#> series_4_ts    37819614    28442083    36855737    13454952
#> 
#> Correlation matrix of residuals:
#>             series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts      1.0000      0.9738      0.9992      0.9278
#> series_2_ts      0.9738      1.0000      0.9724      0.8679
#> series_3_ts      0.9992      0.9724      1.0000      0.9260
#> series_4_ts      0.9278      0.8679      0.9260      1.0000
summary(var_model2)
#> 
#> VAR Estimation Results:
#> ========================= 
#> Endogenous variables: series_1_ts, series_2_ts, series_3_ts, series_4_ts 
#> Deterministic variables: const 
#> Sample size: 394 
#> Log Likelihood: -14060.359 
#> Roots of the characteristic polynomial:
#> 0.9913 0.9499 0.9499 0.9148 0.2941 0.2285 0.1137 0.09701
#> Call:
#> VAR(y = data_combined, p = 2)
#> 
#> 
#> Estimation results for equation series_1_ts: 
#> ============================================ 
#> series_1_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)   
#> series_1_ts.l1    -0.5242     1.3616  -0.385  0.70050   
#> series_2_ts.l1     0.2258     0.3058   0.738  0.46066   
#> series_3_ts.l1     1.7463     1.3154   1.328  0.18512   
#> series_4_ts.l1    -1.0391     0.4548  -2.285  0.02287 * 
#> series_1_ts.l2     0.9589     1.3809   0.694  0.48786   
#> series_2_ts.l2    -0.2388     0.3029  -0.788  0.43092   
#> series_3_ts.l2    -1.0523     1.3405  -0.785  0.43292   
#> series_4_ts.l2     0.6475     0.4434   1.460  0.14498   
#> const          15865.7764  5110.6876   3.104  0.00205 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 11100 on 385 degrees of freedom
#> Multiple R-Squared: 0.9845,  Adjusted R-squared: 0.9842 
#> F-statistic:  3062 on 8 and 385 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_2_ts: 
#> ============================================ 
#> series_2_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1    -1.4203     1.0953  -1.297  0.19551    
#> series_2_ts.l1     1.2244     0.2460   4.978 9.71e-07 ***
#> series_3_ts.l1     1.5077     1.0582   1.425  0.15501    
#> series_4_ts.l1    -0.6167     0.3658  -1.686  0.09268 .  
#> series_1_ts.l2     1.0411     1.1108   0.937  0.34924    
#> series_2_ts.l2    -0.2921     0.2436  -1.199  0.23133    
#> series_3_ts.l2    -0.9750     1.0783  -0.904  0.36643    
#> series_4_ts.l2     0.2939     0.3567   0.824  0.41049    
#> const          13104.3752  4111.1417   3.188  0.00155 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 8925 on 385 degrees of freedom
#> Multiple R-Squared: 0.9862,  Adjusted R-squared: 0.9859 
#> F-statistic:  3439 on 8 and 385 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_3_ts: 
#> ============================================ 
#> series_3_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)   
#> series_1_ts.l1    -1.1503     1.3298  -0.865  0.38754   
#> series_2_ts.l1     0.2007     0.2986   0.672  0.50196   
#> series_3_ts.l1     2.3842     1.2846   1.856  0.06422 . 
#> series_4_ts.l1    -1.0350     0.4441  -2.330  0.02030 * 
#> series_1_ts.l2     0.6392     1.3486   0.474  0.63578   
#> series_2_ts.l2    -0.2142     0.2958  -0.724  0.46945   
#> series_3_ts.l2    -0.7480     1.3091  -0.571  0.56804   
#> series_4_ts.l2     0.6525     0.4330   1.507  0.13263   
#> const          15617.3320  4991.0244   3.129  0.00189 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 10840 on 385 degrees of freedom
#> Multiple R-Squared: 0.9844,  Adjusted R-squared: 0.9841 
#> F-statistic:  3041 on 8 and 385 DF,  p-value: < 2.2e-16 
#> 
#> 
#> Estimation results for equation series_4_ts: 
#> ============================================ 
#> series_4_ts = series_1_ts.l1 + series_2_ts.l1 + series_3_ts.l1 + series_4_ts.l1 + series_1_ts.l2 + series_2_ts.l2 + series_3_ts.l2 + series_4_ts.l2 + const 
#> 
#>                  Estimate Std. Error t value Pr(>|t|)    
#> series_1_ts.l1   -0.45616    0.44639  -1.022  0.30748    
#> series_2_ts.l1    0.05236    0.10024   0.522  0.60174    
#> series_3_ts.l1    0.61657    0.43124   1.430  0.15360    
#> series_4_ts.l1    0.40941    0.14909   2.746  0.00632 ** 
#> series_1_ts.l2    0.09371    0.45270   0.207  0.83612    
#> series_2_ts.l2   -0.07897    0.09929  -0.795  0.42694    
#> series_3_ts.l2   -0.16375    0.43944  -0.373  0.70963    
#> series_4_ts.l2    0.38386    0.14536   2.641  0.00861 ** 
#> const          6918.65776 1675.45331   4.129 4.46e-05 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> 
#> Residual standard error: 3637 on 385 degrees of freedom
#> Multiple R-Squared: 0.9803,  Adjusted R-squared: 0.9799 
#> F-statistic:  2393 on 8 and 385 DF,  p-value: < 2.2e-16 
#> 
#> 
#> 
#> Covariance matrix of residuals:
#>             series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts   123106938    96453845   120136901    37487454
#> series_2_ts    96453845    79661550    94081957    28254909
#> series_3_ts   120136901    94081957   117409501    36535456
#> series_4_ts    37487454    28254909    36535456    13230873
#> 
#> Correlation matrix of residuals:
#>             series_1_ts series_2_ts series_3_ts series_4_ts
#> series_1_ts      1.0000      0.9740      0.9993      0.9289
#> series_2_ts      0.9740      1.0000      0.9728      0.8703
#> series_3_ts      0.9993      0.9728      1.0000      0.9270
#> series_4_ts      0.9289      0.8703      0.9270      1.0000

forecast_1 <- predict(var_model1, n.ahead = 1)
forecast_2 <- predict(var_model2, n.ahead = 1)

forecasted_value_var1 <- forecast_1$fcst[1]$series_1_ts[1]
print(forecasted_value_var1)
#> [1] 403354.7

forecasted_value_var2 <- forecast_2$fcst[1]$series_1_ts[1]
print(forecasted_value_var2)
#> [1] 403326.7

causality(var_model1)
#> Warning in causality(var_model1): 
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (series_1_ts) as cause variable.
#> $Granger
#> 
#>  Granger causality H0: series_1_ts do not Granger-cause series_2_ts
#>  series_3_ts series_4_ts
#> 
#> data:  VAR object var_model1
#> F-Test = 6.0081, df1 = 3, df2 = 1560, p-value = 0.0004554
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: series_1_ts and series_2_ts
#>  series_3_ts series_4_ts
#> 
#> data:  VAR object var_model1
#> Chi-squared = 197.36, df = 3, p-value < 2.2e-16
causality(var_model2)
#> Warning in causality(var_model2): 
#> Argument 'cause' has not been specified;
#> using first variable in 'x$y' (series_1_ts) as cause variable.
#> $Granger
#> 
#>  Granger causality H0: series_1_ts do not Granger-cause series_2_ts
#>  series_3_ts series_4_ts
#> 
#> data:  VAR object var_model2
#> F-Test = 3.5488, df1 = 6, df2 = 1540, p-value = 0.001707
#> 
#> 
#> $Instant
#> 
#>  H0: No instantaneous causality between: series_1_ts and series_2_ts
#>  series_3_ts series_4_ts
#> 
#> data:  VAR object var_model2
#> Chi-squared = 196.87, df = 3, p-value < 2.2e-16

bigvar_matrix <- cbind(series_1_ts, series_2_ts, series_3_ts, series_4_ts)

sparse_var_fit <- BigVAR.fit(Y=bigvar_matrix,struct='Basic',p=2,lambda=1)[,,1]

print(sparse_var_fit)
#>           [,1]      [,2]        [,3]      [,4]        [,5]         [,6]
#> [1,]  6493.742 0.4128257  0.28983109 0.3954918  0.03836217  0.001780558
#> [2,] 14955.197 0.2288468  0.61577392 0.2115015 -0.15968421 -0.113973639
#> [3,]  5031.704 0.3957032  0.26937302 0.3992144  0.07037622 -0.009087329
#> [4,] 13846.534 0.1074036 -0.05116922 0.1594062  0.28899938 -0.010718874
#>             [,7]          [,8]        [,9]
#> [1,] -0.04855471 -0.0093409980 -0.06925488
#> [2,]  0.26899770 -0.1283131254 -0.22089028
#> [3,] -0.06323737  0.0002354252 -0.03661735
#> [4,] -0.14614741  0.0447493532  0.25200802

summary(sparse_var_fit)
#>        V1              V2               V3                 V4        
#>  Min.   : 5032   Min.   :0.1074   Min.   :-0.05117   Min.   :0.1594  
#>  1st Qu.: 6128   1st Qu.:0.1985   1st Qu.: 0.18924   1st Qu.:0.1985  
#>  Median :10170   Median :0.3123   Median : 0.27960   Median :0.3035  
#>  Mean   :10082   Mean   :0.2862   Mean   : 0.28095   Mean   :0.2914  
#>  3rd Qu.:14124   3rd Qu.:0.4000   3rd Qu.: 0.37132   3rd Qu.:0.3964  
#>  Max.   :14955   Max.   :0.4128   Max.   : 0.61577   Max.   :0.3992  
#>        V5                 V6                  V7                  V8           
#>  Min.   :-0.15968   Min.   :-0.113974   Min.   :-0.146147   Min.   :-0.128313  
#>  1st Qu.:-0.01115   1st Qu.:-0.036533   1st Qu.:-0.083965   1st Qu.:-0.039084  
#>  Median : 0.05437   Median :-0.009903   Median :-0.055896   Median :-0.004553  
#>  Mean   : 0.05951   Mean   :-0.033000   Mean   : 0.002765   Mean   :-0.023167  
#>  3rd Qu.: 0.12503   3rd Qu.:-0.006370   3rd Qu.: 0.030833   3rd Qu.: 0.011364  
#>  Max.   : 0.28900   Max.   : 0.001781   Max.   : 0.268998   Max.   : 0.044749  
#>        V9          
#>  Min.   :-0.22089  
#>  1st Qu.:-0.10716  
#>  Median :-0.05294  
#>  Mean   :-0.01869  
#>  3rd Qu.: 0.03554  
#>  Max.   : 0.25201

Created on 2024-04-10 with reprex v2.0.2