Open satyamti1250 opened 4 months ago
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
#set api key for fredr
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
#read the ICSNA data
time_series_data <- fredr(series_id = "ICNSA")
covid_start <- as.Date("2020-02-14")
covid_end <- as.Date("2021-01-02")
time_series_data$date <- as.Date(time_series_data$date)
#Remove the covid dates
time_series_data_filtered <- time_series_data |>
filter(!(date >= covid_start & date <= covid_end))
#plot of claims
ggplot(time_series_data_filtered, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims Data",
x = "Year",
y = "Number")
acf(time_series_data_filtered$value)
#Auto Correlation Plot
pacf(time_series_data_filtered$value)
#Auto Correlation Plot
ts_data <- ts(time_series_data_filtered$value, frequency = 52)
arima_model <- auto.arima(ts_data)
forecast_week <- predict(arima_model, n.ahead = 1)
print(forecast_week$pred)
#> Time Series:
#> Start = c(57, 21)
#> End = c(57, 21)
#> Frequency = 52
#> [1] 237781.5
#Printing the values removing covid data
Unsure of the model selection hence going with auto arima Created on 2024-02-08 with reprex v2.1.0
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa <- fredr(series_id = "ICNSA")
icnsa$date <- as.Date(icnsa$date)
unemployment_rate <- fredr(series_id = "UNRATE")
unemployment_rate$date <- as.Date(unemployment_rate$date)
summary(icnsa)
#> date series_id value realtime_start
#> Min. :1967-01-07 Length:2980 Min. : 133000 Min. :2024-02-21
#> 1st Qu.:1981-04-16 Class :character 1st Qu.: 265800 1st Qu.:2024-02-21
#> Median :1995-07-25 Mode :character Median : 326919 Median :2024-02-21
#> Mean :1995-07-25 Mean : 365013 Mean :2024-02-21
#> 3rd Qu.:2009-11-01 3rd Qu.: 406725 3rd Qu.:2024-02-21
#> Max. :2024-02-10 Max. :6161268 Max. :2024-02-21
#> realtime_end
#> Min. :2024-02-21
#> 1st Qu.:2024-02-21
#> Median :2024-02-21
#> Mean :2024-02-21
#> 3rd Qu.:2024-02-21
#> Max. :2024-02-21
summary(unemployment_rate)
#> date series_id value realtime_start
#> Min. :1948-01-01 Length:913 Min. : 2.500 Min. :2024-02-21
#> 1st Qu.:1967-01-01 Class :character 1st Qu.: 4.400 1st Qu.:2024-02-21
#> Median :1986-01-01 Mode :character Median : 5.500 Median :2024-02-21
#> Mean :1985-12-31 Mean : 5.703 Mean :2024-02-21
#> 3rd Qu.:2005-01-01 3rd Qu.: 6.700 3rd Qu.:2024-02-21
#> Max. :2024-01-01 Max. :14.800 Max. :2024-02-21
#> realtime_end
#> Min. :2024-02-21
#> 1st Qu.:2024-02-21
#> Median :2024-02-21
#> Mean :2024-02-21
#> 3rd Qu.:2024-02-21
#> Max. :2024-02-21
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims",
x = "Year",
y = "Number")
ggplot(unemployment_rate, aes(x = date, y = value)) +
geom_line() +
labs(title = "Unemployed ",
x = "Year",
y = "Number")
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
icnsa <- icnsa %>%
mutate(Year = year(date),
Month = month(date))
# since unemployement data is monthly converting icnsa to monthly using mean
icnsa_monthly <- icnsa %>%
group_by(Month, Year) %>%
summarise(ICNSA = mean(value, na.rm = TRUE))
#> `summarise()` has grouped output by 'Month'. You can override using the
#> `.groups` argument.
unemployment_mrate <- unemployment_rate %>%
mutate(Year = year(date), Month = month(date))
# Merge the datasets by matching months
merged_data <- merge(icnsa_monthly, unemployment_mrate, by = c("Year", "Month"), all = TRUE)
merged_data <- na.omit(merged_data)
# Compute correlation
correlation <- cor(merged_data$ICNSA, merged_data$value)
cat("correlation is:",correlation)
#> correlation is: 0.5061069
library(forecast)
library(tseries)
icsna_ts <- ts(merged_data$ICNSA, frequency = 12, start = c(min(merged_data$Year), 1))
adf_result_icsna <- adf.test(icsna_ts)
#> Warning in adf.test(icsna_ts): p-value smaller than printed p-value
adf_result_icsna
#>
#> Augmented Dickey-Fuller Test
#>
#> data: icsna_ts
#> Dickey-Fuller = -6.0879, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary
print (adf_result_icsna)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: icsna_ts
#> Dickey-Fuller = -6.0879, Lag order = 8, p-value = 0.01
#> alternative hypothesis: stationary
covariate_ts <- ts(merged_data$value, frequency = 12, start = c(min(merged_data$Year), 1))
adf_result_covariate <- adf.test(covariate_ts)
print (adf_result_covariate)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: covariate_ts
#> Dickey-Fuller = -3.1865, Lag order = 8, p-value = 0.09025
#> alternative hypothesis: stationary
final_model <- auto.arima(icsna_ts, xreg = covariate_ts)
summary(final_model)
#> Series: icsna_ts
#> Regression with ARIMA(3,1,4)(0,0,2)[12] errors
#>
#> Coefficients:
#> ar1 ar2 ar3 ma1 ma2 ma3 ma4 sma1
#> -1.5619 -0.7586 0.0267 1.6285 0.2715 -0.9754 -0.5042 0.2090
#> s.e. 0.2091 0.1527 0.0762 0.2049 0.1690 0.1395 0.1036 0.0418
#> sma2 drift xreg
#> 0.1136 47.5389 195093.668
#> s.e. 0.0376 2453.2302 9553.436
#>
#> sigma^2 = 1.29e+10: log likelihood = -8927.72
#> AIC=17879.43 AICc=17879.9 BIC=17933.77
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -114.6615 112587.9 57531.23 -0.3778597 15.14741 0.6172634
#> ACF1
#> Training set 0.0001024701
checkresiduals(final_model)
#>
#> Ljung-Box test
#>
#> data: Residuals from Regression with ARIMA(3,1,4)(0,0,2)[12] errors
#> Q* = 20.677, df = 15, p-value = 0.1475
#>
#> Model df: 9. Total lags used: 24
forecast_values <- forecast(final_model, xreg = covariate_ts, h=1)
single_forecast <- forecast_values$mean[1]
cat ("Next predicted value is ", single_forecast)
#> Next predicted value is 296001.5
Created on 2024-02-21 with reprex v2.1.0 Mentioned views and infernces in the pdf attached
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
library(urca)
library(tidyverse)
library(readxl)
library(forecast)
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa <- fredr(series_id = "ICNSA")
icnsa$date <- as.Date(icnsa$date)
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims Original ",
x = "Year",
y = "Number")
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-06-30")
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "red") +
labs(title = "Claims Marking the Covid period ",
x = "Year",
y = "Number")
#Filter covid data out
non_covid_data <- icnsa %>%
filter(date < start_date | date > end_date)
#Fitted spline on non-covid data
spline_fit <- smooth.spline(x = as.numeric(non_covid_data$date), y = non_covid_data$value,lambda = 0.7)
#filtere covid set
covid_period_data <- icnsa %>%
filter(date >= start_date & date <= end_date)
#impute new values
imputed_values <- predict(spline_fit, x = as.numeric(covid_period_data$date))
covid_period_data$value <- imputed_values$y
#new values replacing covid values with imputed values
updated_icnsa_data <- bind_rows(non_covid_data, imputed_values)
updated_icnsa_data <- updated_icnsa_data %>%
filter(date < Sys.Date())
ggplot(updated_icnsa_data, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims",
x = "Year",
y = "Number")
ts_data <- ts(updated_icnsa_data$value, frequency = 12)
hwm <- HoltWinters(ts_data, seasonal = "multiplicative")
f_hwm <- forecast(hwm, h = 1)
print(f_hwm)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Sep 243 210180.3 155439.7 264920.9 126461.8 293898.8
hwa <- HoltWinters(ts_data, seasonal = "additive")
f_hwa <- forecast(hwa, h = 1)
print(f_hwa)
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Sep 243 212419.1 145114.7 279723.5 109485.9 315352.3
Created on 2024-02-26 with reprex v2.1.0
Why did you set frequency = 12
? This is weekly data.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(fredr)
library(ggplot2)
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(tseries)
library(urca)
library(tidyverse)
library(readxl)
library(forecast)
library(dlm)
#>
#> Attaching package: 'dlm'
#> The following object is masked from 'package:ggplot2':
#>
#> %+%
fredr_set_key("619954596c335c6cd3c4fc1f7a346118")
icnsa <- fredr(series_id = "ICNSA")
icnsa$date <- as.Date(icnsa$date)
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims Original ",
x = "Year",
y = "Number")
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-06-30")
ggplot(icnsa, aes(x = date, y = value)) +
geom_line() +
geom_vline(xintercept = as.numeric(start_date), linetype = "dashed", color = "red") +
geom_vline(xintercept = as.numeric(end_date), linetype = "dashed", color = "red") +
labs(title = "Claims Marking the Covid period ",
x = "Year",
y = "Number")
start_date <- as.Date("2020-03-01")
end_date <- as.Date("2021-06-30")
covid_period_data <- icnsa %>%
filter(date >= start_date & date <= end_date)
non_covid_data <- icnsa %>%
filter(date < start_date | date > end_date)
warm_up_start_date <- as.Date("2020-01-01")
warm_up_data <- icnsa %>%
filter(date >= warm_up_start_date & date < start_date)
combined_data <- bind_rows(warm_up_data, covid_period_data)
y <- covid_period_data$value
buildModel <- function() {
dV_estimate <- var(non_covid_data$value)
dW_estimate <- 2500000
dlmModPoly(order = 1, dV = dV_estimate, dW = dW_estimate)
}
mod_fitted <- buildModel()
fit <- dlmFilter(combined_data$value, mod_fitted)
smoothed <- dlmSmooth(fit)
smoothed_estimates <- tail(smoothed$s[-1], n = nrow(covid_period_data))
icnsa_smoothed <- icnsa
icnsa_smoothed$value[icnsa$date >= start_date & icnsa$date <= end_date] <- smoothed_estimates
icnsa_smoothed$date <- as.Date(icnsa$date)
ggplot(icnsa_smoothed, aes(x = date, y = value)) +
geom_line() +
labs(title = "Claims Smooth ",
x = "Year",
y = "Number")
library(dlm)
y_smoothed<- icnsa_smoothed$value
dV_start <- var(diff(y_smoothed))
dW_start <- dV_start / 10
dS_start <- var(y_smoothed) / 2
buildModel <- function(parm) {
dV <- parm[1]
dW <- parm[2]
dS <- parm[3]
trend <- dlmModPoly(order = 2, dV = dV)
seasonal <- dlmModSeas(frequency = 12, dW = rep(dS, 11))
model <- trend + seasonal
return(model)
}
initial_parms <- c(dV_start, dW_start, dS_start)
# Fit the model
fit <- dlmMLE(y_smoothed, parm = initial_parms, build = buildModel)
mod_fitted <- buildModel(fit$par)
filtered <- dlmFilter(y_smoothed, mod_fitted)
smoothed <- dlmSmooth(filtered)
plot(y_smoothed, type = "l", col = "blue", main = "Original vs. Smoothed Series", xlab = "Time", ylab = "Value")
lines(smoothed$s[,1], col = "red") # Plotting the first state variable
legend("topright", legend = c("Original", "Smoothed"), col = c("blue", "red"), lty = 1)
forecast_length <- 1
forecasted <- dlmForecast(filtered, nAhead = forecast_length)
forecasted_mean <- forecasted$a[1]
forecasted_mean
#> [1] 239868
alaska <- fredr(series_id = "AKCCLAIMS")
alaska$date <- as.Date(alaska$date)
alaska$value[is.na(alaska$value)] <- mean(alaska$value, na.rm = TRUE)
y_smoothed_trimmed <- tail(y_smoothed, 1985)
dV_start <- var(diff(y_smoothed))
dW_start <- dV_start / 10
dS_start <- var(y_smoothed) / 2
buildModelWithCovariate <- function(parm) {
dV <- parm[1]
dW <- parm[2]
dS <- parm[3]
trend <- dlmModPoly(order = 2, dV = dV)
seasonal <- dlmModSeas(frequency = 12, dW = rep(dS, 11))
covariate <- dlmModReg(alaska$value, dV = dV)
model <- trend + seasonal + covariate
return(model)
}
initial_parms_with_covariate <- c(dV_start, dW_start, dS_start)
fit_with_covariate <- dlmMLE(y_smoothed_trimmed, parm = initial_parms_with_covariate, build = buildModelWithCovariate)
mod_fitted_with_covariate <- buildModelWithCovariate(fit_with_covariate$par)
filtered_with_covariate <- dlmFilter(y_smoothed, mod_fitted_with_covariate)
smoothed_with_covariate <- dlmSmooth(filtered_with_covariate)
library(forecast)
arima_model <- auto.arima(y_smoothed_trimmed, xreg = alaska$value)
forecast_arima <- forecast(arima_model, xreg = 300000, h = 1)
forecast_arima
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> 1986 2152040 2091898 2212182 2060061 2244020
Created on 2024-03-07 with reprex v2.1.0
1- Read the data. 2- Visualized the plot and marked covid dates. 3- I tried to capture the variance of non covid data in dv for dw I was getting very bad results with small values so tried bigger values selected the one which gave good results. Also the initial values I was getting were not very good so I used a small warm up period for my Kalman filter. used it from Jan-2020 and then discarded the initial values from the warm up period. The imputed values are better than the ones from previous spline imputation. 4- Fitted the whole new data in the structured model and smoothened the whole series, got a forecast 5- Fitted the covariate and got a forecast.
Series 1 - Textile products value of shipments
This is my primary series.
It depicts the monthly value of shipments in the textile product industry.
Series 2- Apparel value of shipments
It depicts the monthly value of shipments in the apparel industry.
Series 3- Textile mill of shipments
It depicts the monthly value of shipments in the textile mill industry.
Series 4- Textile products finished inventory
It depicts the monthly value ready industry in the textile product industry.
Part of the economy
My series deals with the clothing industry. My primary series is Textile products shipments values so it should have relationship with Apparel and Textile mills shipment values additionally for the fourth series I have the finished inventory value from the Textile products industry.
Text_Prod_VS = read.csv("/Users/satyamtiwari/Desktop/sem3/T_S/HW4/14SVS.csv")
Text_Prod_FI = read.csv("/Users/satyamtiwari/Desktop/sem3/T_S/HW4/14SFI.csv")
Text_Mill_VS = read.csv("/Users/satyamtiwari/Desktop/sem3/T_S/HW4/13SVS.csv")
Apparel_VS = read.csv("/Users/satyamtiwari/Desktop/sem3/T_S/HW4/15SVS.csv")
tail(Text_Prod_VS)
#> Period Value
#> 391
#> 392
#> 393
#> 394
#> 395
#> 396
head(Text_Prod_FI)
#> Period Value
#> 1 Jan-1992 1,639
#> 2 Feb-1992 1,641
#> 3 Mar-1992 1,633
#> 4 Apr-1992 1,590
#> 5 May-1992 1,521
#> 6 Jun-1992 1,549
head(Text_Mill_VS)
#> Period Value
#> 1 Jan-1992 4,324
#> 2 Feb-1992 4,171
#> 3 Mar-1992 4,342
#> 4 Apr-1992 4,427
#> 5 May-1992 4,354
#> 6 Jun-1992 4,432
head(Apparel_VS)
#> Period Value
#> 1 Jan-1992 4,735
#> 2 Feb-1992 4,753
#> 3 Mar-1992 4,900
#> 4 Apr-1992 4,987
#> 5 May-1992 5,027
#> 6 Jun-1992 5,223
library(ggplot2)
library(zoo)
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
convert_dates <- function(date_str) {
as.Date(zoo::as.yearmon(date_str, "%b-%Y"), frac = 1)
}
convert_values <- function(value_str) {
as.numeric(gsub(",", "", value_str))
}
Text_Prod_VS$Date <- convert_dates(Text_Prod_VS$Period)
Text_Prod_FI$Date <- convert_dates(Text_Prod_FI$Period)
Text_Mill_VS$Date <- convert_dates(Text_Mill_VS$Period)
Apparel_VS$Date <- convert_dates(Apparel_VS$Period)
Text_Prod_VS$Value <- convert_values(Text_Prod_VS$Value)
Text_Prod_FI$Value <- convert_values(Text_Prod_FI$Value)
Text_Mill_VS$Value <- convert_values(Text_Mill_VS$Value)
Apparel_VS$Value <- convert_values(Apparel_VS$Value)
# Combining the datasets
combined_data <- rbind(data.frame(Value = Text_Prod_VS$Value, Date = Text_Prod_VS$Date, Series = 'Textile Products Shipments'),
data.frame(Value = Text_Prod_FI$Value, Date = Text_Prod_FI$Date, Series = 'Textile Products Finished Inventory'),
data.frame(Value = Text_Mill_VS$Value, Date = Text_Mill_VS$Date, Series = 'Textile Mill Shipments'),
data.frame(Value = Apparel_VS$Value, Date = Apparel_VS$Date, Series = 'Apparel Shipments'))
ggplot(combined_data, aes(x = Date, y = Value, color = Series)) +
geom_line() +
theme_light() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
scale_y_continuous(name = "Value", labels = scales::comma) +
labs(title = "Value of Shipments and Inventory in the Textile and Apparel Industry",
subtitle = "Monthly Data") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#> Warning: Removed 40 rows containing missing values (`geom_line()`).
Now we do check for trend in the data we see that there is no clear upward or downward trend in the textile production, but it changes its trend along the line
library(ggplot2)
library(zoo)
RollingMean <- rollmean(Text_Prod_VS$Value, 12, fill = NA)
ggplot(Text_Prod_VS, aes(x = Date)) +
geom_line(aes(y = Value), colour = "blue") +
geom_line(aes(y = RollingMean), colour = "red") +
labs(title = "Trend in Textile Products Shipments", x = "Date", y = "Value")
#> Warning: Removed 10 rows containing missing values (`geom_line()`).
#> Warning: Removed 21 rows containing missing values (`geom_line()`).
Now we check for correlation and find that there a great correlation between Apparel_VS and Text_Mill_VS
a good correlation among Text_Mill_VS and Text_Prod_VS, Text_Prod_VS and Apparel_VS
a not so good correlation between Text_Prod_FI and the other 3 series, so maybe this 4th series was not a very good idea.
Text_Prod_VS <- na.omit(Text_Prod_VS)
Text_Prod_FI <- na.omit(Text_Prod_FI)
Text_Mill_VS <- na.omit(Text_Mill_VS)
Apparel_VS <- na.omit(Apparel_VS)
data_ts <- ts(data.frame(Text_Prod_VS$Value, Text_Prod_FI$Value, Text_Mill_VS$Value, Apparel_VS$Value), start=c(1992, 1), frequency=12)
cor_matrix_clean <- cor(data_ts)
print(cor_matrix_clean)
#> Text_Prod_VS.Value Text_Prod_FI.Value Text_Mill_VS.Value
#> Text_Prod_VS.Value 1.0000000 0.2216441 0.6436364
#> Text_Prod_FI.Value 0.2216441 1.0000000 0.1577970
#> Text_Mill_VS.Value 0.6436364 0.1577970 1.0000000
#> Apparel_VS.Value 0.6410749 0.1942598 0.9827279
#> Apparel_VS.Value
#> Text_Prod_VS.Value 0.6410749
#> Text_Prod_FI.Value 0.1942598
#> Text_Mill_VS.Value 0.9827279
#> Apparel_VS.Value 1.0000000
#
library(tseries)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
now before applying VAR we need to make sure all four of our series are stationary
adf.test(Text_Prod_VS$Value)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Prod_VS$Value
#> Dickey-Fuller = -2.2114, Lag order = 7, p-value = 0.4879
#> alternative hypothesis: stationary
#p-value = 0.4434 is the p-value so clearly it is not stationary we will now try to differentiate and check
Text_Prod_VS_diff <- diff(Text_Prod_VS$Value)
adf_test_diff <- adf.test(Text_Prod_VS_diff, alternative = "stationary")
#> Warning in adf.test(Text_Prod_VS_diff, alternative = "stationary"): p-value
#> smaller than printed p-value
print(adf_test_diff)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Prod_VS_diff
#> Dickey-Fuller = -5.8146, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(Text_Prod_FI$Value)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Prod_FI$Value
#> Dickey-Fuller = -2.6126, Lag order = 7, p-value = 0.3185
#> alternative hypothesis: stationary
#p-value = 0.3185 is the p-value so clearly it is not stationary we will now try to differentiate and check
Text_Prod_FI_diff <- diff(Text_Prod_FI$Value)
adf_test_diff <- adf.test(Text_Prod_FI_diff, alternative = "stationary")
#> Warning in adf.test(Text_Prod_FI_diff, alternative = "stationary"): p-value
#> smaller than printed p-value
print(adf_test_diff)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Prod_FI_diff
#> Dickey-Fuller = -4.8275, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(Text_Mill_VS$Value)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Mill_VS$Value
#> Dickey-Fuller = -2.0483, Lag order = 7, p-value = 0.5568
#> alternative hypothesis: stationary
#p-value = 0.5568 is the p-value so clearly it is not stationary we will now try to differentiate and check
Text_Mill_VS_diff <- diff(Text_Mill_VS$Value)
adf_test_diff <- adf.test(Text_Mill_VS_diff, alternative = "stationary")
#> Warning in adf.test(Text_Mill_VS_diff, alternative = "stationary"): p-value
#> smaller than printed p-value
print(adf_test_diff)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Text_Mill_VS_diff
#> Dickey-Fuller = -6.5932, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
adf.test(Apparel_VS$Value)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Apparel_VS$Value
#> Dickey-Fuller = -0.68814, Lag order = 7, p-value = 0.9712
#> alternative hypothesis: stationary
#p-value = 0.9712 is the p-value so clearly it is not stationary we will now try to differentiate and check
Apparel_VS_diff <- diff(Apparel_VS$Value)
adf_test_diff <- adf.test(Apparel_VS_diff, alternative = "stationary")
#> Warning in adf.test(Apparel_VS_diff, alternative = "stationary"): p-value
#> smaller than printed p-value
print(adf_test_diff)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: Apparel_VS_diff
#> Dickey-Fuller = -5.664, Lag order = 7, p-value = 0.01
#> alternative hypothesis: stationary
so differencing once resolves our issue and all 4 of our series are now stationary
library(vars)
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: strucchange
#> Loading required package: sandwich
#> Loading required package: urca
#> Loading required package: lmtest
data_clean <- ts(data.frame(Text_Prod_VS_diff, Text_Prod_FI_diff, Text_Mill_VS_diff, Apparel_VS_diff ), start=c(1992, 1), frequency=12)
VAR1_model <- VAR(data_clean, p=1)
summary(VAR1_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: Text_Prod_VS_diff, Text_Prod_FI_diff, Text_Mill_VS_diff, Apparel_VS_diff
#> Deterministic variables: const
#> Sample size: 384
#> Log Likelihood: -8393.277
#> Roots of the characteristic polynomial:
#> 0.3566 0.3341 0.3341 0.1667
#> Call:
#> VAR(y = data_clean, p = 1)
#>
#>
#> Estimation results for equation Text_Prod_VS_diff:
#> ==================================================
#> Text_Prod_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 -0.231726 0.052762 -4.392 1.46e-05 ***
#> Text_Prod_FI_diff.l1 0.158763 0.092230 1.721 0.086 .
#> Text_Mill_VS_diff.l1 0.034980 0.037241 0.939 0.348
#> Apparel_VS_diff.l1 -0.001025 0.028652 -0.036 0.971
#> const 0.806526 2.839823 0.284 0.777
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 55.26 on 379 degrees of freedom
#> Multiple R-Squared: 0.05815, Adjusted R-squared: 0.04821
#> F-statistic: 5.85 on 4 and 379 DF, p-value: 0.000141
#>
#>
#> Estimation results for equation Text_Prod_FI_diff:
#> ==================================================
#> Text_Prod_FI_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 0.049657 0.027449 1.809 0.0712 .
#> Text_Prod_FI_diff.l1 0.349064 0.047983 7.275 2.01e-12 ***
#> Text_Mill_VS_diff.l1 -0.036987 0.019375 -1.909 0.0570 .
#> Apparel_VS_diff.l1 0.006125 0.014907 0.411 0.6814
#> const -0.445081 1.477435 -0.301 0.7634
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 28.75 on 379 degrees of freedom
#> Multiple R-Squared: 0.1314, Adjusted R-squared: 0.1223
#> F-statistic: 14.34 on 4 and 379 DF, p-value: 6.566e-11
#>
#>
#> Estimation results for equation Text_Mill_VS_diff:
#> ==================================================
#> Text_Mill_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 0.21444 0.07391 2.901 0.00393 **
#> Text_Prod_FI_diff.l1 0.07146 0.12920 0.553 0.58049
#> Text_Mill_VS_diff.l1 -0.28268 0.05217 -5.419 1.07e-07 ***
#> Apparel_VS_diff.l1 -0.04545 0.04014 -1.132 0.25819
#> const -7.72508 3.97805 -1.942 0.05289 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 77.41 on 379 degrees of freedom
#> Multiple R-Squared: 0.08689, Adjusted R-squared: 0.07725
#> F-statistic: 9.016 on 4 and 379 DF, p-value: 5.777e-07
#>
#>
#> Estimation results for equation Apparel_VS_diff:
#> ================================================
#> Apparel_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 -0.02605 0.09482 -0.275 0.7837
#> Text_Prod_FI_diff.l1 0.08767 0.16575 0.529 0.5972
#> Text_Mill_VS_diff.l1 0.03784 0.06693 0.565 0.5722
#> Apparel_VS_diff.l1 -0.31088 0.05149 -6.037 3.73e-09 ***
#> const -13.14700 5.10346 -2.576 0.0104 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 99.31 on 379 degrees of freedom
#> Multiple R-Squared: 0.09477, Adjusted R-squared: 0.08522
#> F-statistic: 9.92 on 4 and 379 DF, p-value: 1.212e-07
#>
#>
#>
#> Covariance matrix of residuals:
#> Text_Prod_VS_diff Text_Prod_FI_diff Text_Mill_VS_diff
#> Text_Prod_VS_diff 3053.99 -39.02 1403.10
#> Text_Prod_FI_diff -39.02 826.61 52.54
#> Text_Mill_VS_diff 1403.10 52.54 5992.72
#> Apparel_VS_diff 1309.15 149.28 2055.79
#> Apparel_VS_diff
#> Text_Prod_VS_diff 1309.1
#> Text_Prod_FI_diff 149.3
#> Text_Mill_VS_diff 2055.8
#> Apparel_VS_diff 9863.1
#>
#> Correlation matrix of residuals:
#> Text_Prod_VS_diff Text_Prod_FI_diff Text_Mill_VS_diff
#> Text_Prod_VS_diff 1.00000 -0.02456 0.32798
#> Text_Prod_FI_diff -0.02456 1.00000 0.02361
#> Text_Mill_VS_diff 0.32798 0.02361 1.00000
#> Apparel_VS_diff 0.23853 0.05228 0.26740
#> Apparel_VS_diff
#> Text_Prod_VS_diff 0.23853
#> Text_Prod_FI_diff 0.05228
#> Text_Mill_VS_diff 0.26740
#> Apparel_VS_diff 1.00000
VARp_model <- VARselect(data_clean, lag.max=20, type="both")$selection["AIC(n)"]
VARp_best_model <- VAR(data_clean, p=VARp_model)
summary(VARp_best_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: Text_Prod_VS_diff, Text_Prod_FI_diff, Text_Mill_VS_diff, Apparel_VS_diff
#> Deterministic variables: const
#> Sample size: 381
#> Log Likelihood: -8246.913
#> Roots of the characteristic polynomial:
#> 0.8612 0.6762 0.6762 0.6524 0.605 0.605 0.5987 0.5664 0.5664 0.5414 0.5414 0.5074 0.5074 0.3799 0.3799 0.1637
#> Call:
#> VAR(y = data_clean, p = VARp_model)
#>
#>
#> Estimation results for equation Text_Prod_VS_diff:
#> ==================================================
#> Text_Prod_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + Text_Prod_VS_diff.l2 + Text_Prod_FI_diff.l2 + Text_Mill_VS_diff.l2 + Apparel_VS_diff.l2 + Text_Prod_VS_diff.l3 + Text_Prod_FI_diff.l3 + Text_Mill_VS_diff.l3 + Apparel_VS_diff.l3 + Text_Prod_VS_diff.l4 + Text_Prod_FI_diff.l4 + Text_Mill_VS_diff.l4 + Apparel_VS_diff.l4 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 -0.243802 0.055240 -4.414 1.34e-05 ***
#> Text_Prod_FI_diff.l1 0.159719 0.103796 1.539 0.12473
#> Text_Mill_VS_diff.l1 0.037437 0.039755 0.942 0.34697
#> Apparel_VS_diff.l1 -0.023480 0.030204 -0.777 0.43743
#> Text_Prod_VS_diff.l2 -0.080844 0.057874 -1.397 0.16329
#> Text_Prod_FI_diff.l2 -0.099758 0.104902 -0.951 0.34225
#> Text_Mill_VS_diff.l2 -0.017690 0.041059 -0.431 0.66684
#> Apparel_VS_diff.l2 -0.007022 0.031591 -0.222 0.82421
#> Text_Prod_VS_diff.l3 0.150621 0.058004 2.597 0.00979 **
#> Text_Prod_FI_diff.l3 0.100410 0.104141 0.964 0.33560
#> Text_Mill_VS_diff.l3 0.014339 0.041086 0.349 0.72730
#> Apparel_VS_diff.l3 0.047203 0.031280 1.509 0.13215
#> Text_Prod_VS_diff.l4 0.003493 0.057316 0.061 0.95143
#> Text_Prod_FI_diff.l4 -0.028336 0.102702 -0.276 0.78278
#> Text_Mill_VS_diff.l4 0.057295 0.038633 1.483 0.13892
#> Apparel_VS_diff.l4 0.025735 0.029723 0.866 0.38715
#> const 0.768449 2.906277 0.264 0.79161
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 53.65 on 364 degrees of freedom
#> Multiple R-Squared: 0.1265, Adjusted R-squared: 0.08811
#> F-statistic: 3.295 on 16 and 364 DF, p-value: 2.159e-05
#>
#>
#> Estimation results for equation Text_Prod_FI_diff:
#> ==================================================
#> Text_Prod_FI_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + Text_Prod_VS_diff.l2 + Text_Prod_FI_diff.l2 + Text_Mill_VS_diff.l2 + Apparel_VS_diff.l2 + Text_Prod_VS_diff.l3 + Text_Prod_FI_diff.l3 + Text_Mill_VS_diff.l3 + Apparel_VS_diff.l3 + Text_Prod_VS_diff.l4 + Text_Prod_FI_diff.l4 + Text_Mill_VS_diff.l4 + Apparel_VS_diff.l4 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 0.032406 0.027297 1.187 0.235935
#> Text_Prod_FI_diff.l1 0.172298 0.051292 3.359 0.000864 ***
#> Text_Mill_VS_diff.l1 -0.024452 0.019645 -1.245 0.214047
#> Apparel_VS_diff.l1 0.009109 0.014926 0.610 0.542049
#> Text_Prod_VS_diff.l2 -0.016073 0.028599 -0.562 0.574458
#> Text_Prod_FI_diff.l2 0.224758 0.051838 4.336 1.88e-05 ***
#> Text_Mill_VS_diff.l2 0.013679 0.020290 0.674 0.500609
#> Apparel_VS_diff.l2 0.031818 0.015611 2.038 0.042250 *
#> Text_Prod_VS_diff.l3 0.043124 0.028663 1.505 0.133316
#> Text_Prod_FI_diff.l3 0.072484 0.051462 1.408 0.159842
#> Text_Mill_VS_diff.l3 0.009392 0.020303 0.463 0.643953
#> Apparel_VS_diff.l3 0.021418 0.015457 1.386 0.166709
#> Text_Prod_VS_diff.l4 0.012191 0.028323 0.430 0.667143
#> Text_Prod_FI_diff.l4 0.170684 0.050751 3.363 0.000852 ***
#> Text_Mill_VS_diff.l4 0.010425 0.019091 0.546 0.585357
#> Apparel_VS_diff.l4 0.013754 0.014688 0.936 0.349682
#> const 0.967188 1.436164 0.673 0.501087
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 26.51 on 364 degrees of freedom
#> Multiple R-Squared: 0.2774, Adjusted R-squared: 0.2457
#> F-statistic: 8.735 on 16 and 364 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation Text_Mill_VS_diff:
#> ==================================================
#> Text_Mill_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + Text_Prod_VS_diff.l2 + Text_Prod_FI_diff.l2 + Text_Mill_VS_diff.l2 + Apparel_VS_diff.l2 + Text_Prod_VS_diff.l3 + Text_Prod_FI_diff.l3 + Text_Mill_VS_diff.l3 + Apparel_VS_diff.l3 + Text_Prod_VS_diff.l4 + Text_Prod_FI_diff.l4 + Text_Mill_VS_diff.l4 + Apparel_VS_diff.l4 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 0.267945 0.077103 3.475 0.000572 ***
#> Text_Prod_FI_diff.l1 0.025166 0.144877 0.174 0.862195
#> Text_Mill_VS_diff.l1 -0.306465 0.055489 -5.523 6.35e-08 ***
#> Apparel_VS_diff.l1 -0.065186 0.042159 -1.546 0.122922
#> Text_Prod_VS_diff.l2 0.096778 0.080780 1.198 0.231678
#> Text_Prod_FI_diff.l2 -0.195889 0.146421 -1.338 0.181781
#> Text_Mill_VS_diff.l2 -0.128491 0.057310 -2.242 0.025561 *
#> Apparel_VS_diff.l2 0.054316 0.044094 1.232 0.218808
#> Text_Prod_VS_diff.l3 0.313273 0.080961 3.869 0.000129 ***
#> Text_Prod_FI_diff.l3 0.075738 0.145360 0.521 0.602657
#> Text_Mill_VS_diff.l3 0.054167 0.057348 0.945 0.345529
#> Apparel_VS_diff.l3 -0.001703 0.043660 -0.039 0.968903
#> Text_Prod_VS_diff.l4 -0.016957 0.080001 -0.212 0.832259
#> Text_Prod_FI_diff.l4 0.205927 0.143351 1.437 0.151712
#> Text_Mill_VS_diff.l4 0.069084 0.053923 1.281 0.200958
#> Apparel_VS_diff.l4 0.021785 0.041487 0.525 0.599836
#> const -7.968639 4.056558 -1.964 0.050246 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 74.88 on 364 degrees of freedom
#> Multiple R-Squared: 0.1646, Adjusted R-squared: 0.1279
#> F-statistic: 4.482 on 16 and 364 DF, p-value: 3.756e-08
#>
#>
#> Estimation results for equation Apparel_VS_diff:
#> ================================================
#> Apparel_VS_diff = Text_Prod_VS_diff.l1 + Text_Prod_FI_diff.l1 + Text_Mill_VS_diff.l1 + Apparel_VS_diff.l1 + Text_Prod_VS_diff.l2 + Text_Prod_FI_diff.l2 + Text_Mill_VS_diff.l2 + Apparel_VS_diff.l2 + Text_Prod_VS_diff.l3 + Text_Prod_FI_diff.l3 + Text_Mill_VS_diff.l3 + Apparel_VS_diff.l3 + Text_Prod_VS_diff.l4 + Text_Prod_FI_diff.l4 + Text_Mill_VS_diff.l4 + Apparel_VS_diff.l4 + const
#>
#> Estimate Std. Error t value Pr(>|t|)
#> Text_Prod_VS_diff.l1 0.07763 0.10010 0.776 0.43854
#> Text_Prod_FI_diff.l1 -0.03473 0.18809 -0.185 0.85362
#> Text_Mill_VS_diff.l1 -0.01290 0.07204 -0.179 0.85801
#> Apparel_VS_diff.l1 -0.34978 0.05473 -6.391 5.06e-10 ***
#> Text_Prod_VS_diff.l2 0.30414 0.10487 2.900 0.00396 **
#> Text_Prod_FI_diff.l2 -0.17678 0.19010 -0.930 0.35302
#> Text_Mill_VS_diff.l2 -0.08086 0.07440 -1.087 0.27785
#> Apparel_VS_diff.l2 -0.09836 0.05725 -1.718 0.08662 .
#> Text_Prod_VS_diff.l3 0.20920 0.10511 1.990 0.04731 *
#> Text_Prod_FI_diff.l3 0.54317 0.18872 2.878 0.00424 **
#> Text_Mill_VS_diff.l3 -0.04616 0.07445 -0.620 0.53570
#> Apparel_VS_diff.l3 -0.02891 0.05668 -0.510 0.61031
#> Text_Prod_VS_diff.l4 -0.02794 0.10386 -0.269 0.78811
#> Text_Prod_FI_diff.l4 -0.05286 0.18611 -0.284 0.77654
#> Text_Mill_VS_diff.l4 0.11803 0.07001 1.686 0.09266 .
#> Apparel_VS_diff.l4 -0.05399 0.05386 -1.002 0.31685
#> const -16.82112 5.26654 -3.194 0.00153 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 97.22 on 364 degrees of freedom
#> Multiple R-Squared: 0.1593, Adjusted R-squared: 0.1224
#> F-statistic: 4.312 on 16 and 364 DF, p-value: 9.455e-08
#>
#>
#>
#> Covariance matrix of residuals:
#> Text_Prod_VS_diff Text_Prod_FI_diff Text_Mill_VS_diff
#> Text_Prod_VS_diff 2878.32 -55.06 1150
#> Text_Prod_FI_diff -55.06 702.87 -27
#> Text_Mill_VS_diff 1150.42 -27.00 5608
#> Apparel_VS_diff 1154.77 178.39 1808
#> Apparel_VS_diff
#> Text_Prod_VS_diff 1154.8
#> Text_Prod_FI_diff 178.4
#> Text_Mill_VS_diff 1807.9
#> Apparel_VS_diff 9451.8
#>
#> Correlation matrix of residuals:
#> Text_Prod_VS_diff Text_Prod_FI_diff Text_Mill_VS_diff
#> Text_Prod_VS_diff 1.00000 -0.03871 0.2863
#> Text_Prod_FI_diff -0.03871 1.00000 -0.0136
#> Text_Mill_VS_diff 0.28635 -0.01360 1.0000
#> Apparel_VS_diff 0.22139 0.06921 0.2483
#> Apparel_VS_diff
#> Text_Prod_VS_diff 0.22139
#> Text_Prod_FI_diff 0.06921
#> Text_Mill_VS_diff 0.24832
#> Apparel_VS_diff 1.00000
##comparing the 2 models I can see that Log Likelihood is better for varp also R-squared is better for varp.Both models show some correlation between residuals but comparing the two, based on the higher log likelihood, higher R-squared values, and the greater number of significant predictors, the var(p) model seems better.
forecast_VAR1 <- forecast(VAR1_model, h=1)
#> Error in forecast(VAR1_model, h = 1): could not find function "forecast"
forecast_VARp <- forecast(VARp_best_model, h=1)
#> Error in forecast(VARp_best_model, h = 1): could not find function "forecast"
last_value_Text_Prod_VS<- tail(Text_Prod_VS$Value, 1)
mean_forecast_var1_Text_Prod_VS_diff <- forecast_VAR1$forecast$Text_Prod_VS_diff$mean
#> Error in eval(expr, envir, enclos): object 'forecast_VAR1' not found
mean_forecast_varp_Text_Prod_VS_diff <- forecast_VARp$forecast$Text_Prod_VS_diff$mean
#> Error in eval(expr, envir, enclos): object 'forecast_VARp' not found
original_forecast_VAR1_Text_Prod_VS <- last_value_Text_Prod_VS + mean_forecast_var1_Text_Prod_VS_diff
#> Error in eval(expr, envir, enclos): object 'mean_forecast_var1_Text_Prod_VS_diff' not found
original_forecast_VARP_Text_Prod_VS <- last_value_Text_Prod_VS + mean_forecast_varp_Text_Prod_VS_diff
#> Error in eval(expr, envir, enclos): object 'mean_forecast_varp_Text_Prod_VS_diff' not found
print("Var1 prediction for textile production value")
#> [1] "Var1 prediction for textile production value"
print(original_forecast_VAR1_Text_Prod_VS)
#> Error in eval(expr, envir, enclos): object 'original_forecast_VAR1_Text_Prod_VS' not found
print("Var(p) prediction for textile production value")
#> [1] "Var(p) prediction for textile production value"
print(original_forecast_VARP_Text_Prod_VS)
#> Error in eval(expr, envir, enclos): object 'original_forecast_VARP_Text_Prod_VS' not found
causality_test_result <- causality(VARp_best_model, cause="Text_Prod_VS_diff")
print(causality_test_result$Granger)
#>
#> Granger causality H0: Text_Prod_VS_diff do not Granger-cause
#> Text_Prod_FI_diff Text_Mill_VS_diff Apparel_VS_diff
#>
#> data: VAR object VARp_best_model
#> F-Test = 3.3546, df1 = 12, df2 = 1456, p-value = 7.629e-05
So seeing this we can reject the null hypothesis so my series Textile_Products does contain some information about the other series.
library(BigVAR)
#> Loading required package: lattice
p_optimal =5
model_spec <- constructModel(data_clean, p = p_optimal, struct = "SparseLag", gran=c(50,10))
ms <- constructModel(data_clean, p = p_optimal, struct = "SparseOO", gran=c(50,10))
results=cv.BigVAR(model_spec)
#> Validation Stage: SparseLag | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%[1] "Evaluation Stage"
print(results)
#> *** BIGVAR MODEL Results ***
#> Structure
#> [1] "SparseLag"
#> Loss
#> [1] "L2"
#> Forecast Horizon
#> [1] 1
#> Minnesota VAR
#> [1] FALSE
#> Maximum Lag Order
#> [1] 5
#> Optimal Lambda
#> [1] 59490
#> Grid Depth
#> [1] 50
#> Index of Optimal Lambda
#> [1] 5
#> Fraction of active coefficients
#> [1] 0.6432
#> In-Sample Loss
#> [1] 13300
#> BigVAR Out of Sample Loss
#> [1] 6230
#> *** Benchmark Results ***
#> Conditional Mean Out of Sample Loss
#> [1] 6340
#> AIC Out of Sample Loss
#> [1] 7290
#> BIC Out of Sample Loss
#> [1] 6280
#> RW Out of Sample Loss
#> [1] 12900
plot(results)
results2=cv.BigVAR(ms)
#> Validation Stage: SparseOO | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%[1] "Evaluation Stage"
print(results2)
#> *** BIGVAR MODEL Results ***
#> Structure
#> [1] "SparseOO"
#> Loss
#> [1] "L2"
#> Forecast Horizon
#> [1] 1
#> Minnesota VAR
#> [1] FALSE
#> Maximum Lag Order
#> [1] 5
#> Optimal Lambda
#> [1] 16.82
#> Grid Depth
#> [1] 50
#> Index of Optimal Lambda
#> [1] 10
#> Fraction of active coefficients
#> [1] 1
#> In-Sample Loss
#> [1] 13200
#> BigVAR Out of Sample Loss
#> [1] 6270
#> *** Benchmark Results ***
#> Conditional Mean Out of Sample Loss
#> [1] 6340
#> AIC Out of Sample Loss
#> [1] 7290
#> BIC Out of Sample Loss
#> [1] 6280
#> RW Out of Sample Loss
#> [1] 12900
plot(results2)
Comparing SparseLag and SparseOO they have similar loss sparse00 has Fraction of active coefficients as 100% so it is probably overfitting so considering the current state SparseLag seems better.
Created on 2024-04-10 with reprex v2.1.0
library(ggplot2)
Nvidia_share<- read.csv("/Users/satyamtiwari/Desktop/PR/Nvidia_share.csv")
head(Nvidia_share)
#> Date Close.Last Volume Open High Low
#> 1 03/19/2024 $893.98 67217130 $867.00 $905.44 $850.10
#> 2 03/18/2024 $884.55 66897590 $903.88 $924.05 $870.85
#> 3 03/15/2024 $878.365 64208620 $869.30 $895.46 $862.57
#> 4 03/14/2024 $879.44 60231820 $895.77 $906.46 $866.00
#> 5 03/13/2024 $908.88 63571290 $910.55 $915.04 $884.35
#> 6 03/12/2024 $919.13 66807520 $880.49 $919.60 $861.501
Nvidia_share$Date <- as.Date(Nvidia_share$Date, format = "%m/%d/%Y")
Nvidia_share$Open <- as.numeric(gsub("\\$", "", Nvidia_share$Open))
ggplot(Nvidia_share, aes(x = Date, y = Open)) +
geom_line() +
labs(title = "Claims",
x = "Year",
y = "Number")
amd_share<- read.csv("/Users/satyamtiwari/Desktop/PR/amd_share.csv")
amd_share$Date <- as.Date(amd_share$Date, format = "%m/%d/%Y")
amd_share$Open <- as.numeric(gsub("\\$", "", amd_share$Open))
ggplot(amd_share, aes(x = Date, y = Open)) +
geom_line() +
labs(title = "Claims",
x = "Year",
y = "Number")
merged_data <- merge(Nvidia_share, amd_share, by = "Date")
covid_start_date <- as.Date("2020-03-01")
covid_end_date <- as.Date("2021-03-30")
Bitcoin_bull_run <- as.Date("2017-12-17")
China_ban <- as.Date("2021-06-21")
p1 <- ggplot(merged_data, aes(x = Date)) +
geom_line(aes(y = Open.x, color = "NVIDA")) +
labs(y = "NVIDIA vs AMD Share price", color = "Series") +
theme_minimal()
p2 <- p1 + geom_line(aes(y = Open.y, color = "AMD"))+geom_vline(xintercept = as.numeric(covid_start_date), linetype = "dashed", color = "red") +geom_vline(xintercept = as.numeric(covid_end_date), linetype = "dashed", color = "red") +geom_vline(xintercept = as.numeric(Bitcoin_bull_run), linetype = "dashed", color = "green") +geom_vline(xintercept = as.numeric(China_ban), linetype = "dashed", color = "black")
final_plot <- p2 + scale_y_continuous(sec.axis = sec_axis(~., name = "NVIDIA vs AMD Share price"))
print(final_plot)
Markers Green - The Bitcoin Bull run starts Red Line - Start and end of the Covid pandemic Black - China Bans mining of crypto
library(ggplot2)
revenue<- read.csv("/Users/satyamtiwari/Desktop/PR/revenues.csv")
head(revenue)
#> Date Nvidia_Revenue Amd_Revenue X X.1 X.2
#> 1 1/31/24 $22,103 $6,168 NA NA NA
#> 2 10/31/23 $18,120 $5,800 NA NA NA
#> 3 7/31/23 $13,507 $5,359 NA NA NA
#> 4 4/30/23 $7,192 $5,353 NA NA NA
#> 5 1/31/23 $6,051 $5,599 NA NA NA
#> 6 10/31/22 $5,931 $5,565 NA NA NA
revenue$Date <- as.Date(revenue$Date, format = "%m/%d/%y")
revenue$Nvidia_Revenue <- as.numeric(gsub(",", "", gsub("\\$", "", revenue$Nvidia_Revenue)))
revenue$Amd_Revenue <- as.numeric(gsub(",", "", gsub("\\$", "", revenue$Amd_Revenue)))
p1 <- ggplot(revenue, aes(x = Date)) +
geom_line(aes(y = Nvidia_Revenue, color = "NVIDA")) +
labs(y = "NVIDIA vs AMD Revenue in Million $", color = "Series") +
theme_minimal()
p2 <- p1 + geom_line(aes(y = Amd_Revenue, color = "AMD"))+geom_vline(xintercept = as.numeric(covid_start_date), linetype = "dashed", color = "red") +geom_vline(xintercept = as.numeric(covid_end_date), linetype = "dashed", color = "red") +geom_vline(xintercept = as.numeric(Bitcoin_bull_run), linetype = "dashed", color = "green") +geom_vline(xintercept = as.numeric(China_ban), linetype = "dashed", color = "black")
final_plot <- p2 + scale_y_continuous(sec.axis = sec_axis(~., name = "NVIDIA vs AMD Revenue in Million $"))
print(final_plot)
#> Warning: Removed 2 rows containing missing values (`geom_line()`).
#> Removed 2 rows containing missing values (`geom_line()`).
Markers Green - The Bitcoin Bull run starts Red Line - Start and end of the Covid pandemic Black - China Bans mining of crypto
library(tseries)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
NV <- ts(Nvidia_share$Open, frequency=365)
AM <- ts(amd_share$Open, frequency=365)
NV_adf_result <- adf.test(NV, alternative = "stationary")
#> Warning in adf.test(NV, alternative = "stationary"): p-value smaller than
#> printed p-value
AM_adf_result <- adf.test(AM, alternative = "stationary")
print(NV_adf_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: NV
#> Dickey-Fuller = -6.3817, Lag order = 13, p-value = 0.01
#> alternative hypothesis: stationary
print(AM_adf_result)
#>
#> Augmented Dickey-Fuller Test
#>
#> data: AM
#> Dickey-Fuller = -3.4419, Lag order = 13, p-value = 0.04788
#> alternative hypothesis: stationary
cor(NV, AM)
#> [1] 0.8939226
data<- ts(data.frame(NV, AM), frequency=365)
library(lmtest)
#> Loading required package: zoo
#>
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#>
#> as.Date, as.Date.numeric
grangertest(data, order=1)
#> Granger causality test
#>
#> Model 1: AM ~ Lags(AM, 1:1) + Lags(NV, 1:1)
#> Model 2: AM ~ Lags(AM, 1:1)
#> Res.Df Df F Pr(>F)
#> 1 2513
#> 2 2514 -1 0.011 0.9166
library(vars)
#> Loading required package: MASS
#> Loading required package: strucchange
#> Loading required package: sandwich
#> Loading required package: urca
var_model <- VAR(data, p=12, type="both")
summary(var_model)
#>
#> VAR Estimation Results:
#> =========================
#> Endogenous variables: NV, AM
#> Deterministic variables: both
#> Sample size: 2505
#> Log Likelihood: -12417.018
#> Roots of the characteristic polynomial:
#> 0.9954 0.9892 0.8265 0.8265 0.8244 0.8097 0.8097 0.7964 0.7964 0.7819 0.7819 0.7408 0.7408 0.7344 0.7344 0.7254 0.7254 0.7193 0.7193 0.7183 0.7183 0.6934 0.2834 0.2834
#> Call:
#> VAR(y = data, p = 12, type = "both")
#>
#>
#> Estimation results for equation NV:
#> ===================================
#> NV = NV.l1 + AM.l1 + NV.l2 + AM.l2 + NV.l3 + AM.l3 + NV.l4 + AM.l4 + NV.l5 + AM.l5 + NV.l6 + AM.l6 + NV.l7 + AM.l7 + NV.l8 + AM.l8 + NV.l9 + AM.l9 + NV.l10 + AM.l10 + NV.l11 + AM.l11 + NV.l12 + AM.l12 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> NV.l1 0.9777200 0.0284920 34.316 < 2e-16 ***
#> AM.l1 -0.1944993 0.0749753 -2.594 0.009538 **
#> NV.l2 0.0079813 0.0392854 0.203 0.839024
#> AM.l2 0.1313869 0.1060265 1.239 0.215393
#> NV.l3 -0.0281947 0.0391917 -0.719 0.471960
#> AM.l3 -0.1430864 0.1056072 -1.355 0.175575
#> NV.l4 0.0598652 0.0390962 1.531 0.125841
#> AM.l4 0.2304950 0.1056629 2.181 0.029246 *
#> NV.l5 0.0244619 0.0366825 0.667 0.504927
#> AM.l5 -0.0748564 0.1041312 -0.719 0.472291
#> NV.l6 -0.0878701 0.0353141 -2.488 0.012903 *
#> AM.l6 0.2647589 0.1022669 2.589 0.009685 **
#> NV.l7 0.1392643 0.0350993 3.968 7.46e-05 ***
#> AM.l7 -0.3016037 0.1014831 -2.972 0.002988 **
#> NV.l8 -0.0518147 0.0349555 -1.482 0.138386
#> AM.l8 0.0117976 0.1013141 0.116 0.907309
#> NV.l9 -0.0458495 0.0348398 -1.316 0.188294
#> AM.l9 0.0236104 0.1011528 0.233 0.815460
#> NV.l10 0.0660668 0.0347182 1.903 0.057164 .
#> AM.l10 -0.0558620 0.1009492 -0.553 0.580062
#> NV.l11 -0.1707172 0.0345710 -4.938 8.41e-07 ***
#> AM.l11 0.2018303 0.1008654 2.001 0.045503 *
#> NV.l12 0.0991442 0.0256564 3.864 0.000114 ***
#> AM.l12 -0.0965049 0.0721201 -1.338 0.180982
#> const 2.3105572 0.7134279 3.239 0.001217 **
#> trend -0.0011102 0.0003584 -3.097 0.001975 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 5.617 on 2479 degrees of freedom
#> Multiple R-Squared: 0.9983, Adjusted R-squared: 0.9983
#> F-statistic: 5.943e+04 on 25 and 2479 DF, p-value: < 2.2e-16
#>
#>
#> Estimation results for equation AM:
#> ===================================
#> AM = NV.l1 + AM.l1 + NV.l2 + AM.l2 + NV.l3 + AM.l3 + NV.l4 + AM.l4 + NV.l5 + AM.l5 + NV.l6 + AM.l6 + NV.l7 + AM.l7 + NV.l8 + AM.l8 + NV.l9 + AM.l9 + NV.l10 + AM.l10 + NV.l11 + AM.l11 + NV.l12 + AM.l12 + const + trend
#>
#> Estimate Std. Error t value Pr(>|t|)
#> NV.l1 0.0113727 0.0108623 1.047 0.2952
#> AM.l1 0.9518544 0.0285836 33.301 < 2e-16 ***
#> NV.l2 -0.0198626 0.0149772 -1.326 0.1849
#> AM.l2 0.0178332 0.0404216 0.441 0.6591
#> NV.l3 0.0006365 0.0149415 0.043 0.9660
#> AM.l3 0.0041763 0.0402618 0.104 0.9174
#> NV.l4 0.0135986 0.0149051 0.912 0.3617
#> AM.l4 0.0064382 0.0402830 0.160 0.8730
#> NV.l5 0.0221460 0.0139849 1.584 0.1134
#> AM.l5 -0.0160912 0.0396991 -0.405 0.6853
#> NV.l6 -0.0528597 0.0134632 -3.926 8.86e-05 ***
#> AM.l6 0.0923060 0.0389883 2.368 0.0180 *
#> NV.l7 0.0625901 0.0133813 4.677 3.06e-06 ***
#> AM.l7 -0.0946120 0.0386895 -2.445 0.0145 *
#> NV.l8 -0.0178955 0.0133265 -1.343 0.1794
#> AM.l8 -0.0336717 0.0386251 -0.872 0.3834
#> NV.l9 -0.0098058 0.0132824 -0.738 0.4604
#> AM.l9 0.0136049 0.0385636 0.353 0.7243
#> NV.l10 -0.0107292 0.0132360 -0.811 0.4177
#> AM.l10 0.0595358 0.0384860 1.547 0.1220
#> NV.l11 -0.0216828 0.0131799 -1.645 0.1001
#> AM.l11 0.0106257 0.0384540 0.276 0.7823
#> NV.l12 0.0214904 0.0097813 2.197 0.0281 *
#> AM.l12 -0.0176267 0.0274951 -0.641 0.5215
#> const 0.7696165 0.2719880 2.830 0.0047 **
#> trend -0.0003736 0.0001367 -2.734 0.0063 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#>
#> Residual standard error: 2.141 on 2479 degrees of freedom
#> Multiple R-Squared: 0.9977, Adjusted R-squared: 0.9977
#> F-statistic: 4.385e+04 on 25 and 2479 DF, p-value: < 2.2e-16
#>
#>
#>
#> Covariance matrix of residuals:
#> NV AM
#> NV 31.549 8.598
#> AM 8.598 4.585
#>
#> Correlation matrix of residuals:
#> NV AM
#> NV 1.0000 0.7149
#> AM 0.7149 1.0000
library(vars)
var_model <- VAR(data, p=12, type="both")
irf_results <- irf(var_model, impulse="NV", response="AM", n.ahead=20)
plot(irf_results)
library(urca)
johansen_test <- ca.jo(data, spec = "transitory", type = "eigen", K = 2)
summary(johansen_test)
#>
#> ######################
#> # Johansen-Procedure #
#> ######################
#>
#> Test type: maximal eigenvalue statistic (lambda max) , with linear trend
#>
#> Eigenvalues (lambda):
#> [1] 0.03796007 0.00157393
#>
#> Values of teststatistic and critical values of test:
#>
#> test 10pct 5pct 1pct
#> r <= 1 | 3.96 6.50 8.18 11.65
#> r = 0 | 97.33 12.91 14.90 19.19
#>
#> Eigenvectors, normalised to first column:
#> (These are the cointegration relations)
#>
#> NV.l1 AM.l1
#> NV.l1 1.000000 1.000000
#> AM.l1 -1.621176 -4.513087
#>
#> Weights W:
#> (This is the loading matrix)
#>
#> NV.l1 AM.l1
#> NV.d -0.012316618 0.0012271644
#> AM.d -0.001261921 0.0008388962
Implications Presence of Cointegration: The presence of at least one cointegrating vector implies that despite short-term fluctuations, there exists a stable long-term relationship between NVIDIA and AMD share prices. This can be interpreted as indicating that the stocks move together over the long term, perhaps due to similar market forces affecting them. Strategic Decisions: For strategic decision-making, this could imply that positions in one of these stocks might be hedged by positions in the other, reflecting their long-term linkage. Created on 2024-05-12 with reprex v2.1.0
Week 1 ICNSA using all the data
Reason for selecting Model:
ACF plot shows a quick decay and PACF plot has a notable spike at lag 1, with all other lags being pretty much inside the confidence interval. The strong correlation at lag 1 suggests that the current value of the series is significantly correlated with its immediate past value. Hence,the AR(1) model is supported.
Created on 2024-02-08 with reprex v2.1.0