Closed crossxwill closed 3 years ago
I think this should solve the problem. If the partial model is superior to the prior model, then the partial model should be stored into the ecm
object. This ensures that ecm
is populated even if the stepwise algorithm eliminates all the explanatory variables.
The following line was added to the AIC/BIC and Adjusted-R-squared while loops:
ecm <- full ## store the improved model in "ecm" object
ecmback_custom <- function (y, xeq, xtr, lags = 1, includeIntercept = T, criterion = "AIC",
weights = NULL, keep = NULL, ...)
{
if (sum(grepl("^delta|Lag1$", names(xtr))) > 0 | sum(grepl("^delta|Lag1$",
names(xeq))) > 0) {
warning("You have column name(s) in xeq or xtr that begin with 'delta' or end with 'Lag1'. It is strongly recommended that you change this, otherwise the function 'ecmpredict' will result in errors or incorrect predictions.")
}
if (class(xtr) != "data.frame" | class(xeq) != "data.frame") {
stop("xeq or xtr is not of class 'data.frame'. See details on how to input them as data frames.")
}
if (nrow(xeq) < (lags + 1)) {
stop("Insufficient data for the lags specified.")
}
xeqnames <- names(xeq)
xeqnames <- paste0(xeqnames, paste0("Lag", as.character(lags)))
xeq <- data.frame(sapply(xeq, lagpad, lags))
xtrnames <- names(xtr)
xtrnames <- paste0("delta", xtrnames)
xtr <- data.frame(apply(xtr, 2, diff, lags))
if (class(y) == "data.frame") {
if (ncol(y) > 1) {
warning("You have more than one column in y, only the first will be used")
}
y <- y[, 1]
}
yLag <- y[1:(length(y) - lags)]
x <- cbind(xtr, xeq[complete.cases(xeq), ])
x <- cbind(x, yLag)
names(x) <- c(xtrnames, xeqnames, paste0("yLag", as.character(lags)))
x$dy <- diff(y, lags)
if (includeIntercept) {
formula <- "dy ~ ."
}
else {
formula <- "dy ~ . - 1"
}
full <- lm(as.formula(formula), data = x, weights = weights,
...)
dontdropIdx <- numeric(2)
if (criterion == "AIC" | criterion == "BIC") {
if (criterion == "AIC") {
kIC = 2
}
else if (criterion == "BIC") {
kIC = log(nrow(x))
}
fullAIC <- partialAIC <- AIC(full, k = kIC)
while (partialAIC <= fullAIC & length(rownames(drop1(full))) >
length(dontdropIdx)) {
dontdropVars <- "^<none>$|^yLag1$"
if (!is.null(keep)) {
for (i in 1:length(keep)) {
dontdropVars <- paste0(dontdropVars, "|^delta",
keep[i], "$", "|^", keep[i], "Lag1$")
}
}
dontdropIdx <- grep(dontdropVars, rownames(drop1(full,
k = kIC)))
todrop <- rownames(drop1(full, k = kIC))[-dontdropIdx][which.min(drop1(full,
k = kIC)$AIC[-dontdropIdx])]
x <- x[-which(names(x) %in% todrop)]
possible <- lm(as.formula(formula), data = x, weights = weights,
...)
partialAIC <- AIC(possible)
if (partialAIC < fullAIC & length(rownames(drop1(full))) >
length(dontdropIdx)) {
fullAIC <- partialAIC
full <- possible
ecm <- full ## store the improved model in "ecm" object
}
else {
ecm <- full
}
}
}
else if (criterion == "adjustedR2") {
fullAdjR2 <- partialAdjR2 <- summary(full)$adj.r.sq
while (partialAdjR2 >= fullAdjR2 & length(full$coefficients) >
length(dontdropIdx)) {
fullAdjR2 <- summary(full)$adj.r.sq
if (!is.null(keep)) {
dontdropVars <- paste0("^delta", keep, "$", "|^",
keep, "Lag1$")
dontdropVars <- paste0(dontdropVars, collapse = "|")
dontdropIdx <- grep(dontdropVars, rownames(summary(full)$coef))
if (includeIntercept) {
dontdropIdx <- c(1, dontdropIdx)
}
todrop <- which.max(summary(full)$coef[-dontdropIdx,
4])
}
else {
if (includeIntercept) {
todrop <- which.max(summary(full)$coef[-1,
4])
}
else {
todrop <- which.max(summary(full)$coef[, 4])
}
}
newx <- x[-todrop]
partial <- lm(dy ~ ., data = newx, weights = weights,
...)
partialAdjR2 <- summary(partial)$adj.r.sq
if (partialAdjR2 >= fullAdjR2 & length(full$coefficients) >
length(dontdropIdx)) {
x <- newx
full <- partial
ecm <- full ## store the improved model in "ecm" object
}
else {
ecm <- full
}
}
}
if (sum(grepl("^delta", names(ecm$coefficients))) == 0) {
warning("Backwards selection has opted to leave out all transient terms from the final model. This means you have a first order differenced autoregressive model of sorts, not a full error correction model.")
}
else if (sum(grepl("Lag1$", names(ecm$coefficients))) ==
0) {
warning("Backwards selection has opted to leave out all equilibrium terms from the final model. This means you have a first order differenced autoregressive model of sorts, not a full error correction model.")
}
return(ecm)
}
Thanks for pointing this out. 'ecmback' and 'ecmaveback' have been fixed based on your recommendation and the new code is on github under version 6.3.0. Will push to cran soon.
I get an error when backward selection eliminates all the variables.