Closed kunstler closed 4 years ago
AIC comparison in full glmnet ?
library(glmnet)
#convert training data to matrix format
s_time <- Sys.time()
x <- model.matrix(response~ Dist_lisiere_act + Dist_route + FORMATION + STRUCTURE + Altitude +
poly(Altitude, 2) + Pente + poly(Pente, 2)+
exposition + poly(exposition, 2) + pH + poly(pH, 2) +
phosphore + poly(phosphore, 2)+
Azote + poly(Azote, 2)+
limons + argile,test_fit)[, -1]
#perform grid search to find optimal value of lambda
#family= binomial => logistic regression, alpha=1 => lasso
cvfit = cv.glmnet(x, test_fit$response, family = "binomial", type.measure = "auc")
#plot result
var_lasso <- as.matrix(coef(cvfit, s = "lambda.1se"))
vars <- gsub( '(2\\)).*', '\\1', row.names(var_lasso)[var_lasso>0])
vars <- vars[! vars == "(Intercept)"]
x1 <- model.matrix(eval(parse(text=paste("response ~ ", paste(c(vars), collapse = " + ")))),
data = test_fit)[, -1]
x2 <- model.matrix(eval(parse(text=paste("response ~ ", paste(c("TYPE_FORET", vars), collapse = " + ")))),
test_fit)[, -1]
glmnet_envir1l <- glmnet(x1, test_fit$response, family = "binomial",
data = test_fit, lambda = 0)
glmnet_type_F1l <- glmnet(x2, test_fit$response, family = "binomial",
data = test_fit, lambda = 0)
AIC_glmnet <- function(fit){
tLL <- fit$nulldev - deviance(fit)
k <- fit$df
AIC <- -tLL+2*k
AIC
}
AIC_glmnet(glmnet_envir1l)
AIC_glmnet(glmnet_type_F1l)
AIC_glmnet(glmnet_type_F1l) + 5 < AIC_glmnet(glmnet_envir1l)
e_time <- Sys.time()
e_time - s_time
gives
> library(glmnet)
> #convert training data to matrix format
> s_time <- Sys.time()
> x <- model.matrix(response~ Dist_lisiere_act + Dist_route + FORMATION + STRUCTURE + Altitude +
+ poly(Altitude, 2) + Pente + poly(Pente, 2)+
+ exposition + poly(exposition, 2) + pH + poly(pH, 2) +
+ phosphore + poly(phosphore, 2)+
+ Azote + poly(Azote, 2)+
+ limons + argile,test_fit)[, -1]
> #perform grid search to find optimal value of lambda
> #family= binomial => logistic regression, alpha=1 => lasso
> cvfit = cv.glmnet(x, test_fit$response, family = "binomial", type.measure = "auc")
> #plot result
> var_lasso <- as.matrix(coef(cvfit, s = "lambda.1se"))
> vars <- gsub( '(2\\)).*', '\\1', row.names(var_lasso)[var_lasso>0])
> vars <- vars[! vars == "(Intercept)"]
>
> x1 <- model.matrix(eval(parse(text=paste("response ~ ", paste(c(vars), collapse = " + ")))),
+ data = test_fit)[, -1]
> x2 <- model.matrix(eval(parse(text=paste("response ~ ", paste(c("TYPE_FORET", vars), collapse = " + ")))),
+ test_fit)[, -1]
> glmnet_envir1l <- glmnet(x1, test_fit$response, family = "binomial",
+ data = test_fit, lambda = 0)
> glmnet_type_F1l <- glmnet(x2, test_fit$response, family = "binomial",
+ data = test_fit, lambda = 0)
>
> AIC_glmnet <- function(fit){
+ tLL <- fit$nulldev - deviance(fit)
+ k <- fit$df
+ AIC <- -tLL+2*k
+ AIC
+ }
> AIC_glmnet(glmnet_envir1l)
[1] -308.7293
> AIC_glmnet(glmnet_type_F1l)
[1] -324.8918
>
> e_time <- Sys.time()
> e_time - s_time
Time difference of 3.296071 secs
> AIC_glmnet(glmnet_type_F1l) + 5 < AIC_glmnet(glmnet_envir1l)
[1] TRUE
> AIC(glm_type_F1) - AIC(glm_envir1)
[1] -26.61796
> AIC_glmnet(glmnet_type_F1l) - AIC_glmnet(glmnet_envir1l)
[1] -16.1625
Both lasso glmnet and stepAIC glm give a better AIC for model wiith Foret Recente
stick to glm
takes 2.8 seconds compared to
which takes 13.9 secondes
But mixing lasso and glm might be weird