boost-R / gamboostLSS

Boosting models for fitting generalized additive models for location, shape and scale (GAMLSS) to potentially high dimensional data. The current relase version can be found on CRAN (https://cran.r-project.org/package=gamboostLSS).
26 stars 11 forks source link

gamlss.dist::BB, gamlss.dist::BI don't work #12

Closed fabian-s closed 8 years ago

fabian-s commented 8 years ago

These are supposed to be called with a 2-column response giving successes & failures, but that doesn't seem to have been implemented yet here.

I have an ugly fix for cases where # of trials is constant for all the data (see below), but that's unsatisfactory for most practical applications, probably.

> options(error=traceback, deparse.max.lines = 10)
> library("gamboostLSS")
> library("gamlss")
> 
> # y ~ Binomial(p = invlogit(x + z), n= 60)
> n <- 100
> x <- rnorm(n)
> z <- rnorm(n)
> data <- data.frame(y = rbinom(n, p = plogis(x + z), size = 60), x = x, z= z)
> data$ymat <- with(data, cbind(success = data$y, fail = 60 - data$y))
> (m_gamlss <- gamlss(ymat ~ x + z, data = data, family = BB))
GAMLSS-RS iteration 1: Global Deviance = 700.266 
GAMLSS-RS iteration 2: Global Deviance = 553.8168 
GAMLSS-RS iteration 3: Global Deviance = 532.8859 
GAMLSS-RS iteration 4: Global Deviance = 532.8656 
GAMLSS-RS iteration 5: Global Deviance = 532.8656 

Family:  c("BB", "Beta Binomial") 
Fitting method: RS() 

Call:  gamlss(formula = ymat ~ x + z, family = BB, data = data) 

Mu Coefficients:
(Intercept)            x            z  
  -0.005859     1.028064     0.994731  
Sigma Coefficients:
(Intercept)  
     -5.265  

 Degrees of Freedom for the fit: 4 Residual Deg. of Freedom   96 
Global Deviance:     532.866 
            AIC:     540.866 
            SBC:     551.286 
> # gamlss(ymat ~ x + z, data = data, family = BI) # samesame
> 
> Betabin <- as.families(fname="BB")
> gamboostLSS(ymat ~ x + z, data = data, families = Betabin)
Error in eval(expr, envir, enclos) : object 'bd' not found
No traceback available 
> # bd is the "binomial denominator" (i.e., # of trials), not being handed over!
> 
> #-------------------------------------------------------------------------------
> # ugly, hacky fix: set bd-defaults to 60 wherever it's needed
> Betabin60 <- Betabin
> add_bd_60 <- function(f){
+   frmls <- formals(f)
+   frmls$bd <- 60
+   formals(f) <- frmls
+   f
+ }
> with(environment(Betabin60[[1]]@ngradient), {
+   #all slots share same env, so enough to do this for one slot
+   FAM$dldm <- add_bd_60(FAM$dldm)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$dldd  <- add_bd_60(FAM$dldd)
+   FAM$d2ldd2  <- add_bd_60(FAM$d2ldd2)
+   FAM$G.dev.incr <- add_bd_60(FAM$G.dev.incr)
+   FAM$mu.initial <- quote(mu <- (y + 0.5)/(60 + 1))
+   pdf <- add_bd_60(pdf)
+ })
> with(environment(Betabin60[[2]]@ngradient), {
+   FAM$dldm <- add_bd_60(FAM$dldm)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$d2ldm2  <- add_bd_60(FAM$d2ldm2)
+   FAM$dldd  <- add_bd_60(FAM$dldd)
+   FAM$d2ldd2  <- add_bd_60(FAM$d2ldd2)
+   FAM$G.dev.incr <- add_bd_60(FAM$G.dev.incr)
+   FAM$mu.initial <- quote(mu <- (y + 0.5)/(60 + 1))
+   pdf <- add_bd_60(pdf)
+ })
> #-------------------------------------------------------------------------------
> data$ones <- rep(1, n)
> # matrix-response like gamlss produces multivariate model for success and failure
> # probabilities (I guess?!?), not model for sucess probabilities: 
> coef(gamboostLSS(list(
+   mu = ymat ~ bols(x) + bols(z), 
+   sigma = ymat ~ bols(ones, intercept=FALSE)),
+   data = data, families = Betabin60))
$mu
$mu$`bols(x)`
(Intercept)           x        <NA>        <NA> 
-0.20896247  0.97583503  0.02081331 -0.97472539 

$mu$`bols(z)`
  (Intercept)             z          <NA>          <NA> 
 0.0001546734  0.9402122029 -0.2000348943 -0.9390756321 

attr(,"offset")
[1] 0.1940493

$sigma
$sigma$`bols(ones, intercept = FALSE)`
     ones      <NA> 
-3.221820 -3.210209 

attr(,"offset")
[1] 0
>
> coef(gamboostLSS(list(
+   mu = y ~ bols(x) + bols(z),
+   sigma = y ~ bols(ones, intercept=FALSE)),
+   data = data, families = Betabin60))
$mu
$mu$`bols(x)`
(Intercept)           x 
 -0.2089625   0.9758350 

$mu$`bols(z)`
 (Intercept)            z 
0.0001546734 0.9402122029 

attr(,"offset")
[1] 0.1940493

$sigma
$sigma$`bols(ones, intercept = FALSE)`
    ones 
-3.22182 

attr(,"offset")
[1] 0

codefile is here: https://gist.github.com/fabian-s/b952063213f22c31ab9ea99f681a56d7

hofnerb commented 8 years ago

@mayrandy can you have a look at this issue?

Do we have any opportunity to work with matrix calued outcomes within the families? Actually this should be possible as we can also use complex outcomes such as Surv(t, delta). I think we need to have specialized families here. Is this worth it?

If we do not have a proper fix we should at least issue a warning or an error if someone tries to use these families.

mayrandy commented 8 years ago

As far as I can see, this should not be a big issue. We only need to change as.families() so that it checks if this beta denominator exists as an argument, i.e, something like "bd" %in% names(formals(pdf)).

If that is the case, we'll have to do bd <- y[,1] +y[,2] y <- y[,1] the rest should be playing around with the arguments so that bd is included when needed in ngradient, loss, mu.initial, etc.

mayrandy commented 8 years ago

OK, this should work now with this commit to devel:

library("gamboostLSS")
library("gamlss")

set.seed(123)
n <- 100
x <- rnorm(n)
z <- rnorm(n)
data <- data.frame(y = rbinom(n, p = plogis(x + z), size = 60), x = x, z= z)
data$ymat <- with(data, cbind(success = data$y, fail = 60 - data$y))

m_gamlss <- gamlss(ymat ~ x + z, data = data, family = BB)

Betabin <- as.families(fname="BB")
g1 <- glmboostLSS(ymat ~ x + z, data = data, families = Betabin)

coef(m_gamlss)
#(Intercept)           x           z 
#-0.01365737  1.00916892  0.96522587 

coef(g1[1000], off2int = TRUE)$mu
#$mu
#(Intercept)           x           z 
#-0.01324596  1.00947447  0.96550888 

# now BI via mboost 
g2 <- glmboost(ymat ~x + z, data = data, family = as.families("BI"))
# ignore warning
coef(g2, off2int = TRUE)
#(Intercept)           x           z 
#-0.01368482  1.00917328  0.96521239 
hofnerb commented 8 years ago

Thx.