chjackson / flexsurv

The flexsurv R package for flexible parametric survival and multi-state modelling
http://chjackson.github.io/flexsurv/
53 stars 28 forks source link

Catching segfaults when using certain distributions with `flexsurvreg` #121

Closed mattwarkentin closed 2 years ago

mattwarkentin commented 2 years ago

Picking up from #120

I am not sure if this is a "me" problem or not, but I am catching segfaults when fitting with several of the choices for dist (exp and weibull work fine, gengamma and others do not).

 *** caught segfault ***
address 0x0, cause 'memory not mapped'

The traceback seems to suggest the issue shows up in {dist}_work() functions (e.g. dgengamma_work(x, mu, sigma, Q, log)). Are you able to fit the above model without issue, @chjackson?

I wanted to add in test coverage for more distributions than just weibull but my R session just crashes so I can't verify the test.

mattwarkentin commented 2 years ago

Full traceback:

Traceback:
 1: dgengamma_work(x, mu, sigma, Q, log)
 2: (function (x, mu = 0, sigma = 1, Q, log = FALSE) {    dgengamma_work(x, mu, sigma, Q, log)})(mu = c(6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803, 6.19110723407803), sigma = c(0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244, 0.735773189554244), Q = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), x = c(`1` = 59, `2` = 115, `3` = 156, `5` = 431, `7` = 464, `8` = 475, `10` = 563, `11` = 638, `22` = 268, `23` = 329, `24` = 353, `25` = 365), log = TRUE)
 3: do.call(fn, args)
 4: withCallingHandlers(do.call(fn, args), warning = function(w) {    if (grepl(x = w$message, pattern = "NaNs produced"))         invokeRestart("muffleWarning")})
 5: call_distfn_quiet(dfns$d, dargs)
 6: fn(par, ...)
 7: (function (par) fn(par, ...))(c(mu = 6.19110723407803, sigma = -0.30683337411279, Q = 0, age = 0))
 8: optim(method = "BFGS", par = c(mu = 6.19110723407803, sigma = -0.30683337411279, Q = 0, age = 0), fn = function (optpars, ...) {    pars[insert.locations] <- optpars    raw.pars <- pars    pars <- as.list(pars)    pars.event <- pars.nevent <- pars    if (npars > nbpars) {        beta <- raw.pars[(nbpars + 1):npars]        for (i in dlist$pars) {            pars[[i]] <- pars[[i]] + X[, mx[[i]], drop = FALSE] %*%                 beta[mx[[i]]]            pars.event[[i]] <- pars[[i]][event]            pars.nevent[[i]] <- pars[[i]][!event]        }    }    fnargs <- c(par.transform(pars), aux.pars)    fnargs.event <- c(par.transform(pars.event), aux.pars)    fnargs.nevent <- c(par.transform(pars.nevent), aux.pars)    dargs <- fnargs.event    dargs$x <- event.times    dargs$log <- TRUE    logdens <- call_distfn_quiet(dfns$d, dargs)    if (any(!event)) {        pmaxargs <- fnargs.nevent        pmaxargs$q <- left.censor        pmax <- call_distfn_quiet(dfns$p, pmaxargs)        pmax[pmaxargs$q == Inf] <- 1        pargs <- fnargs.nevent        pargs$q <- right.censor        pmin <- call_distfn_quiet(dfns$p, pargs)    }    targs <- fnargs    targs$q <- Y[, "start"]    plower <- call_distfn_quiet(dfns$p, targs)    targs$q <- rtrunc    pupper <- call_distfn_quiet(dfns$p, targs)    pupper[rtrunc == Inf] <- 1    pobs <- pupper - plower    if (do.hazard) {        pargs <- fnargs.event        pargs$q <- event.times        pminb <- call_distfn_quiet(dfns$p, pargs)        loghaz <- logdens - log(1 - pminb)        offseti <- log(1 + bhazard[event]/exp(loghaz) * weights[event])    }    else {        offseti <- default.offset    }    loglik[event] <- (logdens * event.weights) + offseti    if (any(!event))         loglik[!event] <- (log(pmax - pmin) * no.event.weights)    loglik <- loglik - log(pobs) * weights    ret <- -sum(loglik)    attr(ret, "indiv") <- loglik    ret}, gr = NULL, Y = c(59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744, 769, 770, 803, 855, 1040, 1106, 1129, 1206, 1227, 268, 329, 353, 365, 377, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744, 769, 770, 803, 855, 1040, 1106, 1129, 1206, 1227, 268, 329, 353, 365, 377, 59, 115, 156, 421, 431, 448, 464, 475, 477, 563, 638, 744, 769, 770, 803, 855, 1040, 1106, 1129, 1206, 1227, 268, 329, 353, 365, 377, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf), X = c(72.3315, 74.4932, 66.4658, 53.3644, 50.3397, 56.4301, 56.937, 59.8548, 64.1753, 55.1781, 56.7562, 50.1096, 59.6301, 57.0521, 39.2712, 43.1233, 38.8932, 44.6, 53.9068, 44.2055, 59.589, 74.5041, 43.137, 63.2192, 64.4247, 58.3096), weights = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), bhazard = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rtrunc = c(Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf, Inf), dlist = list(    name = "gengamma", pars = c("mu", "sigma", "Q"), location = "mu",     transforms = list(function (x)     x, .Primitive("log"), function (x)     x), inv.transforms = list(function (x)     x, .Primitive("exp"), function (x)     x), inits = function (t, mf, mml, aux)     {        lt <- log(t[t > 0])        c(mean(lt), sd(lt), 0)    }), inits = c(mu = 6.19110723407803, sigma = -0.30683337411279, Q = 0, age = 0), dfns = list(p = function (q, mu = 0, sigma = 1,     Q, lower.tail = TRUE, log.p = FALSE) {    pgengamma_work(q, mu, sigma, Q, lower.tail, log.p)}, d = function (x, mu = 0, sigma = 1, Q, log = FALSE) {    dgengamma_work(x, mu, sigma, Q, log)}, h = function (x, mu = 0, sigma = 1, Q) {    logdens <- dgengamma(x = x, mu = mu, sigma = sigma, Q = Q,         log = TRUE)    logsurv <- pgengamma(q = x, mu = mu, sigma = sigma, Q = Q,         lower.tail = FALSE, log.p = TRUE)    loghaz <- logdens - logsurv    exp(loghaz)}, H = function (x, mu = 0, sigma = 1, Q) {    -pgengamma(q = x, mu = mu, sigma = sigma, Q = Q, lower.tail = FALSE,         log.p = TRUE)}, r = function (n, mu = 0, sigma = 1, Q) {    r <- rbase("gengamma", n = n, mu = mu, sigma = sigma, Q = Q)    for (i in seq_along(r)) assign(names(r)[i], r[[i]])    ret[ind][Q == 0] <- rlnorm(n, mu, sigma)    qn0 <- Q != 0    if (any(qn0)) {        mu <- mu[qn0]        sigma <- sigma[qn0]        Q <- Q[qn0]        w <- log(Q^2 * rgamma(n, 1/Q^2, 1))/Q        ret[ind][qn0] <- exp(mu + sigma * w)    }    ret}, DLd = NULL, DLS = NULL, rmst = function (t, mu = 0, sigma = 1,     Q, start = 0) {    rmst_generic(pgengamma, t, start = start, mu = mu, sigma = sigma,         Q = Q)}, mean = function (mu = 0, sigma = 1, Q) {    rmst_generic(pgengamma, Inf, start = 0, mu = mu, sigma = sigma,         Q = Q)}, q = function (p, mu = 0, sigma = 1, Q, lower.tail = TRUE,     log.p = FALSE) {    d <- dbase("gengamma", lower.tail = lower.tail, log = log.p,         p = p, mu = mu, sigma = sigma, Q = Q)    for (i in seq_along(d)) assign(names(d)[i], d[[i]])    p[Q < 0] <- 1 - p[Q < 0]    ret[ind] <- numeric(sum(ind))    ret[ind][Q == 0] <- qlnorm(p[Q == 0], mu[Q == 0], sigma[Q ==         0])    qn0 <- Q != 0    p <- p[qn0]    mu <- mu[qn0]    sigma <- sigma[qn0]    Q <- Q[qn0]    ret[ind][qn0] <- exp(mu + sigma * (log(Q^2 * qgamma(p, 1/Q^2,         1))/Q))    ret}, deriv = FALSE), aux = NULL, mx = list(mu = 1L, sigma = NULL,     Q = NULL), fixedpars = NULL, hessian = TRUE)
 9: do.call("optim", optim.args)
10: flexsurvreg(Surv(futime, fustat) ~ age, data = ovarian, dist = "gengamma")
chjackson commented 2 years ago

That model works fine for me, using RStudio and loading the current devel version of flexsurv using devtools::load_all(".")