nlmixrdevelopment / nlmixr

nlmixr: an R package for population PKPD modeling
https://nlmixrdevelopment.github.io/nlmixr/
GNU General Public License v2.0

Feature provision: Any interest in a likelihood profiling function? #524

Open billdenney opened 3 years ago

billdenney commented 3 years ago

I needed likelihood profiling for a project, so I wrote it. Any interest in this as a pull request?

It would require modification to fit the nlmixr coding style and to add tests. Limitations of the current code: it does not handle model nonconvergence (I'm fairly sure it would stop searching in the region of nonconvergence, which could lose an answer in some cases), and it only works on one parameter at a time (though expanding to multiple parameters would be fairly trivial and could be done before the PR).

I would assume that the only external function would be the method for the profile() generic.
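
For illustration, usage would look something like this (a sketch; the fit object `fit` and the parameter name "tka" are hypothetical, and the return structure matches the @return documentation below):

# Profile one fixed effect of an already-fitted nlmixr model
prof <- profile(fit, which = "tka", level = 0.95)
prof$tka$interval  # the estimated profile-likelihood interval
prof$tka$data      # data.frame of all points evaluated during profiling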

#' Perform likelihood profiling on an nlmixr fitted object
#' 
#' @details Profiling may fail if the model does not converge during
#'   re-estimation.
#' 
#' @param fitted The nlmixr fit object
#' @param which The name of the theta (fixed-effect) parameter to profile (character string)
#' @param reestimate Should the original \code{fitted} model be re-estimated
#'   with the parameter fixed?  (Fixing a parameter can sometimes change the
#'   objective function, affecting results.)
#' @param lower,upper The lower and upper bounds beyond which profiling will
#'   not search (these do not change the bounds used during estimation)
#' @param start The starting points to use for estimation (defaults to Wald-type
#'   confidence interval estimates)
#' @param maxpts The maximum number of estimates to perform (note that the
#'   actual number of estimates may differ slightly from this value).
#' @param delta_objf,level The target change in objf.  If not provided,
#'   \code{delta_objf} is generated from \code{level} assuming a chi-squared
#'   distribution with one degree of freedom (e.g. \code{qchisq(p=0.95, df=1)}
#'   is about 3.84).  \code{level} is ignored if \code{delta_objf} is
#'   provided.
#' @param trace Show more information during fitting?
#' @param tol_objf,tol_step The minimum difference in objf (\code{tol_objf})
#'   or minimum step in the \code{which} parameter (\code{tol_step}) worth
#'   refining.  Estimation stops when either tolerance is reached.
#' @param ... Ignored (included as a parameter to match the generic)
#' @return A list with a single element named after \code{which}.  That
#'   element is a list with names "interval" and "data".  "interval" contains
#'   the estimated interval, and "data" contains a data.frame with the "which"
#'   parameter, the "value" of the parameter, the "objf" when that parameter
#'   was fixed, whether the row is the "original" model, and whether the fit
#'   "minimized".
#' @export
profile.nlmixrFitCore <- function(fitted, which, reestimate=TRUE,
                                  lower=-Inf, upper=Inf, start=NULL,
                                  maxpts=100, delta_objf=qchisq(p=level, df=1), level=0.95, trace=TRUE,
                                  tol_objf=0.01, tol_step=0.001, ...) {
  if (!(which %in% names(fixef(fitted)))) {
    stop("'which' is not a fixed effect in the model")
  }
  numeric_scalar_inputs <- list(lower=lower, upper=upper, maxpts=maxpts, level=level, tol_objf=tol_objf, tol_step=tol_step)
  for (current_name in names(numeric_scalar_inputs)) {
    current_value <- numeric_scalar_inputs[[current_name]]
    if (!is.numeric(current_value)) {
      stop("'", current_name, "' must be numeric")
    } else if (length(current_value) != 1) {
      stop("'", current_name, "' must have length 1")
    }
  }
  if (lower >= upper) {
    stop("'lower' must be less than 'upper'")
  } else if (maxpts < 3) {
    stop("'maxpts' must be >= 3")
  } else if (level <= 0 || level >= 1) {
    stop("'level' must be between 0 and 1")
  } else if (tol_objf <= 0) {
    stop("'tol_objf' must be > 0")
  } else if (tol_step <= 0) {
    stop("'tol_step' must be > 0")
  }
  if (tol_objf >= 1) {
    warning("'tol_objf' is usually < 1")
  }
  if (maxpts <= 20) {
    warning("'maxpts' is usually > 20")
  }

  ret_original <-
    data.frame(
      which=which,
      value=fixef(fitted)[[which]],
      objf=fitted$objf,
      # Keep track of which fit is the original model fit in case logLik values
      # are not monotonic
      original=TRUE,
      # Assume that the initial model has minimized (TODO: is that a good
      # assumption?)
      minimized=TRUE
    )
  if (reestimate) {
    ret_original$minimized <- NA
    ret_original$objf <- NA_real_
  }

  # Setup the starting estimate
  fit_lower <- fitted$ini$lower[fitted$ini$name %in% which]
  fit_upper <- fitted$ini$upper[fitted$ini$name %in% which]
  if (is.numeric(start)) {
    if (length(start) != 2) {
      stop("'start' must have a lower and upper bound, if provided")
    } else if (all(start < fit_lower)) {
      warning("'start' is not in the bounds allowed by the model, all are below the lower bound.  Ignoring 'start'.")
      start <- NULL
    } else if (all(start > fit_upper)) {
      warning("'start' is not in the bounds allowed by the model, all are above the upper bound.  Ignoring 'start'.")
      start <- NULL
    }
    # Only constrain 'start' if it was not discarded above
    if (!is.null(start)) {
      start_orig <- sort(start)
      start <- pmax(pmin(start_orig, fit_upper), fit_lower)
      if (!all(start == start_orig)) {
        warning(
          "'start' was constrained to be between the initial conditions lower and upper bounds: ",
          paste(signif(start, 4), collapse=", ")
        )
      }
    }
  } else if (!is.null(start)) {
    stop("'start' must be numeric or NULL")
  }
  if (is.null(start)) {
    # vcov() gives the variance; qnorm() expects a standard deviation
    start <- qnorm(p=c(0.05, 0.95), mean=ret_original$value, sd=sqrt(vcov(fitted)[which, which]))
    message(
      "'start' not provided, using Wald-type interval for 'start': ",
      paste(signif(start, 4), collapse=", ")
    )
  }
  ret_start <-
    data.frame(
      which=which,
      value=start,
      objf=NA_real_,
      original=FALSE,
      minimized=NA
    )
  ret <- rbind(ret_original, ret_start)[order(c(ret_original$value, ret_start$value)), , drop=FALSE]

  # perform profiling
  while (any(is.na(ret$minimized)) && (nrow(ret) <= (maxpts + 1))) {
    for (idx_current in which(is.na(ret$minimized))) {
      # Estimate the new objective function value at new points
      new_objf <- profile_get_objf(model=fitted, which=which, value=ret$value[idx_current], trace=trace)
      ret$objf[idx_current] <- new_objf
      # Only update the row just estimated (assigning the whole column would
      # clobber the NA markers for rows not yet estimated)
      ret$minimized[idx_current] <- !is.infinite(new_objf)
    }
    ret <-
      get_new_profile_points(
        data=ret,
        delta_objf=delta_objf, lower=lower, upper=upper,
        tol_objf=tol_objf, tol_step=tol_step
      )
    if (trace) {
      print(ret)
    }
  }
  interval_value <-
    get_new_profile_points(
      data=ret,
      delta_objf=delta_objf, lower=lower, upper=upper,
      tol_objf=0, tol_step=0
    )
  level_est <- pchisq(q=delta_objf, df=1)
  ret_interval <-
    setNames(
      interval_value$value[is.na(interval_value$minimized)],
      sprintf("%0.3g%%", 0.5+c(-1,1)*level_est/2)
    )
  setNames(
    list(
      list(
        interval=ret_interval,
        data=ret
      )
    ),
    nm=which
  )
}

# Get the new likelihood for a model at a 'value' for a parameter ('which'),
# optionally showing more information ('trace')
profile_get_objf <- function(model, which, value, trace) {
  if (trace) message("Trying ", which, "=", value, appendLF=FALSE)
  current_model <-
    do.call(
      ini,
      setNames(
        list(
          model,
          str2lang(sprintf("fixed(%g)", value))
        ),
        c("", which)
      )
    )
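  # The do.call() above builds a call equivalent to, e.g.,
  # ini(model, tka = fixed(0.5)) (parameter name hypothetical); using
  # setNames() and str2lang() lets the name in 'which' be chosen at run time.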
  current_fit <-
    try(
      nlmixr(current_model, est=model$est, control=list(print=0))
    )
  if (inherits(current_fit, what="nlmixrFitCore")) {
    if (trace) message(" minimized. objf=", current_fit$objf)
    objf <- current_fit$objf
  } else {
    if (trace) message(" did not minimize.")
    # Use Inf so callers can detect nonconvergence with is.infinite()
    objf <- Inf
  }
  objf
}

get_new_profile_points_interp_extrap <- function(data, delta_objf) {
  value_at_min_objf <- data$value[which(data$objf == min(data$objf, na.rm=TRUE))]
  new_points_raw <- interp_extrap(yout=delta_objf, x=data$value, y=data$objf - min(data$objf, na.rm=TRUE))
  # new_points_below counts how many data values each candidate point is
  # below; new_points_above counts how many it is above
  new_points_below <- sapply(X=new_points_raw, FUN=function(x) sum(x < data$value))
  new_points_above <- sapply(X=new_points_raw, FUN=function(x) sum(x > data$value))

  # If the point is interpolated in the current range, keep it
  mask_interpolated <-
    (new_points_below == rev(seq_along(new_points_raw))) &
    (new_points_above == seq_along(new_points_raw))
  # If the point is extrapolated one step outside of the current range, keep it
  mask_extrap_1 <-
    (new_points_below == (rev(seq_along(new_points_raw)) - 1)) &
    (new_points_above == (seq_along(new_points_raw) - 1))
  new_points_interp <-
    sort(new_points_raw[mask_interpolated | mask_extrap_1])

  new_points_low <- new_points_interp[new_points_interp < value_at_min_objf][1]
  new_points_high <- rev(new_points_interp[new_points_interp > value_at_min_objf])[1]

  # Only extrapolate from the correct edge (below the lower for the first range
  # and above the upper for the last range), and only do so if there are no
  # points in the correct direction (below or above) the minimum objf value.
  #
  # The [1] at the end forces it to be NA if there is no value matching the condition
  new_points_extrap_edge_low <- new_points_raw[1][new_points_raw[1] < min(data$value)][1]
  if (is.na(new_points_low)) {
    new_points_low <- new_points_extrap_edge_low
  }
  new_points_extrap_edge_high <- new_points_raw[length(new_points_raw)][new_points_raw[length(new_points_raw)] > max(data$value)][1]
  if (is.na(new_points_high)) {
    new_points_high <- new_points_extrap_edge_high
  }

  c(low=new_points_low, high=new_points_high)
}

get_new_profile_points_expand <- function(data) {
  expand_range <- diff(range(data$value))
  new_points_low <- min(data$value) - expand_range
  new_points_high <- max(data$value) + expand_range
  c(low=new_points_low, high=new_points_high)
}

get_new_profile_points_tol_step <- function(data, new_points, tol_step) {
  # Ensure that the step size is sufficiently wide
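  # Example (illustration only): with data$value = c(1, 2), a proposed point
  # at 1.0005, and tol_step = 0.001, the proposed point is within tol_step of
  # the value below it, so it is shifted up to value_below + tol_step = 1.001.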
  for (nm in names(new_points)) {
    value_below <- -Inf
    value_above <- Inf
    if (any(data$value < new_points[[nm]])) {
      value_below <- max(data$value[data$value < new_points[[nm]]])
    }
    if (any(data$value > new_points[[nm]])) {
      value_above <- min(data$value[data$value > new_points[[nm]]])
    }
    tol_below_okay <- (new_points[[nm]] - value_below) > tol_step
    tol_above_okay <- (value_above - new_points[[nm]]) > tol_step
    if (tol_above_okay && tol_below_okay) {
      # Do nothing, we're fine
    } else if ((value_above - value_below) < tol_step) {
      # We're done looking, we are within the tolerance
      new_points <- new_points[setdiff(names(new_points), nm)]
    } else if ((value_above - value_below) < (2*tol_step)) {
      # if the values above and below are less than 2 tolerance steps apart, shift to the middle
      new_points[[nm]] <- (value_above + value_below)/2
    } else if (tol_below_okay) {
      # Shift down a bit
      new_points[[nm]] <- value_above - tol_step
    } else {
      # Shift up a bit
      new_points[[nm]] <- value_below + tol_step
    }
  }
  new_points
}

get_new_profile_points_tol_objf <- function(data, new_points, tol_objf) {
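  # Drop proposed points whose neighboring objf values already differ by less
  # than tol_objf, since little would be learned between them.  Example
  # (illustration only): neighboring objf values of 10 and 10.005 with
  # tol_objf = 0.01 cause the proposed point to be dropped.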
  for (nm in names(new_points)) {
    objf_left <- NA_real_
    objf_right <- NA_real_
    if (any(data$value < new_points[[nm]])) {
      mask_below <- data$value < new_points[[nm]]
      value_below <- max(data$value[mask_below])
      objf_left <- data$objf[value_below == data$value]
    }
    if (any(data$value > new_points[[nm]])) {
      mask_above <- data$value > new_points[[nm]]
      value_above <- min(data$value[mask_above])
      objf_right <- data$objf[value_above == data$value]
    }
    if (is.na(objf_left) || is.na(objf_right)) {
      # We are extrapolating, keep the point
    } else if (abs(objf_left - objf_right) < tol_objf) {
      # Drop the point because we are within tolerance
      new_points <- new_points[setdiff(names(new_points), nm)]
    }
  }
  new_points
}

get_new_profile_points <- function(data, delta_objf, lower, upper, tol_objf, tol_step) {
  data <- data[order(data$value), , drop=FALSE]
  value_at_min_objf <- data$value[which(data$objf == min(data$objf, na.rm=TRUE))]
  if (length(value_at_min_objf) != 1) {
    # There really should never be more than one value at the minimum objf
    warning("Multiple values at the minimum objf, possible problems with model.")
  }

  # Determine the points to use for expansion
  interp_points <- get_new_profile_points_interp_extrap(data=data, delta_objf=delta_objf)
  expand_points <- get_new_profile_points_expand(data)
  new_points <-
    c(
      low=
        max(
          ifelse(
            is.na(interp_points["low"]),
            expand_points["low"],
            interp_points["low"]),
          lower
        ),
      high=
        min(
          ifelse(
            is.na(interp_points["high"]),
            expand_points["high"],
            interp_points["high"]
          ),
          upper
        )
    )
  # Don't repeat analysis
  new_points <- new_points[!(new_points %in% data$value)] # Use this method rather than setdiff to keep the names
  new_points <- get_new_profile_points_tol_step(data=data, new_points=new_points, tol_step=tol_step)
  new_points <- get_new_profile_points_tol_objf(data=data, new_points=new_points, tol_objf=tol_objf)
  if (length(new_points) > 0) {
    ret_new_points <-
      data.frame(
        which=data$which[[1]],
        value=new_points,
        objf=NA_real_,
        original=FALSE,
        minimized=NA
      )
    ret <-
      rbind(data, ret_new_points)[
        order(c(data$value, ret_new_points$value)), , drop=FALSE
      ]
    rownames(ret) <- NULL
  } else {
    # No new points are added
    ret <- data
  }
  ret
}

interp_extrap <- function(yout, x, y) {
  # Linear interpolation and extrapolation.

  # TODO: Chi-squared interpolation/extrapolation may be better.
  # TODO: How to handle nonconverged results (when y is Inf)?
  slope <- (y[-length(y)] - y[-1])/(x[-length(x)] - x[-1])
  intercept <- y[-1] - slope*x[-1]
  xout <- (yout - intercept)/slope
  xout
}
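
# Worked example of interp_extrap() (illustration only):
#   interp_extrap(yout = 2, x = c(1, 2, 3), y = c(4, 1, 2))
# The two segments have slopes -3 and 1 and intercepts 7 and -1, so solving
# yout = intercept + slope*x for x gives 5/3 on the descending segment and 3
# on the ascending segment.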