CABSEL / ProTINA

BSD 3-Clause "New" or "Revised" License
6 stars 7 forks source link

ProTINA R package does not work with glmnet > 2.0-18 and fails with 'could not find function "coef.cv.glmnet"' #1

Open omsai opened 4 days ago

omsai commented 4 days ago

The protina() function fails with glmnet >= 3.

Rscript to reproduce the fault using the example data from the README:

## devtools::install_github("CABSEL/ProTINA/protina_R/protina_v1.0_R")
library(protina)
library(httr2)   # request req_* resp_*
library(xml2)    # xml_find_all as_list

## Preparation of protina inputs #####################################
## Load the example datasets by name (the issue text says these are the
## example data from the README; presumably they ship with protina -- confirm).
data(lfc, glist, tobject, tftg, ppi)
## Pull the `time` and `group` components out of `tobject`.
tp <- tobject$time
group <- tobject$group
## `slope` (and `group`, as `grplist`) are passed to protina() further down.
slope <- generateSlope(lfc = lfc,
                       tp = tp,
                       group = group)
## Wrapped in system.time() so the top-level call also prints how long
## generatePGN() took; the timings from the reporter's run are pasted below.
system.time({
  pgn <- generatePGN(glist = glist,
                     tftg = tftg,
                     ppi = ppi,
                     tftg_thre = 0,
                     ptf_thre = 0,
                     ppi_thre = 500)
})
##>   user  system elapsed 
##> 35.675   6.285  41.983 

## List the archived glmnet source tarballs on CRAN and their versions.
url_prefix <- "https://cran.r-project.org/src/contrib/Archive/glmnet/"
req <- request(url_prefix)
resp <- req_perform(req)
html <- resp |> resp_body_html()

## Link texts of every table row whose href starts with "glmnet".
files <-
  xml_find_all(html, ".//tr//a[starts-with(@href, 'glmnet')]/text()") |>
  as_list() |>
  unlist()

version_strings <-
  stringr::str_extract(files, "^glmnet_(.+)[.]tar[.]gz$", group = 1)

## Keep only links that look like glmnet_<version>.tar.gz; an NA here would
## make package_version() fail.
is_tarball <- !is.na(version_strings)
files <- files[is_tarball]
versions <- package_version(version_strings[is_tarball])

## The directory listing is ordered by file name, not by version (for example
## "3.0-1" is listed before "3.0").  The bisect below needs the candidates in
## ascending version order, so sort them explicitly.
by_version <- order(versions)
files <- files[by_version]
versions <- versions[by_version]

## url_prefix already ends in "/", so paste0() avoids the doubled slash that
## file.path() would insert.
urls <- paste0(url_prefix, files[versions >= "2"])

## Bisect the glmnet versions to find the working version.
## Check whether protina() runs against the currently installed glmnet.
##
## protina() is started under a 2 second time limit (R.utils::withTimeout).
## The version counts as working when protina() either is still running when
## the limit is hit or returns normally before that; it counts as broken when
## protina() (or loading the package) raises any other error within the limit.
##
## Reads the globals `lfc`, `slope`, `pgn` and `group` prepared earlier in the
## script.
##
## Returns a single TRUE or FALSE.  Stops with an error if the check itself
## could not be run (an error raised outside the inner try()).
protina_works <- function() {
  outcome <- NULL
  harness <-
    try({
      ## Discard anything protina() prints to stdout; errors reported by the
      ## inner try() go to stderr and therefore stay visible.
      capture.output({
        outcome <-
          R.utils::withTimeout({
            ## The time limit is delivered as an ordinary error, so this try()
            ## catches both the timeout and genuine failures; they are told
            ## apart by the condition message below.
            try({
              library(protina)
              protina(lfc = lfc,
                      slope = slope,
                      pgn = pgn,
                      grplist = group,
                      kfold = 3L)
            })
          },
          timeout = 2L,
          onTimeout = "warning")
      }, file = nullfile())
    }, silent = TRUE)

  if (inherits(harness, "try-error")) {
    stop("protina_works() could not run the check: ",
         conditionMessage(attr(harness, "condition")),
         call. = FALSE)
  }

  ## No error object came back: protina() returned (or the timeout was
  ## downgraded to a warning by withTimeout()) without failing.
  if (!inherits(outcome, "try-error")) {
    return(TRUE)
  }

  ## withTimeout() sets both the elapsed and the CPU limit, so either message
  ## means "still running when the limit was reached".
  msg <- conditionMessage(attr(outcome, "condition"))
  grepl("reached (elapsed|CPU) time limit", msg)
}
## Binary search over `urls` (0-based bounds `left`/`right`): install the
## glmnet tarball at the midpoint, test protina against it, and move the lower
## bound up when it works or the upper bound down when it does not.
left <- 0L
right <- length(urls) - 1L
while (left <= right) {
  i <- (left + right) %/% 2L
  message(sprintf("[%d, %d, %d]", left, i, right))
  url <- urls[i + 1L]
  version <-
    stringr::str_extract(basename(url),
                         "^glmnet_(.+)[.]tar[.]gz$",
                         group = 1)
  message("Trying ", version)
  ## Try next version of glmnet.  Both namespaces are unloaded first so the
  ## freshly installed glmnet is the one protina picks up.
  unloadNamespace("protina")
  unloadNamespace("glmnet")
  remotes::install_url(url, quiet = TRUE)
  library(protina)
  okay <- protina_works()
  message(version,
          " ",
          if (okay) "works" else "does not work")
  if (okay) {
    left <- i + 1L
  } else {
    right <- i - 1L
  }
}

## Report where the search ended: `right` is the index of the last entry that
## worked and `left` the index of the first entry that did not.
if (right >= 0L) {
  message("Last working glmnet in list: ", basename(urls[right + 1L]))
}
if (left < length(urls)) {
  message("First failing glmnet in list: ", basename(urls[left + 1L]))
}

Output from the R test script:

[0, 11, 23]
Trying 2.0-18
Error in stopifnot(length(class2) == 1L) : reached elapsed time limit
2.0-18 works
[12, 17, 23]
Trying 4.1-1
Error in .Fortran("get_int_parms2", epsnr = double(1), mxitnr = integer(1),  : 
  "get_int_parms2" not available for .Fortran() for package "glmnet"
4.1-1 does not work
[12, 14, 16]
Trying 3.0
Error in coef.cv.glmnet(cvfit, s = "lambda.min") : 
  could not find function "coef.cv.glmnet"
3.0 does not work
[12, 12, 13]
Trying 3.0-1
Error in coef.cv.glmnet(cvfit, s = "lambda.min") : 
  could not find function "coef.cv.glmnet"
3.0-1 does not work
omsai commented 1 day ago

The fix is to replace the call to the internal coef.cv.glmnet() function with the coef() generic function. This works with every glmnet release from version 2 through the current version 4. I think the glmnet authors should not have made that internal function public to begin with. Pull request incoming.