pat-s / oddsratio

Simplified odds ratio calculation of binomial GAM/GLM models
https://pat-s.github.io/oddsratio
Other
32 stars 2 forks source link

plot_gam "operation not valid for atomic vectors" #54

Open rikivillalba opened 1 year ago

rikivillalba commented 1 year ago

It seems that function plot_gam does not extract correctly the data of the gam plot when the predictor name appears more than one time in the model

See for example https://stackoverflow.com/questions/74237170/having-issues-with-or-gam-function-in-oddsratio-package/74237712#74237712

Problem appears to be here: grep find two elements containing the same string, thus when indexing plot data to extract the relevant values it extracts a second level element instead (since [[]] operator when given a vector extracts an element indexing at every depth level).

rikivillalba commented 1 year ago
heart <- structure(list(died = structure(c(2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), levels = c("Survive", 
"Died"), class = c("labelled", "factor"), label = "Death", format = "%8.0g", value.label.table = structure(0:1, names = c("Survive", 
"Died"))), procedure = structure(c(2L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L), levels = c("PTCA", 
"CABG"), class = c("labelled", "factor"), label = "1=CABG; 0=PTCA", format = "%8.0g", value.label.table = structure(0:1, names = c("PTCA", 
"CABG"))), age = structure(c(65L, 69L, 76L, 65L, 69L, 67L, 69L, 
66L, 74L, 67L, 76L, 76L, 68L, 77L, 77L, 70L, 68L, 69L, 68L, 70L, 
71L, 75L, 69L, 72L, 66L, 77L, 68L, 78L, 77L, 69L, 65L, 70L, 65L, 
70L), label = "PATIENT AGE", class = c("labelled", "integer"), format = "%8.0g"), 
    gender = structure(c(2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 
    2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L), levels = c("Female", 
    "Male"), class = c("labelled", "factor"), label = "Gender", format = "%8.0g", value.label.table = structure(0:1, names = c("Female", 
    "Male"))), los = structure(c(10L, 7L, 7L, 8L, 1L, 7L, 2L, 
    9L, 3L, 1L, 6L, 14L, 4L, 13L, 10L, 4L, 2L, 2L, 10L, 2L, 6L, 
    12L, 4L, 2L, 2L, 14L, 3L, 2L, 5L, 9L, 3L, 3L, 3L, 2L), label = "LOS", class = c("labelled", 
    "integer"), format = "%8.0g"), type = structure(c(1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 
    2L, 1L), levels = c("Elective", "Emer/Urg"), class = c("labelled", 
    "factor"), label = "Severity", format = "%8.0g", value.label.table = structure(0:1, names = c("Elective", 
    "Emer/Urg")))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-34L), stata.info = list(datalabel = "AZ 1991; CABG(106,107) & PTCA(112)", 
    version = 12L, time.stamp = "17 Feb 2015 12:45", val.labels = c("diedlb", 
    "pxllb", "", "sexllb", "", "typellb"), label.table = list(
        pxllb = structure(0:1, names = c("PTCA", "CABG")), typellb = structure(0:1, names = c("Elective", 
        "Emer/Urg")), sexllb = structure(0:1, names = c("Female", 
        "Male")), diedlb = structure(0:1, names = c("Survive", 
        "Died")))))

#### Load Libraries ####
library(oddsratio)
library(mgcv)

#### Fit GAM With Tensor Splines ####
fit <- gam(factor(died) 
          ~ ti(age)
          + ti(los)
          + ti(age,los),
          data = heart,
          family = binomial)

plot_gam(model = fit,
         pred = "age")

# Error in plot_df[[set_pred]]$x : $ operator is invalid for atomic vectors
Teufelkoenig commented 1 year ago

It looks like or_gam is having similar issues.

or_gam(data = heart,
       model = fit,
       pred = "los",
       percentage = 10)

Which gives this error that the predictor isn't found:

Error in eval(predvars, data, env) : object 'los' not found
In addition: Warning message:
In predict.gam(model, data, type = "link", se.fit = TRUE) :
  not all required variables have been supplied in  newdata!
pat-s commented 1 year ago

Hey guys, just seeing this know as I didn't "watch" the repo actively. Will try to take a look in the next days!

pat-s commented 1 year ago

@rikivillalba contains an updated subsetting logic that now accounts for mixed/multiple predictor specifications.

A reprex based on your example is attached below. Using pred = "age, los" also works and extracts the respective list related to this predictor combination. I don't know however if the plotted result of such mixed predictors is meaningful.

Could you have a look if the linked PR fixes your issue?

heart <- structure(list(
  died = structure(c(
    2L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L
  ), levels = c(
    "Survive",
    "Died"
  ), class = c("labelled", "factor"), label = "Death", format = "%8.0g", value.label.table = structure(0:1, names = c(
    "Survive",
    "Died"
  ))), procedure = structure(c(
    2L, 2L, 1L, 2L, 1L, 2L, 1L,
    2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L
  ), levels = c(
    "PTCA",
    "CABG"
  ), class = c("labelled", "factor"), label = "1=CABG; 0=PTCA", format = "%8.0g", value.label.table = structure(0:1, names = c(
    "PTCA",
    "CABG"
  ))), age = structure(c(
    65L, 69L, 76L, 65L, 69L, 67L, 69L,
    66L, 74L, 67L, 76L, 76L, 68L, 77L, 77L, 70L, 68L, 69L, 68L, 70L,
    71L, 75L, 69L, 72L, 66L, 77L, 68L, 78L, 77L, 69L, 65L, 70L, 65L,
    70L
  ), label = "PATIENT AGE", class = c("labelled", "integer"), format = "%8.0g"),
  gender = structure(c(
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
    2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L
  ), levels = c(
    "Female",
    "Male"
  ), class = c("labelled", "factor"), label = "Gender", format = "%8.0g", value.label.table = structure(0:1, names = c(
    "Female",
    "Male"
  ))), los = structure(c(
    10L, 7L, 7L, 8L, 1L, 7L, 2L,
    9L, 3L, 1L, 6L, 14L, 4L, 13L, 10L, 4L, 2L, 2L, 10L, 2L, 6L,
    12L, 4L, 2L, 2L, 14L, 3L, 2L, 5L, 9L, 3L, 3L, 3L, 2L
  ), label = "LOS", class = c(
    "labelled",
    "integer"
  ), format = "%8.0g"), type = structure(c(
    1L, 2L,
    2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
    2L, 1L
  ), levels = c("Elective", "Emer/Urg"), class = c(
    "labelled",
    "factor"
  ), label = "Severity", format = "%8.0g", value.label.table = structure(0:1, names = c(
    "Elective",
    "Emer/Urg"
  )))
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(
  NA,
  -34L
), stata.info = list(
  datalabel = "AZ 1991; CABG(106,107) & PTCA(112)",
  version = 12L, time.stamp = "17 Feb 2015 12:45", val.labels = c(
    "diedlb",
    "pxllb", "", "sexllb", "", "typellb"
  ), label.table = list(
    pxllb = structure(0:1, names = c("PTCA", "CABG")), typellb = structure(0:1, names = c(
      "Elective",
      "Emer/Urg"
    )), sexllb = structure(0:1, names = c(
      "Female",
      "Male"
    )), diedlb = structure(0:1, names = c(
      "Survive",
      "Died"
    ))
  )
))

#### Load Libraries ####
library(oddsratio)
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.8-40. For overview type 'help("mgcv-package")'.

#### Fit GAM With Tensor Splines ####
fit <- gam(
  factor(died)
  ~ ti(age, los) + ti(age) + ti(los),
  data = heart,
  family = binomial
)

plot_gam(
  model = fit,
  pred = "age"
)

Created on 2022-12-22 with reprex v2.0.2