eth-mds / ricu

🏥 ICU data with R 🏥
https://eth-mds.github.io/ricu/
GNU General Public License v3.0
37 stars 10 forks source link

Floating point issues in `sofa_cardio` #59

Open prockenschaub opened 7 months ago

prockenschaub commented 7 months ago

I stumbled over a minor inconsistency in sofa_cardio that assigns incorrect scores, most likely due to a floating point issue. See the following reprex with the latest main branch.

Local .Rprofile detected at /Users/patrick/projects/ricu-package/.Rprofile

devtools::load_all()
#> ℹ Loading ricu
#> 
#> ── ricu 0.6.0 ──────────────────────────────────────────────────────────────────
#> 
#> The following data sources are configured to be attached:
#> (the environment variable `RICU_SRC_LOAD` controls this)
#> 
#> ✔ mimic: 26 of 26 tables available
#> ✔ mimic_demo: 25 of 25 tables available
#> ✔ eicu: 31 of 31 tables available
#> ✔ eicu_demo: 31 of 31 tables available
#> ✔ hirid: 5 of 5 tables available
#> ✔ aumc: 7 of 7 tables available
#> ✔ miiv: 31 of 31 tables available
#> ✖ sic: 7 of 8 tables available
#> 
#> ────────────────────────────────────────────────────────────────────────────────

src = "mimic_demo"

map = load_concepts("map", src = src)
#> ── Loading 1 concept ───────────────────────────────────────────────────────────
#> • map  ◯ removed 22 (0.14%) of rows due to `NA` values  ◯ removed 13 (0.08%) of rows due to out of range entries
#> ────────────────────────────────────────────────────────────────────────────────
dopa60 = load_concepts("dopa60", src = src)
#> ── Loading 1 concept ───────────────────────────────────────────────────────────
#> • dopa60• dopa_rate  ◯ removed 2788 (5.21%) of rows due to `NA` values• dopa_dur
#> ────────────────────────────────────────────────────────────────────────────────
norepi60 = load_concepts("norepi60", src = src)
#> ── Loading 1 concept ───────────────────────────────────────────────────────────
#> • norepi60• norepi_rate  ◯ removed 3482 (2.62%) of rows due to `NA` values  ◯ removed 12 (0.01%) of rows due to out of range entries• norepi_dur
#> ────────────────────────────────────────────────────────────────────────────────
dobu60 = load_concepts("dobu60", src = src)
#> ── Loading 1 concept ───────────────────────────────────────────────────────────
#> • dobu60• dobu_rate  ◯ removed 240 (6.77%) of rows due to `NA` values• dobu_dur
#> ────────────────────────────────────────────────────────────────────────────────
epi60 = load_concepts("epi60", src = src)
#> ── Loading 1 concept ───────────────────────────────────────────────────────────
#> • epi60• epi_rate• epi_dur
#> ────────────────────────────────────────────────────────────────────────────────

# Get sofa_cardio as currently implemented ----------------------------------------
input = mget(c("map", "dopa60", "norepi60", "dobu60", "epi60"))
cur = sofa_cardio(input)

# Recalculate with a function that adjusts for floating point imprecision ---------
sofa_cardio_upd <- function(..., interval = NULL) {
  score_calc <- function(map, dopa, norepi, dobu, epi) {
    eps <- .Machine$double.eps ^ 0.5
    fifelse(
      is_true(dopa > 15 + eps | epi > 0.1 + eps | norepi > 0.1 + eps), 4L, fifelse(
        is_true(dopa > 5 + eps | (epi > 0 + eps &    epi <= 0.1 + eps) |
                  (norepi > 0 + eps & norepi <= 0.1 + eps)), 3L, fifelse(
                    is_true((dopa > 0 + eps & dopa <= 5 + eps) | dobu > 0 + eps), 2L, fifelse(
                      is_true(map < 70), 1L, 0L
    ))))
  }
  cnc <- c("map", "dopa60", "norepi60", "dobu60", "epi60")
  dat <- collect_dots(cnc, interval, ..., merge_dat = TRUE)
  dat <- dat[, c("sofa_cardio") := score_calc(
    get("map"), get("dopa60"), get("norepi60"), get("dobu60"), get("epi60")
  )]
  #dat <- rm_cols(dat, cnc, by_ref = TRUE) <---- keep variables to investigate diffs
  dat
}

upd = sofa_cardio_upd(input)

# Compare the results obtained with each method -------------------------------------

# We can see that the results are not equal
setequal(cur, upd)
#> [1] FALSE

# There is a difference in 6 rows, which should have been classified as 3 but were 4.
# The updated function gets it right. 
comb = merge(upd, cur, by = c("icustay_id", "charttime"), suffixes = c(".upd", ".cur"))
setnames(comb, c("sofa_cardio.cur", "sofa_cardio.upd"), c("cur", "upd"))
as.data.frame(comb[cur != upd])
#>   icustay_id charttime  map dopa60 norepi60 dobu60 epi60 upd cur
#> 1     203766  82 hours 81.0      6      0.1     NA    NA   3   4
#> 2     223177  29 hours 75.5      4      0.1     NA    NA   3   4
#> 3     241562  -1 hours 78.5     NA      0.1     NA    NA   3   4
#> 4     248755  49 hours 65.0     NA      0.1     NA    NA   3   4
#> 5     248755 104 hours 63.0     NA      0.1     NA    NA   3   4
#> 6     248755 105 hours 61.0     NA      0.1     NA    NA   3   4

Created on 2024-04-11 with reprex v2.1.0