ddsjoberg / dcurves

Decision Curve Analysis
http://www.danieldsjoberg.com/dcurves/
Other
37 stars 14 forks source link

How will you calculate a difference in net benefit between two predictors of the disease? #18

Closed snijesh closed 1 year ago

snijesh commented 1 year ago

I have a dataframe as follows: dput(dfs)

structure(list(PatientID = c(1001L, 1002L, 1003L, 1004L, 1006L, 
1014L, 1015L, 1016L, 1018L, 1022L, 1024L, 1032L, 1040L, 1042L, 
1049L, 1056L, 1059L, 1060L, 1066L, 1084L, 1087L, 1090L, 1093L, 
1096L, 1097L, 1098L, 1099L, 1200L, 1205L, 1216L, 1221L, 1222L, 
1225L, 1226L, 1233L, 1239L), TimeMonths = c(9L, 8L, 69L, 104L, 
104L, 100L, 24L, 85L, 100L, 99L, 67L, 58L, 7L, 94L, 93L, 90L, 
91L, 90L, 89L, 72L, 84L, 84L, 11L, 82L, 39L, 46L, 82L, 82L, 9L, 
34L, 75L, 76L, 52L, 20L, 29L, 70L), Event = c(1L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L
), P1 = c(0.1, 0.03, 0.02, 0.05, 0.01, 0.04, 0.03, 
0.06, 0.02, 0.03, 0, 0, 0.11, 0.01, 0.03, 0, 0.01, 0.01, 0.01, 
0, 0, 0, 0.05, 0.01, 0, 0, 0, 0, 0.04, 0, 0.07, 0.01, 0.01, 0, 
0, 0), Age = c(88L, 49L, 60L, 46L, 50L, 60L, 38L, 74L, 39L, 65L, 
80L, 35L, 54L, 40L, 54L, 55L, 60L, 38L, 64L, 74L, 71L, 57L, 55L, 
49L, 42L, 30L, 63L, 46L, 47L, 58L, 34L, 72L, 50L, 60L, 73L, 51L
), Grade = c(2L, 2L, 2L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 
3L, 1L, 3L, NA, 2L, 3L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L), Tsize = c(2.5, 6, 4, 4, 3.5, 
1.8, 1.5, 1.5, 2, 3.5, 4, 2, 4.5, 3, 3, 2, 3, 1.5, 2.5, 5, 5, 
3, 6, 3, 2.5, 5, 1.5, 2, 5.5, 5, 3, 6, 4, 3.5, 4, 5), LNstatus = c(0L, 
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 
1L, 1L, 0L), Time = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5), Model1 = c(0.0754111003682264, 0.789025824308725, 0.0888669573431788, 
0.162944690463524, 0.148205788806964, 0.0232412222825853, 0.335639290548721, 
0.427877640015971, 0.0437151408243315, 0.340118759660636, 0.104749557872707, 
0.191041558905319, 0.901494025297558, 0.0271621117477482, 0.134690848022469, 
NA, 0.0681964605449713, 0.0799110215826685, 0.0617151301818596, 
0.129265190806531, 0.760358458474655, NA, 0.805815477115913, 
0.0622010539620427, 0.4275233016386, 0.632818820863808, 0.39812097019303, 
0.0217733118415042, 0.118190758626649, 0.721016961718498, 0.449778991144841, 
0.164102951793386, 0.857780996059435, 0.576334735175668, 0.668186510212559, 
0.107238793230798), Model2 = c(0.444752330666943, 0.930853771862407, 
0.0980811416950194, 0.311714208416219, 0.108779198079316, 0.0297296718406789, 
0.255165235219636, 0.652922467522166, 0.0301952043412281, 0.405213119172736, 
0.0800242041001141, 0.0767147984837693, 0.999999999862285, 0.0181219543140756, 
0.14865830843794, NA, 0.0496630840467432, 0.0382325860289233, 
0.0421016493675688, 0.113455005788149, 0.665969059849803, NA, 
0.991966233874329, 0.0424861062828985, 0.206267128670341, 0.453591375821231, 
0.180419209497713, 0.00979786791522574, 0.243955485600603, 0.596589087934302, 
0.778048018064795, 0.211053490646643, 0.747102269303274, 0.37666971629859, 
0.511588083962541, 0.0826035699229486)), class = "data.frame", row.names = c(NA, 
-36L))

I created the dca for both model using the following command:

dca(Surv(Time, Event) ~ Model1 + Model2, data = dfs,
label = list(Model1 = "Model1", Model2 = "Model2"), time = 5, thresholds = 1:35 / 100) %>% plot(smooth = TRUE)

image

Here model2 works better than model1. But how do I calculate the net benefit between the two models?

ddsjoberg commented 1 year ago

@snijesh here's an example calculating the NB difference:

library(dcurves)
library(tidyverse)

df <- 
  structure(list(PatientID = c(
    1001L, 1002L, 1003L, 1004L, 1006L,
    1014L, 1015L, 1016L, 1018L, 1022L, 1024L, 1032L, 1040L, 1042L,
    1049L, 1056L, 1059L, 1060L, 1066L, 1084L, 1087L, 1090L, 1093L,
    1096L, 1097L, 1098L, 1099L, 1200L, 1205L, 1216L, 1221L, 1222L,
    1225L, 1226L, 1233L, 1239L
  ), TimeMonths = c(
    9L, 8L, 69L, 104L,
    104L, 100L, 24L, 85L, 100L, 99L, 67L, 58L, 7L, 94L, 93L, 90L,
    91L, 90L, 89L, 72L, 84L, 84L, 11L, 82L, 39L, 46L, 82L, 82L, 9L,
    34L, 75L, 76L, 52L, 20L, 29L, 70L
  ), Event = c(
    1L, 0L, 0L, 0L,
    0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
    0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L
  ), P1 = c(
    0.1, 0.03, 0.02, 0.05, 0.01, 0.04, 0.03,
    0.06, 0.02, 0.03, 0, 0, 0.11, 0.01, 0.03, 0, 0.01, 0.01, 0.01,
    0, 0, 0, 0.05, 0.01, 0, 0, 0, 0, 0.04, 0, 0.07, 0.01, 0.01, 0,
    0, 0
  ), Age = c(
    88L, 49L, 60L, 46L, 50L, 60L, 38L, 74L, 39L, 65L,
    80L, 35L, 54L, 40L, 54L, 55L, 60L, 38L, 64L, 74L, 71L, 57L, 55L,
    49L, 42L, 30L, 63L, 46L, 47L, 58L, 34L, 72L, 50L, 60L, 73L, 51L
  ), Grade = c(
    2L, 2L, 2L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 2L, 1L,
    3L, 1L, 3L, NA, 2L, 3L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, 2L, 1L,
    2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L
  ), Tsize = c(
    2.5, 6, 4, 4, 3.5,
    1.8, 1.5, 1.5, 2, 3.5, 4, 2, 4.5, 3, 3, 2, 3, 1.5, 2.5, 5, 5,
    3, 6, 3, 2.5, 5, 1.5, 2, 5.5, 5, 3, 6, 4, 3.5, 4, 5
  ), LNstatus = c(
    0L,
    1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
    0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L,
    1L, 1L, 0L
  ), Time = c(
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5
  ), Model1 = c(
    0.0754111003682264, 0.789025824308725, 0.0888669573431788,
    0.162944690463524, 0.148205788806964, 0.0232412222825853, 0.335639290548721,
    0.427877640015971, 0.0437151408243315, 0.340118759660636, 0.104749557872707,
    0.191041558905319, 0.901494025297558, 0.0271621117477482, 0.134690848022469,
    NA, 0.0681964605449713, 0.0799110215826685, 0.0617151301818596,
    0.129265190806531, 0.760358458474655, NA, 0.805815477115913,
    0.0622010539620427, 0.4275233016386, 0.632818820863808, 0.39812097019303,
    0.0217733118415042, 0.118190758626649, 0.721016961718498, 0.449778991144841,
    0.164102951793386, 0.857780996059435, 0.576334735175668, 0.668186510212559,
    0.107238793230798
  ), Model2 = c(
    0.444752330666943, 0.930853771862407,
    0.0980811416950194, 0.311714208416219, 0.108779198079316, 0.0297296718406789,
    0.255165235219636, 0.652922467522166, 0.0301952043412281, 0.405213119172736,
    0.0800242041001141, 0.0767147984837693, 0.999999999862285, 0.0181219543140756,
    0.14865830843794, NA, 0.0496630840467432, 0.0382325860289233,
    0.0421016493675688, 0.113455005788149, 0.665969059849803, NA,
    0.991966233874329, 0.0424861062828985, 0.206267128670341, 0.453591375821231,
    0.180419209497713, 0.00979786791522574, 0.243955485600603, 0.596589087934302,
    0.778048018064795, 0.211053490646643, 0.747102269303274, 0.37666971629859,
    0.511588083962541, 0.0826035699229486
  )), class = "data.frame", row.names = c(
    NA,
    -36L
  ))

dca <- 
  dca(
    Surv(Time, Event) ~ Model1 + Model2,
      data = df,
      label = list(Model1 = "Model1", Model2 = "Model2"), time = 5, thresholds = 1:35 / 100
  )

as_tibble(dca) |> 
  filter(variable %in% c("Model1", "Model2")) %>%
  select(threshold, variable, net_benefit) %>%
  pivot_wider(id_cols = threshold, 
              names_from = variable,
              values_from = net_benefit) %>%
  mutate(
    nb_difference = Model1 - Model2
  )
#> # A tibble: 35 × 4
#>    threshold Model1 Model2 nb_difference
#>        <dbl>  <dbl>  <dbl>         <dbl>
#>  1      0.01  0.346  0.347     -0.000297
#>  2      0.02  0.340  0.341     -0.00120 
#>  3      0.03  0.336  0.336      0       
#>  4      0.04  0.330  0.332     -0.00245 
#>  5      0.05  0.325  0.331     -0.00619 
#>  6      0.06  0.319  0.327     -0.00751 
#>  7      0.07  0.320  0.322     -0.00221 
#>  8      0.08  0.288  0.320     -0.0320  
#>  9      0.09  0.286  0.321     -0.0352  
#> 10      0.1   0.281  0.320     -0.0392  
#> # … with 25 more rows

Created on 2022-12-08 with reprex v2.0.2