ddsjoberg / dcurves

Decision Curve Analysis
http://www.danieldsjoberg.com/dcurves/
Other
37 stars 14 forks source link

How will you choose the time in dca analysis for cox model? #17

Closed snijesh closed 2 months ago

snijesh commented 1 year ago

I have a data as follows: dput(df)


structure(list(ID = c(1001L, 1002L, 1003L, 1004L, 1006L, 1014L, 
1015L, 1016L, 1018L, 1022L, 1024L, 1032L, 1040L, 1042L, 1049L, 
1056L, 1059L, 1060L, 1066L, 1084L, 1087L, 1090L, 1093L, 1096L, 
1097L, 1098L, 1099L, 1200L, 1205L, 1216L, 1221L, 1222L, 1225L, 
1226L, 1233L, 1239L), Time = c(9L, 8L, 69L, 104L, 104L, 100L, 
24L, 85L, 100L, 99L, 67L, 58L, 7L, 94L, 93L, 90L, 91L, 90L, 89L, 
72L, 84L, 84L, 11L, 82L, 39L, 46L, 82L, 82L, 9L, 34L, 75L, 76L, 
52L, 20L, 29L, 70L), Event = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L), Risk1 = c(0.1, 
0.03, 0.02, 0.05, 0.01, 0.04, 0.03, 0.06, 0.02, 0.03, 0, 0, 0.11, 
0.01, 0.03, 0, 0.01, 0.01, 0.01, 0, 0, 0, 0.05, 0.01, 0, 0, 0, 
0, 0.04, 0, 0.07, 0.01, 0.01, 0, 0, 0), Risk2 = c(88L, 49L, 60L, 
46L, 50L, 60L, 38L, 74L, 39L, 65L, 80L, 35L, 54L, 40L, 54L, 55L, 
60L, 38L, 64L, 74L, 71L, 57L, 55L, 49L, 42L, 30L, 63L, 46L, 47L, 
58L, 34L, 72L, 50L, 60L, 73L, 51L), Risk3 = c(2L, 2L, 2L, 3L, 
3L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 3L, 1L, 3L, NA, 2L, 3L, 2L, 2L, 
2L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L
), Risk4 = c(2.5, 6, 4, 4, 3.5, 1.8, 1.5, 1.5, 2, 3.5, 4, 2, 
4.5, 3, 3, 2, 3, 1.5, 2.5, 5, 5, 3, 6, 3, 2.5, 5, 1.5, 2, 5.5, 
5, 3, 6, 4, 3.5, 4, 5), Risk5 = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 
1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 
0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L)), class = "data.frame", row.names = c(NA, 
-36L))

I am trying draw dca with cox model. But when I give parameter time =1.5, it throws an error as follows:

Error: Cannot calculate outcome prevalence at specified time, likely due to no observed data beyond selected time.
Traceback:

1. dca(Surv(Time, Event) ~ Risk1, data = df, time = 1.5, thresholds = seq(0, 
 .     0.5, by = 0.01), label = list(Risk1 = "Prediction Model")) %>% 
 .     plot(smooth = TRUE)
2. plot(., smooth = TRUE)
3. dca(Surv(Time, Event) ~ Risk1, data = df, time = 1.5, thresholds = seq(0, 
 .     0.5, by = 0.01), label = list(Risk1 = "Prediction Model"))
4. test_consequences_data_frame(model_frame = model_frame, outcome_name = outcome_name, 
 .     outcome_type = outcome_type, statistics = c("pos_rate", "tp_rate", 
 .         "fp_rate", "harm", "net_benefit"), thresholds = thresholds, 
 .     label = label, time = time, prevalence = prevalence, harm = harm)
5. names(model_frame) %>% setdiff(outcome_name) %>% lapply(function(x) {
 .     .calculate_test_consequences(model_frame[[outcome_name]], 
 .         model_frame[[x]], thresholds = thresholds, outcome_type = outcome_type, 
 .         prevalence = prevalence, time = time) %>% dplyr::mutate(variable = x, 
 .         label = .env$label[[x]] %||% attr(model_frame[[x]], "label") %||% 
 .             x, harm = .env$harm[[x]] %||% 0)
 . }) %>% dplyr::bind_rows() %>% dplyr::mutate(label = factor(.data$label, 
 .     levels = unique(.data$label)), neg_rate = 1 - .data$pos_rate, 
 .     fn_rate = .data$pos_rate - .data$tp_rate, tn_rate = .data$neg_rate - 
 .         .data$fp_rate) %>% tibble::as_tibble()
6. tibble::as_tibble(.)
7. dplyr::mutate(., label = factor(.data$label, levels = unique(.data$label)), 
 .     neg_rate = 1 - .data$pos_rate, fn_rate = .data$pos_rate - 
 .         .data$tp_rate, tn_rate = .data$neg_rate - .data$fp_rate)
8. dplyr::bind_rows(.)
9. list2(...)
10. lapply(., function(x) {
  .     .calculate_test_consequences(model_frame[[outcome_name]], 
  .         model_frame[[x]], thresholds = thresholds, outcome_type = outcome_type, 
  .         prevalence = prevalence, time = time) %>% dplyr::mutate(variable = x, 
  .         label = .env$label[[x]] %||% attr(model_frame[[x]], "label") %||% 
  .             x, harm = .env$harm[[x]] %||% 0)
  . })
11. FUN(X[[i]], ...)
12. .calculate_test_consequences(model_frame[[outcome_name]], model_frame[[x]], 
  .     thresholds = thresholds, outcome_type = outcome_type, prevalence = prevalence, 
  .     time = time) %>% dplyr::mutate(variable = x, label = .env$label[[x]] %||% 
  .     attr(model_frame[[x]], "label") %||% x, harm = .env$harm[[x]] %||% 
  .     0)
13. dplyr::mutate(., variable = x, label = .env$label[[x]] %||% attr(model_frame[[x]], 
  .     "label") %||% x, harm = .env$harm[[x]] %||% 0)
14. .calculate_test_consequences(model_frame[[outcome_name]], model_frame[[x]], 
  .     thresholds = thresholds, outcome_type = outcome_type, prevalence = prevalence, 
  .     time = time)
15. paste("Cannot calculate outcome prevalence at specified time,", 
  .     "likely due to no observed data beyond selected time.") %>% 
  .     stop(call. = FALSE)
16. stop(., call. = FALSE)

my command was:

dca(Surv(Time, Event) ~ Risk1, 
    data = df, 
    time = 1.5,
    thresholds = seq(0, 0.50, by = 0.01),
    label = list(Risk1 = "Prediction Model")) %>%
  plot(smooth = TRUE)

How to specifically set the parameter time in dca ?

ddsjoberg commented 1 year ago

In your case, there are no observed events at time 1.5, so the DCA cannot be calculated. I should, however, update the error messaging in this case.

library(dcurves)
library(ggsurvfit)
#> Loading required package: ggplot2

df <- structure(list(ID = c(1001L, 1002L, 1003L, 1004L, 1006L, 1014L, 
                      1015L, 1016L, 1018L, 1022L, 1024L, 1032L, 1040L, 1042L, 1049L, 
                      1056L, 1059L, 1060L, 1066L, 1084L, 1087L, 1090L, 1093L, 1096L, 
                      1097L, 1098L, 1099L, 1200L, 1205L, 1216L, 1221L, 1222L, 1225L, 
                      1226L, 1233L, 1239L), Time = c(9L, 8L, 69L, 104L, 104L, 100L, 
                                                     24L, 85L, 100L, 99L, 67L, 58L, 7L, 94L, 93L, 90L, 91L, 90L, 89L, 
                                                     72L, 84L, 84L, 11L, 82L, 39L, 46L, 82L, 82L, 9L, 34L, 75L, 76L, 
                                                     52L, 20L, 29L, 70L), Event = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
                                                                                    0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
                                                                                    1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L), Risk1 = c(0.1, 
                                                                                                                                               0.03, 0.02, 0.05, 0.01, 0.04, 0.03, 0.06, 0.02, 0.03, 0, 0, 0.11, 
                                                                                                                                               0.01, 0.03, 0, 0.01, 0.01, 0.01, 0, 0, 0, 0.05, 0.01, 0, 0, 0, 
                                                                                                                                               0, 0.04, 0, 0.07, 0.01, 0.01, 0, 0, 0), Risk2 = c(88L, 49L, 60L, 
                                                                                                                                                                                                 46L, 50L, 60L, 38L, 74L, 39L, 65L, 80L, 35L, 54L, 40L, 54L, 55L, 
                                                                                                                                                                                                 60L, 38L, 64L, 74L, 71L, 57L, 55L, 49L, 42L, 30L, 63L, 46L, 47L, 
                                                                                                                                                                                                 58L, 34L, 72L, 50L, 60L, 73L, 51L), Risk3 = c(2L, 2L, 2L, 3L, 
                                                                                                                                                                                                                                               3L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 3L, 1L, 3L, NA, 2L, 3L, 2L, 2L, 
                                                                                                                                                                                                                                               2L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L
                                                                                                                                                                                                 ), Risk4 = c(2.5, 6, 4, 4, 3.5, 1.8, 1.5, 1.5, 2, 3.5, 4, 2, 
                                                                                                                                                                                                              4.5, 3, 3, 2, 3, 1.5, 2.5, 5, 5, 3, 6, 3, 2.5, 5, 1.5, 2, 5.5, 
                                                                                                                                                                                                              5, 3, 6, 4, 3.5, 4, 5), Risk5 = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 
                                                                                                                                                                                                                                                1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 
                                                                                                                                                                                                                                                0L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L)), class = "data.frame", row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                          -36L))

survfit2(
  Surv(Time, Event) ~ 1,
  data = df
) |> 
  ggsurvfit()


dca(
  Surv(Time, Event) ~ Risk1, 
  data = df, 
  time = 25, 
  thresholds = seq(0,  0.5, by = 0.01), 
  label = list(Risk1 = "Prediction Model")
)
#> Printing with `plot(x, type = 'net_benefit', smooth = FALSE, show_ggplot_code = FALSE)`

Created on 2022-11-24 with reprex v2.0.2