boennecd / dynamichazard

R package for survival analysis with state space models
9 stars 3 forks source link

Devel #1

Closed boennecd closed 7 years ago

boennecd commented 7 years ago

Test script

I ran the following script to test the speed. Results are changed with the test in the package.

library(dynamichazard)
library(microbenchmark)

sim_func <- function(n, p){
  func <- asNamespace("dynamichazard")$test_sim_func_logit
  set.seed(101)
  t_max <- 30L
  func(n_series = n, n_vars = p, t_max = t_max, x_range = 1, x_mean = 0,
       beta_start = runif(p, -1.5, 1.5),
       intercept_start = -3, sds = c(.1, rep(.25, p)),
       tstart_sampl_func = function(t0, t_max)
         max(0, runif(1, -t_max, t_max - 1L)),
       lambda = 1 / 10)
}

get_rune_time_summary <- function(n, p){
  sims <- sim_func(n, p)

  out <- summary(microbenchmark(
    EKF_one =
      suppressMessages(ddhazard(
        formula = Surv(tstart, tstop, event) ~ . - id,
        data = sims$res,
        model = "logit",
        id = sims$res$id,
        by = 1L,
        max_T = 30L,
        Q_0 = diag(1e6, p + 1L),
        Q = diag(1e-1, p + 1L),
        control = list(n_threads = 7))),

    EKF_more =
      suppressMessages(ddhazard(
        formula = Surv(tstart, tstop, event) ~ . - id,
        data = sims$res,
        model = "logit",
        id = sims$res$id,
        by = 1L,
        max_T = 30L,
        Q_0 = diag(1, p + 1L),
        Q = diag(1e-1, p + 1L),
        control = list(
          NR_eps = 1e-3,
          n_threads = 7))),

    EKF_one_single =
      suppressMessages(ddhazard(
        formula = Surv(tstart, tstop, event) ~ . - id,
        data = sims$res,
        model = "logit",
        id = sims$res$id,
        by = 1L,
        max_T = 30L,
        Q_0 = diag(1e6, p + 1L),
        Q = diag(1e-1, p + 1L),
        control = list(n_threads = 1))),

    EKF_more_single =
      suppressMessages(ddhazard(
        formula = Surv(tstart, tstop, event) ~ . - id,
        data = sims$res,
        model = "logit",
        id = sims$res$id,
        by = 1L,
        max_T = 30L,
        Q_0 = diag(1, p + 1L),
        Q = diag(1e-1, p + 1L),
        control = list(
          NR_eps = 1e-3,
          n_threads = 1))),

    times = 5
  ))

  cat("(n, p) = (", n, ", ", p, ")",
      ". Units is ", sQuote(attr(out, "unit")), "\n", sep = "")

  print(out[, c("expr", "lq", "median", "uq", "cld")], row.names = FALSE)

  cat("\n")
}

get_rune_time_summary(1e3, 15)

get_rune_time_summary(25e3, 15)

get_rune_time_summary(100e3, 15)

Before changes

> get_rune_time_summary(1e3, 15)
(n, p) = (1000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  80.86005  80.91931  84.54795  a 
        EKF_more 156.44997 158.41975 159.27506   b
  EKF_one_single  82.82311  83.06607  83.20395  a 
 EKF_more_single 157.03427 159.33827 164.03714  ab

> 
> get_rune_time_summary(25e3, 15)
(n, p) = (25000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  744.7012  776.8091  831.3059 a  
        EKF_more 1062.2811 1065.8003 1068.4974  b 
  EKF_one_single  977.5293  989.6142 1001.4601  b 
 EKF_more_single 1911.9463 1977.5573 1996.1450   c

 > get_rune_time_summary(100e3, 15)
(n, p) = (1e+05, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 2.316435 2.507648 3.074332  a 
        EKF_more 2.787232 2.814526 2.933001  a 
  EKF_one_single 3.768895 3.769759 4.234333  ab
 EKF_more_single 5.475471 5.799057 5.918236   b

Changed the thread_pool implementation

> get_rune_time_summary(1e3, 15)
(n, p) = (1000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  86.45768  87.46351  89.86667   a
        EKF_more 160.92760 170.30400 248.01659   a
  EKF_one_single  86.93768  87.57017  90.52681   a
 EKF_more_single 162.70064 164.89442 166.47664   a

> 
> get_rune_time_summary(25e3, 15)
(n, p) = (25000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  742.4474  773.5364  866.4036  a 
        EKF_more 1004.3899 1063.6385 1065.8793  a 
  EKF_one_single  934.9156  951.4809  959.9897  a 
 EKF_more_single 1677.4795 1738.3723 1797.3523   b

> 
> get_rune_time_summary(100e3, 15)
(n, p) = (1e+05, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 2.507902 2.529769 2.591977 a  
        EKF_more 2.708361 2.811922 2.860654 a  
  EKF_one_single 3.610834 3.639446 3.740219  b 
 EKF_more_single 5.269544 5.348535 5.598641   c

Changed EFK_helper implementation

> get_rune_time_summary(1e3, 15)
(n, p) = (1000, 15). Units is ‘milliseconds’
            expr       lq    median       uq cld
         EKF_one  82.8559  86.56198  88.3441 a  
        EKF_more 164.2125 167.65156 167.8914   c
  EKF_one_single  89.6158  93.41867 167.4908 ab 
 EKF_more_single 161.4850 166.01007 168.6546  bc

> 
> get_rune_time_summary(25e3, 15)
(n, p) = (25000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  663.2636  709.0315  753.5964 a  
        EKF_more  938.4518  943.4272 1125.3136  b 
  EKF_one_single  845.8414  860.3710  950.5801  b 
 EKF_more_single 1658.8567 1754.4150 1780.4555   c

> 
> get_rune_time_summary(100e3, 15)
(n, p) = (1e+05, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 2.289487 2.369060 2.448422 a  
        EKF_more 2.727496 2.745346 2.751623 a  
  EKF_one_single 3.519627 3.585456 3.644883  b 
 EKF_more_single 5.431487 5.719187 5.729336   c

Changed dger to dsyr

> get_rune_time_summary(1e3, 15)
(n, p) = (1000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  77.85205  78.99022  83.36119   a
        EKF_more 135.72938 142.28543 142.50825   a
  EKF_one_single  78.38894  85.08168  86.35259   a
 EKF_more_single 135.54291 149.28197 247.92099   a

> 
> get_rune_time_summary(25e3, 15)
(n, p) = (25000, 15). Units is ‘milliseconds’
            expr        lq    median        uq cld
         EKF_one  758.7745  790.8354  941.7272  a 
        EKF_more 1049.5648 1051.8736 1122.3261  a 
  EKF_one_single  941.8809  955.5405 1157.2156  a 
 EKF_more_single 1526.3080 1531.9992 1643.1447   b

> 
> get_rune_time_summary(100e3, 15)
(n, p) = (1e+05, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 2.333461 2.373363 2.761739 a  
        EKF_more 2.775776 2.839043 2.904525 a  
  EKF_one_single 3.307661 3.409441 3.421127  b 
 EKF_more_single 4.879773 4.945290 5.439717   c

Site note

I tried to sort the rows but it slowed down the computation:

> get_rune_time_summary(1e3, 15)
(n, p) = (1000, 15). Units is ‘milliseconds’
            expr       lq   median       uq cld
         EKF_one 114.3313 195.0858 195.1704  a 
        EKF_more 273.0260 363.0266 390.5347   b
  EKF_one_single 111.3011 113.3590 164.2074  a 
 EKF_more_single 272.7052 278.4648 491.3395  ab

> 
> get_rune_time_summary(25e3, 15)
(n, p) = (25000, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 1.373352 1.395487 1.643374  a 
        EKF_more 1.631374 1.731395 1.788932  a 
  EKF_one_single 1.416539 1.503603 1.830966  a 
 EKF_more_single 2.216484 2.350622 2.447617   b

> 
> get_rune_time_summary(100e3, 15)
(n, p) = (1e+05, 15). Units is ‘seconds’
            expr       lq   median       uq cld
         EKF_one 4.504713 5.004235 5.069915  a 
        EKF_more 4.806569 4.944134 5.041487  a 
  EKF_one_single 5.117595 5.148371 5.226720  a 
 EKF_more_single 6.413533 6.551310 6.694746   b