andrewallenbruce / provider

Public Healthcare Provider APIs :stethoscope:
https://andrewallenbruce.github.io/provider/
Other
18 stars 2 forks source link

Benchmarks: purrr vs. furrr #29

Closed andrewallenbruce closed 8 months ago

andrewallenbruce commented 8 months ago

utilization(type = "provider")

# Benchmark: sequential purrr vs. parallel furrr for
# utilization(type = "provider"), one call per value of util_years().
library(tidyverse)
library(furrr)
library(bench)
library(provider)

# Resolve futures in 5 background R sessions.
plan(multisession, workers = 5)

# The two benchmarked expressions are kept exactly as they were timed, so
# the recorded output below still describes the code shown here.
res <- mark(
  purrr = map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "provider")),
  furrr = future_map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "provider")),
  iterations = 2
) |>
  select(expression:total_time)

res
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 purrr        10.37s   10.54s    0.0949    92.5MB    1.19 
#> 2 furrr         2.96s    2.97s    0.337     12.9MB    0.169

# Switch back to sequential evaluation so the background workers are
# released once the benchmark is done.
plan(sequential)

Created on 2023-10-31 with reprex v2.0.2

andrewallenbruce commented 8 months ago

utilization(type = "service")

# Benchmark: sequential purrr vs. parallel furrr for
# utilization(type = "service"), one call per value of util_years().
library(tidyverse)
library(furrr)
library(bench)
library(provider)

# Resolve futures in 4 background R sessions.
plan(multisession, workers = 4)

# The two benchmarked expressions are kept exactly as they were timed, so
# the recorded output below still describes the code shown here.
res <- mark(
  purrr = map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "service")),
  furrr = future_map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "service")),
  iterations = 2
) |>
  select(expression:total_time)

res
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 purrr         1.05m    1.06m    0.0157   411.1MB   0.933 
#> 2 furrr        29.67s   29.81s    0.0335    10.8MB   0.0168

# Switch back to sequential evaluation so the background workers are
# released once the benchmark is done.
plan(sequential)

Created on 2023-10-31 with reprex v2.0.2

andrewallenbruce commented 8 months ago

utilization(type = "service")

# Repeat run of the utilization(type = "service") benchmark:
# sequential purrr vs. parallel furrr, one call per value of util_years().
library(tidyverse)
library(furrr)
library(bench)
library(provider)

# Resolve futures in 4 background R sessions.
plan(multisession, workers = 4)

# The two benchmarked expressions are kept exactly as they were timed, so
# the recorded output below still describes the code shown here.
res <- mark(
  purrr = map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "service")),
  furrr = future_map_dfr(util_years(), ~utilization(year = .x, npi = 1043477615, type = "service")),
  iterations = 2
) |>
  select(expression:total_time)

res
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 purrr         1.01m     1.1m    0.0151   414.1MB   0.582 
#> 2 furrr        28.26s    28.3s    0.0354    10.8MB   0.0177

# Switch back to sequential evaluation so the background workers are
# released once the benchmark is done.
plan(sequential)

Created on 2023-10-31 with reprex v2.0.2

andrewallenbruce commented 8 months ago

open_payments()

# Benchmark three ways of fetching open_payments() for 2020-2022:
# purrr::map_dfr(), a map() |> list_rbind() pipeline, and the package's
# open_payments_() wrapper (labelled "furrr" in the results).
library(tidyverse)
library(furrr)
library(bench)
library(provider)

# Resolve futures in 4 background R sessions.
plan(multisession, workers = 4)

res <- mark(
  map_dfr = map_dfr(
    2020:2022,
    ~ open_payments(year = .x, npi = 1043477615)
  ),
  map_pipe = 2020:2022 |>
    map(\(x) open_payments(year = x, npi = 1043477615)) |>
    list_rbind(),
  furrr = open_payments_(year = 2020:2022, npi = 1043477615),
  iterations = 3,
  # Have bench verify that all three expressions return equal results.
  check = TRUE
)

# Absolute timings and allocations.
res |> select(expression:total_time)
#> # A tibble: 3 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 map_dfr       2.31s    2.56s     0.400    27.5MB    2.67 
#> 2 map_pipe      2.27s    2.32s     0.419    13.8MB    2.79 
#> 3 furrr         1.24s    1.57s     0.684    11.9MB    0.228

# The same results expressed relative to the best value in each column.
summary(res, relative = TRUE)
#> # A tibble: 3 × 6
#>   expression   min median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <dbl>  <dbl>     <dbl>     <dbl>    <dbl>
#> 1 map_dfr     1.86   1.63      1         2.31     11.7
#> 2 map_pipe    1.83   1.48      1.05      1.16     12.2
#> 3 furrr       1      1         1.71      1         1

# Return to sequential evaluation, releasing the background workers.
plan(sequential)

Created on 2023-11-01 with reprex v2.0.2

andrewallenbruce commented 8 months ago

Functions using furrr are showing a warning:

# Calling the furrr-based wrapper produces the RNG warning shown below.
utilization_(
  npi = 1043477615,
  type = "provider"
)

#> Warning: UNRELIABLE VALUE: Future ('<none>') unexpectedly generated random
#> numbers without specifying argument 'seed'. There is a risk that those random
#> numbers are not statistically sound and the overall results might be invalid.
#> To fix this, specify 'seed=TRUE'. This ensures that proper, parallel-safe
#> random numbers are produced via the L'Ecuyer-CMRG method. To disable this
#> check, use 'seed=NULL', or set option 'future.rng.onMisuse' to "ignore".
andrewallenbruce commented 8 months ago

Fixed by adding `.options = furrr::furrr_options(seed = NULL)` to the furrr call:

# seed = NULL turns off the RNG-misuse check, which is what silences the
# "UNRELIABLE VALUE" warning. Per that warning's own text, seed = TRUE is
# the option to use instead if parallel-safe random numbers are needed.
furrr::future_pmap_dfr(
  x,
  utilization,
  .options = furrr::furrr_options(seed = NULL)
)