r-lib / sparsevctrs

Sparse vector class using ALTREP
https://r-lib.github.io/sparsevctrs/
Other
12 stars · 1 fork · source link

Sparse dummy #80

Closed EmilHvitfeldt closed 2 weeks ago

EmilHvitfeldt commented 3 weeks ago

ref: #49

library(sparsevctrs)

# Build a random factor for benchmarking.
#
# Draws `n_obs` values, with replacement, from the integers 1..`n_lvls` and
# returns them as a factor whose level set is always the full 1..`n_lvls`,
# so levels that were never drawn are still present.
create_factor <- function(n_obs, n_lvls) {
  lvl_ids <- seq_len(n_lvls)
  draws <- sample(lvl_ids, size = n_obs, replace = TRUE)

  factor(draws, levels = lvl_ids)
}

# Quick check: draw 2 observations from a factor with 4 levels.
create_factor(2, 4)
#> [1] 3 3
#> Levels: 1 2 3 4

# Benchmark one-hot encoding of a factor over a grid of input sizes.
# bench::press() evaluates the braced block once per combination of
# `n_obs` (10^0 .. 10^5) and `n_lvls` (10^0 .. 10^4).
result <- bench::press(
  n_obs = 10^(0:5),
  n_lvls = 10^(0:4),
  {
    # Fresh random factor for this grid cell; no seed is set, so the data
    # differs from run to run.
    x <- create_factor(n_obs, n_lvls)
    bench::mark(
      # sparsevctrs path: sparse_dummy() output wrapped in a tibble.
      sparse = tibble::new_tibble(sparse_dummy(x, one_hot = TRUE)),
      # hardhat path: fct_encode_one_hot() output converted to a tibble.
      hardhat = tibble::as_tibble(hardhat::fct_encode_one_hot(x))
    )
  }
)

# n = Inf prints every row instead of the truncated default view.
print(result, n = Inf)
#> # A tibble: 60 × 15
#>    expression  n_obs n_lvls      min   median `itr/sec` mem_alloc `gc/sec` n_itr
#>    <bch:expr>  <dbl>  <dbl> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int>
#>  1 sparse          1      1   7.13µs   7.87µs   1.21e+5   19.77KB    36.4   9997
#>  2 hardhat         1      1  46.45µs  50.14µs   1.92e+4  339.03KB    36.3   8991
#>  3 sparse         10      1   7.22µs      8µs   1.18e+5        0B    35.3   9997
#>  4 hardhat        10      1  46.74µs  50.47µs   1.91e+4        0B    33.9   9017
#>  5 sparse        100      1   7.46µs   8.16µs   1.16e+5      896B    34.9   9997
#>  6 hardhat       100      1  47.77µs  51.37µs   1.88e+4    3.02KB    36.4   8767
#>  7 sparse       1000      1   8.49µs   9.76µs   9.79e+4    7.91KB    29.4   9997
#>  8 hardhat      1000      1  53.71µs  61.99µs   1.57e+4   27.62KB    27.7   7348
#>  9 sparse      10000      1  17.47µs  23.86µs   4.03e+4   78.22KB    60.6   9985
#> 10 hardhat     10000      1 109.31µs 156.58µs   6.30e+3  273.72KB    34.1   2405
#> 11 sparse     100000      1 106.39µs 150.72µs   6.58e+3  781.34KB   112.    2476
#> 12 hardhat    100000      1 823.89µs   1.02ms   9.77e+2    2.67MB    60.0    326
#> 13 sparse          1     10   8.65µs   9.39µs   1.02e+5        0B    40.8   9996
#> 14 hardhat         1     10  58.51µs  62.28µs   1.55e+4        0B    35.9   7343
#> 15 sparse         10     10   8.73µs   9.51µs   1.00e+5        0B    40.1   9996
#> 16 hardhat        10     10  58.55µs   63.3µs   1.53e+4      448B    36.1   7202
#> 17 sparse        100     10   9.22µs  10.21µs   9.20e+4      448B    46.0   9995
#> 18 hardhat       100     10  60.06µs  65.44µs   1.49e+4   10.47KB    34.5   6895
#> 19 sparse       1000     10  13.41µs  15.21µs   6.31e+4   12.75KB    25.2   9996
#> 20 hardhat      1000     10  67.24µs  80.48µs   1.21e+4   98.36KB    28.8   5466
#> 21 sparse      10000     10  46.78µs  56.01µs   1.76e+4  118.19KB    44.6   7496
#> 22 hardhat     10000     10 133.66µs 235.87µs   4.07e+3  977.27KB    89.0   1371
#> 23 sparse     100000     10 391.75µs  471.7µs   2.15e+3    1.15MB    91.6    870
#> 24 hardhat    100000     10   1.23ms   1.56ms   6.39e+2    9.54MB   389.     110
#> 25 sparse          1    100  15.46µs   17.3µs   5.51e+4    3.36KB    66.2   9988
#> 26 hardhat         1    100 179.13µs 194.83µs   5.05e+3   10.23KB    38.6   2351
#> 27 sparse         10    100  15.62µs  17.55µs   5.38e+4    3.36KB    70.1   9987
#> 28 hardhat        10    100 179.05µs 193.15µs   5.09e+3   13.75KB    39.0   2219
#> 29 sparse        100    100  16.28µs  17.43µs   5.48e+4     3.8KB    65.8   9988
#> 30 hardhat       100    100 184.71µs 203.44µs   4.83e+3    94.8KB    38.3   2142
#> 31 sparse       1000    100  20.62µs  22.71µs   4.19e+4    7.31KB    54.6   9987
#> 32 hardhat      1000    100 207.79µs 265.23µs   3.74e+3   815.5KB    86.3   1472
#> 33 sparse      10000    100  56.29µs  65.31µs   1.38e+4  130.38KB    60.4   4789
#> 34 hardhat     10000    100 434.19µs 866.17µs   1.19e+3    7.83MB   458.     229
#> 35 sparse     100000    100 371.01µs 436.69µs   2.30e+3    1.16MB    81.9    871
#> 36 hardhat    100000    100   6.75ms   8.22ms   1.24e+2   78.22MB   409.      10
#> 37 sparse          1   1000  81.39µs  94.73µs   1.03e+4   31.48KB   108.    4292
#> 38 hardhat         1   1000    1.4ms    1.5ms   6.65e+2   91.03KB    46.0    289
#> 39 sparse         10   1000  82.04µs  94.05µs   1.03e+4   31.48KB   114.    4087
#> 40 hardhat        10   1000    1.4ms   1.49ms   6.66e+2  126.19KB    45.6    292
#> 41 sparse        100   1000   82.7µs  94.34µs   1.03e+4   31.92KB   115.    4233
#> 42 hardhat       100   1000   1.43ms   1.57ms   6.30e+2  917.39KB    46.6    257
#> 43 sparse       1000   1000   90.9µs 101.76µs   9.51e+3   35.44KB   106.    3683
#> 44 hardhat      1000   1000   1.82ms   2.28ms   4.40e+2    7.78MB    38.4    172
#> 45 sparse      10000   1000    131µs 143.66µs   6.82e+3   70.59KB    76.6   2936
#> 46 hardhat     10000   1000   6.12ms   7.25ms   1.35e+2   76.62MB   304.      12
#> 47 sparse     100000   1000 528.65µs 567.03µs   1.13e+3    1.27MB    12.0    568
#> 48 hardhat    100000   1000 125.51ms 145.57ms   6.94e+0  764.98MB    15.6      4
#> 49 sparse          1  10000 751.08µs 850.36µs   1.16e+3  312.73KB   129.     362
#> 50 hardhat         1  10000  14.19ms  15.03ms   6.35e+1     999KB    95.2     10
#> 51 sparse         10  10000 763.75µs 855.05µs   1.16e+3  312.73KB   135.     369
#> 52 hardhat        10  10000  13.94ms  14.28ms   6.99e+1    1.32MB    94.8     14
#> 53 sparse        100  10000 754.81µs 890.99µs   1.06e+3  313.17KB   138.     298
#> 54 hardhat       100  10000  15.59ms  16.06ms   6.24e+1    9.03MB   118.       9
#> 55 sparse       1000  10000 748.54µs 884.04µs   1.06e+3  316.69KB    94.2    326
#> 56 hardhat      1000  10000  21.73ms  23.09ms   4.06e+1   77.71MB    58.0      7
#> 57 sparse      10000  10000 852.35µs 961.65µs   1.03e+3  351.84KB   100.     307
#> 58 hardhat     10000  10000  93.56ms  93.61ms   1.07e+1  764.53MB    21.4      2
#> 59 sparse     100000  10000   1.25ms    1.4ms   5.04e+2  703.41KB    44.0    252
#> 60 hardhat    100000  10000    2.25s    2.25s   4.44e-1    7.45GB     1.78     1
#> # ℹ 6 more variables: n_gc <dbl>, total_time <bch:tm>, result <list>,
#> #   memory <list>, time <list>, gc <list>

# Plot the benchmark results collected in `result`.
ggplot2::autoplot(result)