r-lib / rray

Simple Arrays
https://rray.r-lib.org
GNU General Public License v3.0
130 stars 12 forks source link

Use `noalias()` in subset-assignment #219

Closed DavisVaughan closed 5 years ago

DavisVaughan commented 5 years ago

Closes #184

This gives a big speed boost when `value` is a large object, because a temporary copy of `value` is no longer made during the assignment.

Before adding noalias():

library(rray)

x <- matrix(1:1000000 + 0L, dimnames = list(NULL, NULL))

value <- 1:1000000 + 1L

bench::mark(
  rray_subset_assign(x, , 1, value = value),
  `[<-`(x, , 1, value = value),
  {x[, 1] <- value; x},
  iterations = 1000
)
#> # A tibble: 3 x 10
#>   expression    min   mean median     max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch:> <bch:> <bch:> <bch:t>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_subs… 6.56ms 8.26ms 8.29ms 13.82ms      121.    4.11MB   149   851
#> 2 `[<-`(x, … 1.95ms 2.44ms 2.41ms  3.81ms      410.    7.63MB   325   675
#> 3 {...       1.34ms 1.62ms 1.61ms  2.36ms      616.    7.63MB   161   839
#> # … with 1 more variable: total_time <bch:tm>

Created on 2019-06-12 by the reprex package (v0.2.1)

After adding noalias():

library(rray)

x <- matrix(1:1000000 + 0L, dimnames = list(NULL, NULL))

value <- 1:1000000 + 1L

bench::mark(
  rray_subset_assign(x, , 1, value = value),
  `[<-`(x, , 1, value = value),
  {x[, 1] <- value; x},
  iterations = 1000
)
#> # A tibble: 3 x 10
#>   expression      min   mean median    max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch:tm> <bch:> <bch:> <bch:>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_subs… 815.31µs 1.18ms 1.08ms 4.58ms      845.    4.11MB   149   851
#> 2 `[<-`(x, …      2ms 2.46ms 2.42ms 5.04ms      406.    7.63MB   325   675
#> 3 {...         1.36ms 1.65ms 1.64ms 2.61ms      605.    7.63MB   161   839
#> # … with 1 more variable: total_time <bch:tm>

Created on 2019-06-12 by the reprex package (v0.2.1)

Loving noalias() @wolfv

DavisVaughan commented 5 years ago

This second benchmark uses the dynamic view because the indexer is not contiguous. The speedup here is not as impressive :/

Before:

library(rray)

# (250000, 4)
x_vals <- 1:1000000 + 0L
x <- matrix(x_vals, c(length(x_vals) / 4, 4), dimnames = list(NULL, NULL))

# (250000, 2) - will be assigned into columns 1 and 3 of `x`
value <- matrix(seq(1, length(x_vals) / 2) + 1L, ncol = 2)

bench::mark(
  rray_subset_assign(x, , c(1L, 3L), value = value),
  `[<-`(x, , c(1L, 3L), value = value),
  {x[, c(1L, 3L)] <- value; x},
  iterations = 1000
)
#> # A tibble: 3 x 10
#>   expression      min     mean   median     max `itr/sec` mem_alloc  n_gc
#>   <chr>      <bch:tm> <bch:tm> <bch:tm> <bch:t>     <dbl> <bch:byt> <dbl>
#> 1 rray_subs…   6.26ms    7.5ms   7.48ms 12.71ms      133.    4.11MB   111
#> 2 `[<-`(x, … 970.27µs   1.29ms   1.27ms  2.93ms      773.    4.77MB   131
#> 3 {...       442.72µs 565.25µs  565.4µs  1.33ms     1769.    4.77MB    25
#> # … with 2 more variables: n_itr <int>, total_time <bch:tm>

Created on 2019-06-12 by the reprex package (v0.2.1)

After:

library(rray)

# (250000, 4)
x_vals <- 1:1000000 + 0L
x <- matrix(x_vals, c(length(x_vals) / 4, 4), dimnames = list(NULL, NULL))

# (250000, 2) - will be assigned into columns 1 and 3 of `x`
value <- matrix(seq(1, length(x_vals) / 2) + 1L, ncol = 2)

bench::mark(
  rray_subset_assign(x, , c(1L, 3L), value = value),
  `[<-`(x, , c(1L, 3L), value = value),
  {x[, c(1L, 3L)] <- value; x},
  iterations = 1000
)
#> # A tibble: 3 x 10
#>   expression     min     mean   median     max `itr/sec` mem_alloc  n_gc
#>   <chr>      <bch:t> <bch:tm> <bch:tm> <bch:t>     <dbl> <bch:byt> <dbl>
#> 1 rray_subs…   5.6ms   6.51ms   6.27ms 15.55ms      154.    4.11MB   111
#> 2 `[<-`(x, … 968.1µs   1.34ms   1.31ms  3.04ms      746.    4.77MB   131
#> 3 {...       442.8µs  553.4µs 558.49µs  1.42ms     1807.    4.77MB    25
#> # … with 2 more variables: n_itr <int>, total_time <bch:tm>

Created on 2019-06-12 by the reprex package (v0.2.1)