r-lib / rray

Simple Arrays
https://rray.r-lib.org
GNU General Public License v3.0
130 stars 12 forks source link

Inner and Container cast/type functions #176

Closed DavisVaughan closed 5 years ago

DavisVaughan commented 5 years ago

Closes #172

Adds cast/type functions for the container type and the inner type. This will greatly speed up functions such as rray_bind(), because they no longer have to use vec_data() to detect the inner types. Essentially, it is all static and extensible now.

The "container" type is either logical() or rray(logical()) (and is extensible to other S3 classes).

The "inner" type is always logical(), numeric(), integer() or character(). (So rrays return one of these as well.)

DavisVaughan commented 5 years ago
# Benchmark setup: compare rray_bind() (rray) against abind() (abind).
library(abind)
library(rray)

# Three inputs of length 5000 that differ in inner type and/or container.
x <- 1:5000 + 0L # integer
y <- 1:5000 + 0 # double
z <- as_rray(y) # rray built from the double vector `y`

# The result is assigned but never used afterwards. Presumably a warm-up call
# so that one-time costs do not show up in the profiles below -- TODO confirm.
garbage <- profmem::profmem(rray_bind(x, x, axis = 1))

# Same type: both inputs are the integer vector `x`, bound with `axis = 1`
# for rray_bind() and `along = 1` for abind().
bench::mark(
  rray_bind(x, x, axis = 1), 
  abind(x, x, along = 1)
)
#> # A tibble: 2 x 10
#>   expression   min  mean median   max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch> <bch> <bch:> <bch>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_bind… 110µs 161µs  159µs 381µs     6196.    39.1KB     6  2942
#> 2 abind(x, … 277µs 398µs  409µs 522µs     2512.   534.3KB     7  1140
#> # … with 1 more variable: total_time <bch:tm>

# Memory allocations recorded for the rray_bind() call benchmarked above.
profmem::profmem(rray_bind(x, x, axis = 1))
#> Rprofmem memory profiling of:
#> rray_bind(x, x, axis = 1)
#> 
#> Memory allocations:
#>        what bytes                       calls
#> 1     alloc 40048 rray_bind() -> rray__bind()
#> total       40048

# Memory allocations recorded for the abind() call benchmarked above.
profmem::profmem(abind(x, x, along = 1))
#> Rprofmem memory profiling of:
#> abind(x, x, along = 1)
#> 
#> Memory allocations:
#>        what  bytes                                       calls
#> 1     alloc  20048 abind() -> as.array() -> as.array.default()
#> 2     alloc  20048 abind() -> as.array() -> as.array.default()
#> 3     alloc  40048                                     abind()
#> 4     alloc  40048                                     abind()
#> 5     alloc  40048                                     abind()
#> 6     alloc  80048                                     abind()
#> 7     alloc  40048              abind() -> array() -> unlist()
#> 8     alloc  40048                          abind() -> array()
#> total       320384

# Different inner types: integer `x` and double `y`, bound with `axis = 1`
# for rray_bind() and `along = 1` for abind().
bench::mark(
  rray_bind(x, y, axis = 1),
  abind(x, y, along = 1)
)
#> # A tibble: 2 x 10
#>   expression   min  mean median   max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch> <bch> <bch:> <bch>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_bind… 143µs 195µs  200µs 316µs     5135.     154KB     5  2393
#> 2 abind(x, … 332µs 422µs  401µs 510µs     2372.     411KB     8  1064
#> # … with 1 more variable: total_time <bch:tm>

# Memory allocations recorded for rray_bind(). The printed table is wide, so
# the `calls` column is wrapped below the `what`/`bytes` columns. The two
# allocations are the integer-to-double inner cast
# (vec_cast_inner.double.integer()) and the bind itself (rray__bind()).
profmem::profmem(rray_bind(x, y, axis = 1))
#> Rprofmem memory profiling of:
#> rray_bind(x, y, axis = 1)
#> 
#> Memory allocations:
#>        what  bytes
#> 1     alloc  40048
#> 2     alloc  80048
#> total       120096
#>                                                                                                                                             calls
#> 1     rray_bind() -> map() -> lapply() -> FUN() -> vec_cast_inner.double() -> vec_cast_inner.double.integer() -> vec_inner_caster() -> vec_cast()
#> 2                                                                                                                     rray_bind() -> rray__bind()
#> total

# Memory allocations recorded for the equivalent abind() call.
profmem::profmem(abind(x, y, along = 1))
#> Rprofmem memory profiling of:
#> abind(x, y, along = 1)
#> 
#> Memory allocations:
#>        what  bytes                                       calls
#> 1     alloc  20048 abind() -> as.array() -> as.array.default()
#> 2     alloc  40048 abind() -> as.array() -> as.array.default()
#> 3     alloc  40048                                     abind()
#> 4     alloc  40048                                     abind()
#> 5     alloc  40048                                     abind()
#> 6     alloc  80048                                     abind()
#> 7     alloc  80048              abind() -> array() -> unlist()
#> 8     alloc  80048                          abind() -> array()
#> total       420384

# Different inner types and different containers: integer vector `x` bound
# with the rray `z` (built from the double `y`). Only rray_bind() is
# benchmarked in this section; there is no abind() comparison here.
bench::mark(
  rray_bind(x, z, axis = 1)
)
#> # A tibble: 1 x 10
#>   expression   min  mean median   max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch> <bch> <bch:> <bch>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_bind… 189µs 258µs  246µs 387µs     3880.     224KB     7  1773
#> # … with 1 more variable: total_time <bch:tm>

# Memory allocations recorded for rray_bind(). The `calls` column is wrapped
# below the `what`/`bytes` columns. The three allocations are the inner cast
# (vec_cast_inner.double.integer()), the bind (rray__bind()), and the
# container cast (vec_cast_container() -> new_rray()).
profmem::profmem(rray_bind(x, z, axis = 1))
#> Rprofmem memory profiling of:
#> rray_bind(x, z, axis = 1)
#> 
#> Memory allocations:
#>        what  bytes
#> 1     alloc  40048
#> 2     alloc  80048
#> 3     alloc  80048
#> total       200144
#>                                                                                                                                                                  calls
#> 1                          rray_bind() -> map() -> lapply() -> FUN() -> vec_cast_inner.double() -> vec_cast_inner.double.integer() -> vec_inner_caster() -> vec_cast()
#> 2                                                                                                                                          rray_bind() -> rray__bind()
#> 3     rray_bind() -> vec_cast_container() -> vec_cast_container.vctrs_rray() -> vec_cast_container.vctrs_rray.double() -> new_rray() -> new_vctr2() -> vec_structure()
#> total

# Second axis, same type: the integer vector `x` twice, bound with
# `axis = 2` for rray_bind() and `along = 2` for abind().
bench::mark(
  rray_bind(x, x, axis = 2),
  abind(x, x, along = 2)
)
#> # A tibble: 2 x 10
#>   expression   min  mean median   max `itr/sec` mem_alloc  n_gc n_itr
#>   <chr>      <bch> <bch> <bch:> <bch>     <dbl> <bch:byt> <dbl> <int>
#> 1 rray_bind… 127µs 156µs  152µs 304µs     6390.    39.1KB     7  2985
#> 2 abind(x, … 179µs 226µs  226µs 336µs     4417.   117.4KB     8  2059
#> # … with 1 more variable: total_time <bch:tm>

# Memory allocations recorded for the rray_bind() call benchmarked above.
profmem::profmem(rray_bind(x, x, axis = 2))
#> Rprofmem memory profiling of:
#> rray_bind(x, x, axis = 2)
#> 
#> Memory allocations:
#>        what bytes                       calls
#> 1     alloc 40048 rray_bind() -> rray__bind()
#> total       40048

# Memory allocations recorded for the abind() call benchmarked above.
profmem::profmem(abind(x, x, along = 2))
#> Rprofmem memory profiling of:
#> abind(x, x, along = 2)
#> 
#> Memory allocations:
#>        what  bytes                                       calls
#> 1     alloc  20048 abind() -> as.array() -> as.array.default()
#> 2     alloc  20048 abind() -> as.array() -> as.array.default()
#> 3     alloc  40048              abind() -> array() -> unlist()
#> 4     alloc  40048                          abind() -> array()
#> total       120192

Created on 2019-05-18 by the reprex package (v0.2.1.9000)