Open hadley opened 4 years ago
This can be implemented more efficiently (for the default case, where `default` is a missing value) by using `vec_slice(x, c(NA, idx))`, etc.
Adding a note that `pmin()` is much slower than `min()`. I don't think we need it here:
# Compare the per-call cost of pmin() and min() on two scalar inputs.
bench::mark(pmin(1, 2), min(1, 2))
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 pmin(1, 2) 4.71µs 5.49µs 178269. 80.8KB 107.
#> 2 min(1, 2) 232ns 341ns 2773155. 0B 0
Created on 2020-02-11 by the reprex package (v0.3.0)
And consider unifying these into a single function, as in https://github.com/tidyverse/dplyr/issues/5260
(`shift()`).
Potential full implementation using `vec_shift()` as the base for `vec_lag()` and `vec_lead()` (we can drop the `vec_` prefix):
library(vctrs)
library(rlang)
#' Lag a vector by `n` positions
#'
#' Validates `n` and delegates to `vec_shift()` with a non-negative offset,
#' so values of `x` move towards the end of the vector.
#'
#' @param x A vector.
#' @param n A single, non-missing, non-negative value that can be cast to
#'   integer: the number of positions to lag by. `0L` is accepted.
#' @param default `NULL` (the default) or a size-1 value; passed on to
#'   `vec_shift()`.
#' @param order_by `NULL` (the default) or a vector to order by; passed on to
#'   `vec_shift()`.
#' @return The result of `vec_shift(x, n, default, order_by)`.
vec_lag <- function(x, n = 1L, default = NULL, order_by = NULL) {
  vec_assert(n, size = 1L, arg = "n")
  n <- vec_cast(n, integer(), x_arg = "n")

  # Check `is.na()` first: `NA < 0L` is `NA`, and `if (NA)` fails with the
  # opaque base error "missing value where TRUE/FALSE needed".
  if (is.na(n) || n < 0L) {
    abort("`n` must be a non-negative integer.")
  }

  vec_shift(x, n, default, order_by)
}
#' Lead a vector by `n` positions
#'
#' Validates `n` and delegates to `vec_shift()` with the offset negated, so
#' values of `x` move towards the start of the vector.
#'
#' @param x A vector.
#' @param n A single, non-missing, non-negative value that can be cast to
#'   integer: the number of positions to lead by. `0L` is accepted.
#' @param default `NULL` (the default) or a size-1 value; passed on to
#'   `vec_shift()`.
#' @param order_by `NULL` (the default) or a vector to order by; passed on to
#'   `vec_shift()`.
#' @return The result of `vec_shift(x, -n, default, order_by)`.
vec_lead <- function(x, n = 1L, default = NULL, order_by = NULL) {
  vec_assert(n, size = 1L, arg = "n")
  n <- vec_cast(n, integer(), x_arg = "n")

  # Check `is.na()` first: `NA < 0L` is `NA`, and `if (NA)` fails with the
  # opaque base error "missing value where TRUE/FALSE needed".
  if (is.na(n) || n < 0L) {
    abort("`n` must be a non-negative integer.")
  }

  # `vec_shift()` treats a negative offset as a lead.
  vec_shift(x, -n, default, order_by)
}
#' Shift a vector forwards or backwards
#'
#' A positive `n` lags `x` (values move towards the end); a negative `n`
#' leads it (values move towards the start). Vacated positions are filled
#' with missing values when `default` is `NULL`, otherwise with `default`.
#'
#' @param x A vector.
#' @param n A single, non-missing value that can be cast to integer. Its sign
#'   selects the direction; a magnitude larger than the size of `x` is
#'   clamped to that size. `0L` returns `x` unchanged.
#' @param default `NULL` (the default) to fill by slicing with missing
#'   indices, or a size-1 value that is cast to the type of `x`.
#' @param order_by `NULL` (the default), or a vector of the same size as `x`.
#'   When supplied, `x` is shifted in the order given by `order_by` and the
#'   result is returned in the original order.
#' @return A vector of the same size as `x`.
vec_shift <- function(x, n = 1L, default = NULL, order_by = NULL) {
  size <- vec_size(x)

  if (!is.null(order_by)) {
    # The recursive call (made without `order_by`) validates `n` and
    # `default`.
    out <- with_order(x, order_by, size, vec_shift, n = n, default = default)
    return(out)
  }

  vec_assert(n, size = 1L, arg = "n")
  n <- vec_cast(n, integer(), x_arg = "n")

  # A missing `n` would otherwise reach `if (n > size)` below and fail with
  # the opaque base error "missing value where TRUE/FALSE needed".
  if (is.na(n)) {
    abort("`n` must not be missing.")
  }

  # `==` instead of `identical()`, so that attributes on `n` (e.g. names)
  # cannot defeat the zero short-circuit.
  if (n == 0L) {
    return(x)
  }

  lag <- n > 0L

  # Shifting by more than `size` positions is the same as shifting by `size`.
  # Both operands are scalars, so `min()` is enough (no need for `pmin()`).
  n <- min(abs(n), size)

  if (is.null(default)) {
    vec_shift_slice(x, n, size, lag)
  } else {
    vec_shift_c(x, n, size, lag, default)
  }
}
# Shift `x` by `n` positions using a single `vec_slice()` call.
#
# The index vector is made of the locations of the elements that are kept,
# padded with `n` missing indices at the front (`lag = TRUE`) or at the back
# (`lag = FALSE`).
vec_shift_slice <- function(x, n, size, lag) {
  fill <- vec_rep(NA_integer_, n)

  loc <- if (lag) {
    c(fill, seq2(1L, size - n))
  } else {
    c(seq2(1L + n, size), fill)
  }

  vec_slice(x, loc)
}
# Shift `x` by `n` positions, filling the vacated positions with `default`.
#
# `default` must have size 1; it is cast to the type of `x`, repeated `n`
# times, and combined with the kept elements of `x`: in front of them when
# `lag` is TRUE, after them otherwise.
vec_shift_c <- function(x, n, size, lag, default) {
  vec_assert(default, size = 1L, arg = "default")
  fill <- vec_cast(default, x, x_arg = "default")
  fill <- vec_rep(fill, n)

  if (lag) {
    kept <- vec_slice(x, seq2(1L, size - n))
    return(vec_c(fill, kept))
  }

  kept <- vec_slice(x, seq2(1L + n, size))
  vec_c(kept, fill)
}
# Apply `.fn` to `.x` arranged in the order given by `.order_by`, then put
# the result back into the original order of `.x`.
#
# `.order_by` must have size `.size`. `...` is forwarded to `.fn`.
with_order <- function(.x, .order_by, .size, .fn, ...) {
  vec_assert(.order_by, size = .size)

  perm <- vec_order(.order_by)
  sorted <- vec_slice(.x, perm)
  result <- .fn(sorted, ...)

  # Ordering a permutation yields its inverse, which undoes the reordering.
  restore <- vec_order(perm)
  vec_slice(result, restore)
}
This also fixes two issues with the current dplyr version:
# Issue 1: a size-1 `order_by` shouldn't silently produce a size-1 result;
# the new implementation errors instead.
dplyr::lag(1:5, order_by = 1)
#> [1] NA
vec_lag(1:5, order_by = 1)
#> Error: `.order_by` must have size 5, not size 1.
# Issue 2: `default` should be cast to the type of `x`, rather than the
# result taking the common type of `x` and `default`.
class(dplyr::lag(1:5, default = NA_real_))
#> [1] "numeric"
class(vec_lag(1:5, default = NA_real_))
#> [1] "integer"
Start from @DavisVaughan