tidyverse / funs

Collection of low-level functions for working with vctrs
Other
34 stars 7 forks source link

Interleaving vectors #10

Closed huftis closed 2 years ago

huftis commented 7 years ago

Just discovered this package. A package for handling vectors sounds very useful. Here’s a feature request (with code) for a vector operation I occasionally find use for.

A function for interleaving vectors would be nice. Here’s some code for such a function:

library(purrr)
interleave = function(...) {
  vecs = list(...)
  n_vecs = length(vecs)                    # Number of arguments/vectors
  max_n = vecs %>% map_int(length) %>% max # Max number of elements in a vector
  n_out = n_vecs * max_n                   # Number of elements in output vector
  x = vector(mode = mode(vecs[[1]]), n_vecs * max_n)
  if (n_out > 0) {
    for (i in seq_along(vecs))
    {
      x[seq(1, n_out, by = n_vecs) + i - 1] =
        rep(vecs[[i]], length.out = max_n)
    }
  }
  x
}

A few examples:

> # Some test data
> x = 1:4
> y = 10 * x
> z = 100 * (1:5)
> 
> # Interleaving two vectors
> interleave(x, y)
[1]  1 10  2 20  3 30  4 40
> 
> # Interleaving vectors with different
> # number of elements causes short
> # vectors to be recycled
> interleave(x, y, z)
 [1]   1  10 100   2  20 200   3  30 300   4  40 400   1  10 500
> 
> # Interleaving vectors of different classes/modes
> # causes class coercion
> interleave(x, LETTERS[1:4])
[1] "1" "A" "2" "B" "3" "C" "4" "D"
> 
> 
> ## A few edge cases
> 
> # Interleaving a single vector
> interleave(x)
[1] 1 2 3 4
> 
> # Interleaving empty vectors
> interleave(numeric(), y, 99)
 [1] NA 10 99 NA 20 99 NA 30 99 NA 40 99
> 
> # Interleaving a single empty vector
> interleave(logical())
logical(0)
hadley commented 6 years ago

Here's my stab at it:

interleave <- function(..., .ptype = NULL) {
  args <- list2(...)
  p <- length(args)
  if (p == 0)
    return(NULL)

  args <- vec_recycle(!!!args)
  vals <- vec_c(!!!args, .ptype = .ptype)

  idx <- matrix(seq_along(vals), nrow = p, byrow = TRUE)
  vals[as.integer(idx)]
}
DavisVaughan commented 4 years ago

Small update:

(I suppose in theory name repair and the name spec should be passed on to vec_c)

library(vctrs)
library(rlang)

vec_interleave <- function(..., .ptype = NULL) {
  args <- list2(...)
  n_args <- vec_size(args)

  if (n_args == 0L) {
    return(NULL)
  }

  args <- vec_recycle_common(!!! args)
  out <- vec_c(!!! args, .ptype = .ptype)

  pos <- vec_seq_along(out)
  pos <- as.integer(matrix(pos, nrow = n_args, byrow = TRUE))

  vec_slice(out, pos)
}

vec_interleave(1:5, 6:10, 11:15)
#>  [1]  1  6 11  2  7 12  3  8 13  4  9 14  5 10 15

cars <- mtcars[1:3,]

vec_interleave(cars, vec_init(cars))
#>    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> 1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> 2   NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> 3 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> 4   NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> 5 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> 6   NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
romainfrancois commented 3 years ago
library(vctrs)
library(rlang)

vec_interleave <- function(..., .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal")) {
  args <- list2(...)
  n_args <- vec_size(args)

  if (n_args == 0L) {
    return(NULL)
  }

  args <- vec_recycle_common(!!! args)
  out <- vec_c(!!! args, .ptype = .ptype, .name_spec = .name_spec, .name_repair = .name_repair)

  pos <- vec_seq_along(out)
  pos <- as.integer(matrix(pos, nrow = n_args, byrow = TRUE))

  vec_slice(out, pos)
}

vec_interleave(1:5, 6:10, 11:15)
#>  [1]  1  6 11  2  7 12  3  8 13  4  9 14  5 10 15
vec_interleave(a = 1:5, b = 6:10, .name_spec = "{outer}")
#>  a  b  a  b  a  b  a  b  a  b 
#>  1  6  2  7  3  8  4  9  5 10

cars <- mtcars[1:3,]

not sure about this though

vec_interleave(cars, vec_init(cars))
#>                mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4     21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> ...2            NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> ...4            NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> Datsun 710    22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> ...6            NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA

which happens because ...n seems to have special treatment

df <- data.frame(x = 1:3)
row.names(df) <- c("a", "...2", "b")
df
#>      x
#> a    1
#> ...2 2
#> b    3
vec_slice(df, c(1, 3, 2))
#>      x
#> a    1
#> b    3
#> ...3 2

Created on 2021-05-05 by the reprex package (v2.0.0)

DavisVaughan commented 3 years ago

Yea initializing a data frame with vec_init(x) always gives unique row names to start with (if x originally had row names), and then vec_c() and vec_slice() with data frames will silently strip any ...n and then re-add them to correctly reflect the new positions. So I think it is ok?

library(vctrs)
#> Warning: package 'vctrs' was built under R version 4.0.2

x <- mtcars[1:2,]
y <- vec_init(mtcars, 2)
x
#>               mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4
y
#>      mpg cyl disp hp drat wt qsec vs am gear carb
#> ...1  NA  NA   NA NA   NA NA   NA NA NA   NA   NA
#> ...2  NA  NA   NA NA   NA NA   NA NA NA   NA   NA

xy <- vec_c(x, y)
xy
#>               mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4
#> ...3           NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> ...4           NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA

vec_slice(xy, c(4, 2, 1, 3))
#>               mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> ...1           NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA
#> Mazda RX4 Wag  21   6  160 110  3.9 2.875 17.02  0  1    4    4
#> Mazda RX4      21   6  160 110  3.9 2.620 16.46  0  1    4    4
#> ...4           NA  NA   NA  NA   NA    NA    NA NA NA   NA   NA

Created on 2021-05-05 by the reprex package (v1.0.0)