tidyverse / dtplyr

Data table backend for dplyr
https://dtplyr.tidyverse.org
Other
664 stars 58 forks source link

select + semi_join generates a warning when not immutable #472

Open dakvid opened 4 months ago

dakvid commented 4 months ago

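Something seems not quite right with the translation of `select()` followed by `semi_join()` when the lazy table is created with `immutable = FALSE`: it issues a warning that the dropped column does not exist to remove. In the generated call shown below, the in-place removal `` `:=`("c", NULL) `` appears twice, so presumably the second evaluation finds that column `c` has already been removed by reference.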

library(dplyr, warn.conflicts = FALSE)      # 1.1.4
library(data.table, warn.conflicts = FALSE) # 1.15.4
library(dtplyr, warn.conflicts = FALSE)     # 1.3.1

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  semi_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT53`[, .(a, b)][unique(`_DT53`[, .(a, b)][`_DT54`, which = TRUE, 
#     nomatch = NULL, on = .(b)])]
# 
#       a     b
#   <int> <int>
# 1     2     2

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  semi_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT55`[, `:=`("c", NULL)][unique(`_DT55`[, `:=`("c", NULL)][`_DT56`, 
#     which = TRUE, nomatch = NULL, on = .(b)])]
# 
#       a     b
#   <int> <int>
# 1     2     2
# 
# # Use as.data.table()/as.data.frame()/as_tibble() to access results
# Warning message:
# In `[.data.table`(`_DT55`, , `:=`("c", NULL)) :
#   Column 'c' does not exist to remove

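Compare with `inner_join()`, where the `` `:=`("c", NULL) `` step appears only once in the generated call and no warning is issued: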

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  inner_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT59`[, .(a, b)][`_DT60`, on = .(b), nomatch = NULL, allow.cartesian = TRUE]
# 
#       a     b
#   <int> <int>
# 1     2     2

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  inner_join(y, by = "b")
# Source: local data table [1 x 2]
# Call:   `_DT61`[, `:=`("c", NULL)][`_DT62`, on = .(b), nomatch = NULL, 
#     allow.cartesian = TRUE]
# 
#       a     b
#   <int> <int>
# 1     2     2

Or with `left_join()`, which likewise contains the `` `:=`("c", NULL) `` step only once and produces no warning:

x <- data.table(a = 1:3, b = 1:3, c = 1:3)
y <- data.table(b = 2L)

x |>
  lazy_dt() |>
  select(a, b) |>
  left_join(y, by = "b")
# Source: local data table [3 x 2]
# Call:   setcolorder(`_DT64`[`_DT63`[, .(a, b)], on = .(b), allow.cartesian = TRUE], 
#     2:1)
# 
#       a     b
#   <int> <int>
# 1     1     1
# 2     2     2
# 3     3     3

x |>
  lazy_dt(immutable = FALSE) |>
  select(a, b) |>
  left_join(y, by = "b")
# Source: local data table [3 x 2]
# Call:   setcolorder(`_DT66`[`_DT65`[, `:=`("c", NULL)], on = .(b), allow.cartesian = TRUE], 
#     2:1)
# 
#       a     b
#   <int> <int>
# 1     1     1
# 2     2     2
# 3     3     3