tidyverse / dtplyr

Data table backend for dplyr
https://dtplyr.tidyverse.org
Other
670 stars 57 forks source link

`count()` drops last group #356

Closed jmbarbone closed 2 years ago

jmbarbone commented 2 years ago

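When `count()` is used on a `lazy_dt()`, only the last grouping variable is dropped from the result's groups; any earlier variables are left as groups. This differs from dplyr, where `count(x, y)` on ungrouped data returns an ungrouped result and `group_by(x, y) %>% count()` retains both groups. I think the cause could be in `tally.dtplyr_step()`, where `summarise()` is called.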

library(dplyr, warn.conflicts = FALSE)
library(dtplyr, warn.conflicts = FALSE)

x <- tibble(
  x = c(1, 1, 2, 2),
  y = c(1, 1, 1, 2)
)

# dplyr behavior ----------------------------------------------------------

# no groups
x %>% 
  count(x, y) %>% 
  mutate(z = mean(n))
#> # A tibble: 3 × 4
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2  1.33
#> 2     2     1     1  1.33
#> 3     2     2     1  1.33

# groups retained
x %>% 
  group_by(x, y) %>% 
  count() %>% 
  mutate(z = mean(n))
#> # A tibble: 3 × 4
#> # Groups:   x, y [3]
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1

# dtplyr behavior ---------------------------------------------------------

# y is dropped from the groups but x is not (nor would any other earlier variables be); dplyr returns an ungrouped result here
x %>% 
  lazy_dt() %>% 
  count(x, y) %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Groups: x
#> Call:   `_DT1`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n)), by = .(x)]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# an explicit ungroup() is needed to make the previous example match dplyr
x %>% 
  lazy_dt() %>% 
  count(x, y) %>% 
  ungroup() %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Call:   `_DT2`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n))]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2  1.33
#> 2     2     1     1  1.33
#> 3     2     2     1  1.33
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

# y is dropped from the groups rather than retained (dplyr keeps both x and y as groups)
x %>% 
  lazy_dt() %>% 
  group_by(x, y) %>% 
  count() %>% 
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Groups: x
#> Call:   `_DT3`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n)), by = .(x)]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

Created on 2022-05-11 by the reprex package (v2.0.1)

markfairbanks commented 2 years ago

@jmbarbone - All fixed! Thanks for catching this.

library(dplyr, warn.conflicts = FALSE)
library(dtplyr, warn.conflicts = FALSE)

x <- tibble(
  x = c(1, 1, 2, 2),
  y = c(1, 1, 1, 2)
)

x %>% 
  lazy_dt() %>% 
  count(x, y) %>%
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Call:   `_DT1`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n))]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2  1.33
#> 2     2     1     1  1.33
#> 3     2     2     1  1.33
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

x %>% 
  lazy_dt() %>% 
  group_by(x, y) %>% 
  count() %>%
  mutate(z = mean(n))
#> Source: local data table [3 x 4]
#> Groups: x, y
#> Call:   `_DT2`[, .(n = .N), keyby = .(x, y)][, `:=`(z = mean(n)), by = .(x, 
#>     y)]
#> 
#>       x     y     n     z
#>   <dbl> <dbl> <int> <dbl>
#> 1     1     1     2     2
#> 2     2     1     1     1
#> 3     2     2     1     1
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results