tidyverse / dbplyr

Database (DBI) backend for dplyr
https://dbplyr.tidyverse.org
Other
475 stars 174 forks source link

Working across rows in dbplyr #591

Closed abalter closed 3 years ago

abalter commented 3 years ago

I know that some aspects of this were taken care of in #588 in relation to #578. However, I'm still finding that operations I thought would work do not. Or I'm doing things wrong.

library(dplyr, warn.conflicts = FALSE)
library(bigrquery)

# Fix the RNG seed so the randomly generated example data is reproducible.
set.seed(02042021)

# Select the BigQuery test project, drop any cached credentials, and
# authenticate with the given account.
Sys.setenv(BIGQUERY_TEST_PROJECT = "elite-magpie-257717")
bq_deauth()
bq_auth(email = "ariel.balter@gmail.com")

# DBI connection to the `test_dataset` dataset of the test project.
conn <- DBI::dbConnect(
  bigrquery::bigquery(),
  project = bq_test_project(),
  dataset = "test_dataset"
)

# Example data: three columns (A, B, C) of random three-letter strings drawn
# from "a", "b", "c", plus one column (D) of uniform random numbers.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. The calls are kept in the original order so the RNG stream, and
# therefore the printed table below, is unchanged.
my_table <- data.frame(
  A = replicate(10, paste(sample(letters[1:3], 3, replace = TRUE), collapse = "")),
  B = replicate(10, paste(sample(letters[1:3], 3, replace = TRUE), collapse = "")),
  C = replicate(10, paste(sample(letters[1:3], 3, replace = TRUE), collapse = "")),
  D = runif(10)
)
my_table
#>      A   B   C         D
#> 1  bcb cbb bbb 0.3620390
#> 2  aac aac bba 0.5505868
#> 3  aca abb bcb 0.4028455
#> 4  bca baa bbb 0.3247477
#> 5  bcc cac ccc 0.6861223
#> 6  cac bba baa 0.6970764
#> 7  bcb bbc acc 0.6873332
#> 8  bca acb acb 0.5391651
#> 9  cba ccc abc 0.9442450
#> 10 cca cbc bcc 0.6319561

# Local data frame: flag rows where any column contains the substring "ab".
mutate(my_table, has_ab = if_any(everything(), ~ grepl("ab", .)))
#>      A   B   C         D has_ab
#> 1  bcb cbb bbb 0.3620390  FALSE
#> 2  aac aac bba 0.5505868  FALSE
#> 3  aca abb bcb 0.4028455   TRUE
#> 4  bca baa bbb 0.3247477  FALSE
#> 5  bcc cac ccc 0.6861223  FALSE
#> 6  cac bba baa 0.6970764  FALSE
#> 7  bcb bbc acc 0.6873332  FALSE
#> 8  bca acb acb 0.5391651  FALSE
#> 9  cba ccc abc 0.9442450   TRUE
#> 10 cca cbc bcc 0.6319561  FALSE

# Local data frame: same check, but restricted to the numeric columns via
# where(); this selection form also works locally.
mutate(my_table, has_ab = if_any(where(is.numeric), ~ grepl("ab", .)))
#>      A   B   C         D has_ab
#> 1  bcb cbb bbb 0.3620390  FALSE
#> 2  aac aac bba 0.5505868  FALSE
#> 3  aca abb bcb 0.4028455  FALSE
#> 4  bca baa bbb 0.3247477  FALSE
#> 5  bcc cac ccc 0.6861223  FALSE
#> 6  cac bba baa 0.6970764  FALSE
#> 7  bcb bbc acc 0.6873332  FALSE
#> 8  bca acb acb 0.5391651  FALSE
#> 9  cba ccc abc 0.9442450  FALSE
#> 10 cca cbc bcc 0.6319561  FALSE

# Start from a clean remote table. dbRemoveTable() only needs the connection
# and the table name (`value` and `overwrite` are dbWriteTable() arguments),
# and per the DBI specification it raises an error when the table does not
# exist, so the table is dropped only if it is already there.
if (DBI::dbExistsTable(conn, "test_dataset.mytable")) {
  DBI::dbRemoveTable(conn = conn, name = "test_dataset.mytable")
}

# Upload the local data frame, replacing any existing table of that name.
DBI::dbWriteTable(
  conn = conn,
  name = "test_dataset.mytable",
  value = my_table,
  overwrite = TRUE
)

# Remote table reference used by the dbplyr examples below.
my_table_bq <- tbl(conn, "mytable")

# Remote (BigQuery) table: the same if_any() call that works on the local data
# frame fails here with the error shown below.
mutate(my_table_bq, has_ab = if_any(everything(), ~ grepl("ab", .)))
#> Error in UseMethod("escape"): no applicable method for 'escape' applied to an object of class "formula"

# Remote (BigQuery) table: the where() selection fails as well, this time
# complaining about an object of class "function" (see the error below).
mutate(my_table_bq, has_ab = if_any(where(is.numeric), ~ grepl("ab", .)))
#> Error in UseMethod("escape"): no applicable method for 'escape' applied to an object of class "function"

Created on 2021-02-05 by the reprex package (v1.0.0)

mgirlich commented 3 years ago

if_any() and if_all() are not yet supported in the CRAN version of dbplyr, but they are supported in the development version, which you can install with devtools::install_github("tidyverse/dbplyr").