tidyverse / dbplyr

Database (DBI) backend for dplyr
https://dbplyr.tidyverse.org
Other
474 stars 173 forks source link

unexpected error when using is.na() with dbplyr::filter() #1509

Open sjdshapiro opened 4 months ago

sjdshapiro commented 4 months ago

When trying to filter NAs using is.na() in filter(), dbplyr is not working as expected.

I have code that I regularly run and it was working as expected until recently (maybe a month or so ago, I'm not sure).

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql

mf <- memdb_frame(g = c(NA, 1, 2, 2, 2), x = 1:5, y = 5:1)

mf_filtered <- mf |> 
  filter(is.na(g))

# I expect the `mf_filtered ` object to give me the filtered rows of `mf`, but there is
# an unexpected error if I try to run mf_filtered .
mf_filtered 
#> Error in `collect()`:
#> ! Failed to collect lazy table.
#> Caused by error:
#> ! near ")": syntax error
#> Backtrace:
#>      ▆
#>   1. ├─base::tryCatch(...)
#>   2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   3. │   ├─base (local) tryCatchOne(...)
#>   4. │   │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   5. │   └─base (local) tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
#>   6. │     └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>   7. │       └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   8. ├─base::withCallingHandlers(...)
#>   9. ├─base::saveRDS(...)
#>  10. ├─base::do.call(...)
#>  11. ├─base (local) `<fn>`(...)
#>  12. └─global `<fn>`(input = base::quote("legal-mink_reprex.R"))
#>  13.   └─rmarkdown::render(input, quiet = TRUE, envir = globalenv(), encoding = "UTF-8")
#>  14.     └─knitr::knit(knit_input, knit_output, envir = envir, quiet = quiet)
#>  15.       └─knitr:::process_file(text, output)
#>  16.         ├─base::withCallingHandlers(...)
#>  17.         ├─base::withCallingHandlers(...)
#>  18.         ├─knitr:::process_group(group)
#>  19.         └─knitr:::process_group.block(group)
#>  20.           └─knitr:::call_block(x)
#>  21.             └─knitr:::block_exec(params)
#>  22.               └─knitr:::eng_r(options)
#>  23.                 ├─knitr:::in_input_dir(...)
#>  24.                 │ └─knitr:::in_dir(input_dir(), expr)
#>  25.                 └─knitr (local) evaluate(...)
#>  26.                   └─evaluate::evaluate(...)
#>  27.                     └─evaluate:::evaluate_call(...)
#>  28.                       ├─evaluate (local) handle(...)
#>  29.                       │ └─base::try(f, silent = TRUE)
#>  30.                       │   └─base::tryCatch(...)
#>  31.                       │     └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  32.                       │       └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  33.                       │         └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  34.                       ├─base::withCallingHandlers(...)
#>  35.                       ├─base::withVisible(value_fun(ev$value, ev$visible))
#>  36.                       └─knitr (local) value_fun(ev$value, ev$visible)
#>  37.                         └─knitr (local) fun(x, options = options)
#>  38.                           ├─base::withVisible(knit_print(x, ...))
#>  39.                           ├─knitr::knit_print(x, ...)
#>  40.                           └─rmarkdown:::knit_print.tbl_sql(x, ...)
#>  41.                             ├─context$df_print(x)
#>  42.                             └─dbplyr:::print.tbl_sql(x)
#>  43.                               ├─dbplyr:::cat_line(format(x, ..., n = n, width = width, n_extra = n_extra))
#>  44.                               │ ├─base::cat(paste0(..., "\n"), sep = "")
#>  45.                               │ └─base::paste0(..., "\n")
#>  46.                               ├─base::format(x, ..., n = n, width = width, n_extra = n_extra)
#>  47.                               └─pillar:::format.tbl(x, ..., n = n, width = width, n_extra = n_extra)
#>  48.                                 └─pillar:::format_tbl(...)
#>  49.                                   └─pillar::tbl_format_setup(...)
#>  50.                                     ├─pillar:::tbl_format_setup_dispatch(...)
#>  51.                                     └─pillar:::tbl_format_setup.tbl(...)
#>  52.                                       └─pillar:::df_head(x, n + 1)
#>  53.                                         ├─base::as.data.frame(head(x, n))
#>  54.                                         └─dbplyr:::as.data.frame.tbl_sql(head(x, n))
#>  55.                                           ├─base::as.data.frame(collect(x, n = n))
#>  56.                                           ├─dplyr::collect(x, n = n)
#>  57.                                           └─dbplyr:::collect.tbl_sql(x, n = n)
#>  58.                                             └─base::tryCatch(...)
#>  59.                                               └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  60.                                                 └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  61.                                                   └─value[[3L]](cond)
#>  62.                                                     └─cli::cli_abort("Failed to collect lazy table.", parent = cnd)
#>  63.                                                       └─rlang::abort(...)

# I expect the SQL translation to be:

#   SELECT *
#   FROM `dbplyr_001`
# WHERE ((`g` IS NULL))

# However, this is what happens: 
mf_filtered |> show_query()
#> <SQL>
#> SELECT *
#> FROM `dbplyr_001`
#> WHERE ((`g` IS ))

sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-redhat-linux-gnu
#> Running under: Red Hat Enterprise Linux 8.10 (Ootpa)
#> 
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib64/libopenblaso-r0.3.15.so;  LAPACK version 3.9.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/Chicago
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] dbplyr_2.3.3 dplyr_1.1.3 
#> 
#> loaded via a namespace (and not attached):
#>  [1] vctrs_0.6.5       cli_3.6.2         knitr_1.44        rlang_1.1.3      
#>  [5] xfun_0.39         DBI_1.1.3         purrr_1.0.2       generics_0.1.3   
#>  [9] bit_4.0.5         glue_1.7.0        htmltools_0.5.7   fansi_1.0.5      
#> [13] rmarkdown_2.23    evaluate_0.23     tibble_3.2.1      fastmap_1.1.1    
#> [17] yaml_2.3.7        lifecycle_1.0.4   memoise_2.0.1     compiler_4.4.0   
#> [21] blob_1.2.4        RSQLite_2.3.1     fs_1.6.3          pkgconfig_2.0.3  
#> [25] rstudioapi_0.15.0 digest_0.6.35     R6_2.5.1          reprex_2.0.2     
#> [29] tidyselect_1.2.0  utf8_1.2.4        pillar_1.9.0      magrittr_2.0.3   
#> [33] bit64_4.0.5       tools_4.4.0       withr_3.0.0       cachem_1.0.8

Created on 2024-06-03 with reprex v2.0.2