tidyverse / dbplyr

Database (DBI) backend for dplyr
https://dbplyr.tidyverse.org
Other
474 stars 173 forks source link

pivot_wider() generates invalid SQL for MS SQL Server #948

Closed joe-rodd closed 2 years ago

joe-rodd commented 2 years ago

We use dbplyr extensively to perform querying on a MS SQL Server database. pivot_wider() has worked previously in this context, but with an update to tidyverse (I can't work out which package is to blame) it no longer generates valid SQL for SQL Server.

See below for sessionInfo and reprex. Not entirely reproducible because of the dependency to SQL Server, of course.

library(tidyverse)
# table of fake data
n <- tribble(~person, ~school, ~D_RST_EXJAAR, ~vak, ~CYFCE, ~CYFSE,
             "8f828a53", "0d93",   "2012",         "Ned",   " 71", " 58",
             "8f828a53", "0d93",   "2012",         "Eng",   " 78", " 74",
             "67464a9f", "691d",   "2012",         "Ned",   " 58", " 69",
             "67464a9f", "691d",   "2012",         "Eng",   " 54", " 67",
             "f0449224", "691d",   "2012",         "Ned",   " 51", " 65",
             "f0449224", "691d",   "2012",         "Eng",   " 65", " 57",
             "64a6ae38", "898b",   "2012",         "Ned",   " 59", " 66",
             "64a6ae38", "898b",   "2012",         "Eng",   " 53", " 68",
             "6c00d5f3", "691d",   "2012",         "Ned",   " 58", " 68",
             "6c00d5f3", "691d",   "2012",         "Eng",   " 52", " 58",
             "97a309f3", "bea3",   "2012",         "Ned",   " 69", " 74",
             "97a309f3", "bea3",   "2012",         "Eng",   " 72", " 75",
             "5991dbe6", "cdcb",   "2012",         "Ned",   " 59", " 60",
             "5991dbe6", "cdcb",   "2012",         "Eng",   " 85", " 60",
             "07c5594c", "a207",   "2012",         "Ned",   " 65", " 62")

# copy to database. DIP is a database in Microsoft SQL Server, Version: 15.00.4198
n_DIP <- copy_to(DIP,n)

n_DIP %>%
  pivot_wider(
    names_from = c(vak),
    values_from = c(CYFCE, CYFSE)
  )  %>% dbplyr:::sql_render()
# Sql code invalid
SELECT
  "person",
  "school",
  "D_RST_EXJAAR",
  MAX(IIF("vak" = 'Eng', "CYFCE", )) AS "CYFCE_Eng",
  MAX(IIF("vak" = 'Ned', "CYFCE", )) AS "CYFCE_Ned",
  MAX(IIF("vak" = 'Eng', "CYFSE", )) AS "CYFSE_Eng",
  MAX(IIF("vak" = 'Ned', "CYFSE", )) AS "CYFSE_Ned"
FROM "#n"
GROUP BY "person", "school", "D_RST_EXJAAR"
n_DIP %>%
  pivot_wider(
    names_from = c(vak),
    values_from = c(CYFCE, CYFSE)
  ) 

# gives error

sqlite <- DBI::dbConnect(RSQLite::SQLite())

n_SQLite <- copy_to(sqlite,n)

n_SQLite %>%
  pivot_wider(
    names_from = c(vak),
    values_from = c(CYFCE, CYFSE)
  ) %>% dbplyr:::sql_render()

# generates valid sql code in sqlite dialect
SELECT
  `person`,
  `school`,
  `D_RST_EXJAAR`,
  MAX(CASE WHEN (`vak` = 'Ned') THEN `CYFCE` END) AS `CYFCE_Ned`,
  MAX(CASE WHEN (`vak` = 'Eng') THEN `CYFCE` END) AS `CYFCE_Eng`,
  MAX(CASE WHEN (`vak` = 'Ned') THEN `CYFSE` END) AS `CYFSE_Ned`,
  MAX(CASE WHEN (`vak` = 'Eng') THEN `CYFSE` END) AS `CYFSE_Eng`
FROM `n`
GROUP BY `person`, `school`, `D_RST_EXJAAR`
n_SQLite %>%
  pivot_wider(
    names_from = c(vak),
    values_from = c(CYFCE, CYFSE)
  )
# works
> sessionInfo()

R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale:
[1] LC_COLLATE=Dutch_Netherlands.utf8  LC_CTYPE=Dutch_Netherlands.utf8   
[3] LC_MONETARY=Dutch_Netherlands.utf8 LC_NUMERIC=C                      
[5] LC_TIME=Dutch_Netherlands.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] reprex_2.0.1    DIP_0.2.1.6     forcats_0.5.1   stringr_1.4.0   dplyr_1.0.9    
 [6] purrr_0.3.4     readr_2.1.2     tidyr_1.2.0     tibble_3.1.7    ggplot2_3.3.6  
[11] tidyverse_1.3.1

loaded via a namespace (and not attached):
 [1] tidyselect_1.1.2 haven_2.5.0      colorspace_2.0-3 vctrs_0.4.1     
 [5] generics_0.1.2   yaml_2.3.5       utf8_1.2.2       blob_1.2.3      
 [9] rlang_1.0.3      pillar_1.7.0     glue_1.6.2       withr_2.5.0     
[13] DBI_1.1.3        bit64_4.0.5      dbplyr_2.2.1     uuid_1.1-0      
[17] modelr_0.1.8     readxl_1.4.0     lifecycle_1.0.1  munsell_0.5.0   
[21] gtable_0.3.0     cellranger_1.1.0 rvest_1.0.2      memoise_2.0.1   
[25] fastmap_1.1.0    tzdb_0.3.0       fansi_1.0.3      broom_1.0.0     
[29] Rcpp_1.0.8.3     openssl_2.0.2    scales_1.2.0     backports_1.4.1 
[33] cachem_1.0.6     bcputility_0.3.0 jsonlite_1.8.0   fs_1.5.2        
[37] bit_4.0.4        ids_1.0.1        askpass_1.1      hms_1.1.1       
[41] stringi_1.7.6    grid_4.2.0       cli_3.3.0        tools_4.2.0     
[45] odbc_1.3.3       magrittr_2.0.3   RSQLite_2.2.14   crayon_1.5.1    
[49] pkgconfig_2.0.3  ellipsis_0.3.2   xml2_1.3.3       lubridate_1.8.0 
[53] assertthat_0.2.1 httr_1.4.3       rstudioapi_0.13  R6_2.5.1        
[57] compiler_4.2.0
mgirlich commented 2 years ago

Fixed in dev version via #942.

You can install the development version from GitHub:

# install.packages("devtools")
devtools::install_github("tidyverse/dbplyr")