DyfanJones / noctua

Connect R to Athena using paws SDK (DBI Interface)
https://dyfanjones.github.io/noctua/
Other
45 stars 5 forks source link

dbExistsTable() returns an incorrect result when the table name is defined by Id() or SQL() #210

Open jiojanen opened 9 months ago

jiojanen commented 9 months ago

Issue Description

dbExistsTable() returns an incorrect result when the table name is defined by Id() or SQL().

Problem may be with the quoted form of the SQL() result.

Reproducible Example

Reprex (cleaned from all personal data):

library(DBI)

# Connection
con <- dbConnect(noctua::athena())

# Write data.frame to Athena table
dbWriteTable(con,
             DBI::Id(schema = "myschema", table = "mtcars"),
             mtcars,
             partition = c("TIMESTAMP" = format(Sys.Date(), "%Y%m%d")),
             s3.location = "s3://mybucket/myschema/noctua-staging/data/",
             file.type = "parquet"
)

# Case 1 SUCCESS - name as a character string
name_string <- "myschema.mtcars"
dbExistsTable(con, name_string)
#> [1] TRUE

# Case 2 FAIL - name as an Id
name_id <- DBI::Id(schema = "myschema", table = "mtcars")
dbExistsTable(con, name_id)
#> [1] FALSE

# Case 3 FAIL - name as an SQL
name_sql <- DBI::SQL('"myschema"."mtcars"')
dbExistsTable(con, name_sql)
#> [1] FALSE

# Problem seems to be in Connection.R, in the dbExistsTable method
# block starting at row 681:
#  conn@ptr$Athena$get_table_metadata(
#    CatalogName = ll[["db.catalog"]],
#    DatabaseName = ll[["dbms.name"]],
#    TableName = ll[["table"]]
#  )

# Case 1 - string
ll <- noctua:::db_detect(con, name_string)
ll
#> $db.catalog
#> [1] "AwsDataCatalog"
#> 
#> $dbms.name
#> [1] "myschema"
#> 
#> $table
#> [1] "mtcars"
a <- con@ptr$Athena$get_table_metadata(
  CatalogName = ll[["db.catalog"]],
  DatabaseName = ll[["dbms.name"]],
  TableName = ll[["table"]]
)

# Case 2 - Id
# dbExistsTable method for Id class converts the Id to SQL
name_id_conv <- DBI::dbQuoteIdentifier(con, name_id)
ll <- noctua:::db_detect(con, name_id_conv)
ll
#> $db.catalog
#> [1] "AwsDataCatalog"
#> 
#> $dbms.name
#> [1] "\"myschema\""
#> 
#> $table
#> [1] "\"mtcars\""
# Cannot find the table metadata due to quoted schema name and table name
a <- con@ptr$Athena$get_table_metadata(
  CatalogName = ll[["db.catalog"]],
  DatabaseName = ll[["dbms.name"]],
  TableName = ll[["table"]]
)
#> Error: MetadataException (HTTP 400). Entity Not Found (Service: AmazonDataCatalog; Status Code: 400; Error Code: EntityNotFoundException; Request ID: 2235ef36-df2b-43e8-81a3-18f4f6b2a768; Proxy: null)

# Case 3- Sql, same as case 2
# Cannot find the table due to quoted schema name and table name
ll <- noctua:::db_detect(con, name_sql)
a <- con@ptr$Athena$get_table_metadata(
  CatalogName = ll[["db.catalog"]],
  DatabaseName = ll[["dbms.name"]],
  TableName = ll[["table"]]
)
#> Error: MetadataException (HTTP 400). Entity Not Found (Service: AmazonDataCatalog; Status Code: 400; Error Code: EntityNotFoundException; Request ID: 5180190a-450e-41db-8b66-bb216e49bbda; Proxy: null)

# remove test table
dbRemoveTable(con, name_string, delete_data = TRUE, confirm = TRUE)
#> INFO: The S3 objects in prefix will be deleted:
#> s3://mybucket/myschema/noctua-staging/data/mtcars/

# Disconnect connection
dbDisconnect(con)
Session Info ``` r devtools::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.2.0 (2022-04-22) #> os Ubuntu 20.04.6 LTS #> system x86_64, linux-gnu #> ui X11 #> language (EN) #> collate C.UTF-8 #> ctype C.UTF-8 #> tz Etc/UTC #> date 2023-09-20 #> pandoc 2.19.2 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> cachem 1.0.8 2023-05-01 [1] RSPM #> callr 3.7.3 2022-11-02 [1] RSPM #> cli 3.6.1 2023-03-23 [1] RSPM #> crayon 1.5.2 2022-09-29 [1] RSPM #> devtools 2.4.5 2022-10-11 [1] RSPM #> digest 0.6.33 2023-07-07 [1] RSPM #> ellipsis 0.3.2 2021-04-29 [1] RSPM #> evaluate 0.21 2023-05-05 [1] RSPM #> fansi 1.0.4 2023-01-22 [1] RSPM #> fastmap 1.1.1 2023-02-24 [1] RSPM #> fs 1.6.3 2023-07-20 [1] RSPM #> glue 1.6.2 2022-02-24 [1] RSPM (R 4.2.0) #> htmltools 0.5.6 2023-08-10 [1] RSPM (R 4.2.0) #> htmlwidgets 1.5.4 2021-09-08 [1] RSPM (R 4.2.0) #> httpuv 1.6.11 2023-05-11 [1] RSPM #> knitr 1.44 2023-09-11 [1] RSPM (R 4.2.0) #> later 1.3.1 2023-05-02 [1] RSPM #> lifecycle 1.0.3 2022-10-07 [1] RSPM (R 4.2.0) #> magrittr 2.0.3 2022-03-30 [1] RSPM #> memoise 2.0.1 2021-11-26 [1] RSPM #> mime 0.12 2021-09-28 [1] RSPM #> miniUI 0.1.1.1 2018-05-18 [1] RSPM (R 4.2.0) #> pillar 1.9.0 2023-03-22 [1] RSPM (R 4.2.0) #> pkgbuild 1.4.2 2023-06-26 [1] RSPM #> pkgconfig 2.0.3 2019-09-22 [1] RSPM #> pkgload 1.3.2.1 2023-07-08 [1] RSPM #> prettyunits 1.1.1 2020-01-24 [1] RSPM #> processx 3.8.2 2023-06-30 [1] RSPM #> profvis 0.3.7 2020-11-02 [1] RSPM (R 4.2.0) #> promises 1.2.1 2023-08-10 [1] RSPM #> ps 1.7.5 2023-04-18 [1] RSPM #> purrr 1.0.2 2023-08-10 [1] RSPM #> R.cache 0.16.0 2022-07-21 [1] RSPM (R 4.2.0) #> R.methodsS3 1.8.2 2022-06-13 [1] RSPM (R 4.2.0) #> R.oo 1.25.0 2022-06-12 [1] RSPM (R 4.2.0) #> R.utils 2.12.0 2022-06-28 [1] RSPM (R 4.2.0) #> R6 2.5.1 2021-08-19 [1] RSPM #> Rcpp 1.0.11 2023-07-06 [1] RSPM #> remotes 2.4.2.1 2023-07-18 [1] RSPM #> reprex 2.0.2 2022-08-17 [2] RSPM (R 4.2.0) #> rlang 1.1.1 2023-04-28 [1] RSPM (R 4.2.0) #> rmarkdown 2.24 2023-08-14 [1] RSPM #> rstudioapi 0.15.0 2023-07-07 [1] RSPM (R 4.2.0) #> sessioninfo 1.2.2 2021-12-06 [1] RSPM #> shiny 1.7.5 2023-08-12 [1] RSPM #> stringi 1.7.12 2023-01-11 [1] RSPM (R 4.2.0) #> stringr 1.5.0 2022-12-02 [1] RSPM (R 4.2.0) #> styler 1.7.0 2022-03-13 [1] RSPM (R 4.2.0) #> tibble 3.2.1 2023-03-20 [1] RSPM (R 4.2.0) #> urlchecker 1.0.1 2021-11-30 [1] RSPM #> usethis 2.2.2 2023-07-06 [1] RSPM #> utf8 1.2.3 2023-01-31 [1] RSPM #> vctrs 0.6.3 2023-06-14 [1] RSPM #> withr 2.5.0 2022-03-03 [1] RSPM #> xfun 0.40 2023-08-09 [1] RSPM #> xtable 1.8-4 2019-04-21 [1] RSPM #> yaml 2.3.7 2023-01-23 [1] RSPM (R 4.2.0) #> #> [1] /home/myusername/R/x86_64-pc-linux-gnu-library/4.2 #> [2] /opt/R/4.2.0/lib/R/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
DyfanJones commented 9 months ago

Sorry about that, thanks for identifying the issue :) really appreciate it 😄