Closed ablack3 closed 4 months ago
Also I think the top 1
syntax is not translated to limit 1
on spark.
SqlRender::translate("select top 1 * from cdm.person", "spark")
#> [1] "select top 1 * from cdm.person"
#> attr(,"sqlDialect")
#> [1] "spark"
Created on 2024-06-01 with reprex v2.1.0
That last one is easy: Don't forget the closing semicolon:
SqlRender::translate("select top 1 * from cdm.person;", "spark")
#> [1] "SELECT * from cdm.person LIMIT 1;"
#> attr(,"sqlDialect")
#> [1] "spark"
The first one is an odd one: I see that according to the documentation you are right. Yet when I use DATE_ADD(DAY, ..., ...)
against my Databricks test server it works! (it is actually part of one of DatabaseConnector's passing unit tests). I'm also a bit suspicious that the DATEADD()
function documentation is dated as April 18, 2022. Perhaps this is a recent change that hasn't been applied to all servers?
Anyway, seems DATEADD(DAY, ..., ...)
also works on my test server, so I'll change SqlRender's behavior.
Should be fixed now in develop.
Perhaps you're not using useNativeQuery = 1
? This was recommended by Databricks engineers: https://github.com/OHDSI/DatabaseConnector/blob/main/R/Connect.R#L674C51-L674C67
I tried different options for useNativeQuery and the DATE_ADD(day,...
syntax always gives an error so I'm surprised it works with DatabaseConnector.
ohdsisql <- "select observation_period_start_date,
dateadd(day, 1, observation_period_start_date) as next_day
from omop531.observation_period limit 5"
sparksql <- SqlRender::translate(ohdsisql, "spark")
print(sparksql)
#> [1] "select observation_period_start_date, \n DATE_ADD(day,1,observation_period_start_date) as next_day\n from omop531.observation_period limit 5"
#> attr(,"sqlDialect")
#> [1] "spark"
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = FALSE
)
result <- DBI::dbGetQuery(con, sparksql)
#> Error in eval(expr, envir, enclos): nanodbc/nanodbc.cpp:1710: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNRESOLVED_COLUMN.WITH_SUGGESTION] org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:611)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:125)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:501)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:361)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:156)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:51)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:62)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:339)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:324)
#> at java.security.AccessController.doPrivileged(Native Method)
#> at javax.security.auth.Subject.doAs(Subject.java:422)
#> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:373)
#> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#> at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.AnalysisException.copy(AnalysisException.scala:111)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:588)
#> ... 20 more
#>
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = TRUE
)
result <- DBI::dbGetQuery(con, sparksql)
#> Error in eval(expr, envir, enclos): nanodbc/nanodbc.cpp:1710: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNRESOLVED_COLUMN.WITH_SUGGESTION] org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:611)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:125)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:501)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:361)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:156)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:51)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:62)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:339)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:324)
#> at java.security.AccessController.doPrivileged(Native Method)
#> at javax.security.auth.Subject.doAs(Subject.java:422)
#> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:373)
#> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#> at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.AnalysisException.copy(AnalysisException.scala:111)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:588)
#> ... 20 more
#>
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = 1L
)
result <- DBI::dbGetQuery(con, sparksql)
#> Error in eval(expr, envir, enclos): nanodbc/nanodbc.cpp:1710: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNRESOLVED_COLUMN.WITH_SUGGESTION] org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:611)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:125)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:501)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:361)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:156)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:51)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:62)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:339)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:324)
#> at java.security.AccessController.doPrivileged(Native Method)
#> at javax.security.auth.Subject.doAs(Subject.java:422)
#> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:373)
#> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#> at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.AnalysisException.copy(AnalysisException.scala:111)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:588)
#> ... 20 more
#>
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = 0L
)
result <- DBI::dbGetQuery(con, sparksql)
#> Error in eval(expr, envir, enclos): nanodbc/nanodbc.cpp:1710: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNRESOLVED_COLUMN.WITH_SUGGESTION] org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:611)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:125)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:501)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:361)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:156)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:51)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:62)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:339)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:324)
#> at java.security.AccessController.doPrivileged(Native Method)
#> at javax.security.auth.Subject.doAs(Subject.java:422)
#> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:373)
#> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#> at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.AnalysisException.copy(AnalysisException.scala:111)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:588)
#> ... 20 more
#>
DBI::dbDisconnect(con)
Created on 2024-06-10 with reprex v2.1.0
Here is a reprex using DatabaseConnector that gives an error.
library(DatabaseConnector)
connectionDetails <- createConnectionDetails(
dbms = "spark",
user = Sys.getenv('DATABRICKS_USER'),
password = Sys.getenv('DATABRICKS_TOKEN'),
connectionString = Sys.getenv('DATABRICKS_CONNECTION_STRING')
)
con <- connect(connectionDetails)
#> Connecting using Spark JDBC driver
ohdsisql <- "select observation_period_start_date,
dateadd(day, 1, observation_period_start_date) as next_day
from omop531.observation_period limit 5"
(sparksql <- SqlRender::translate(ohdsisql, "spark"))
#> [1] "select observation_period_start_date, \n DATE_ADD(day,1,observation_period_start_date) as next_day\n from omop531.observation_period limit 5"
#> attr(,"sqlDialect")
#> [1] "spark"
querySql(con, sparksql)
#> Error in `.createErrorReport()`:
#> ! Error executing SQL:
#> java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500051) ERROR processing query/statement. Error Code: 0, SQL state: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNRESOLVED_COLUMN.WITH_SUGGESTION] org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:611)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:125)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:501)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:361)
#> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#> at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:156)
#> at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:51)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:62)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:339)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:324)
#> at java.security.AccessController.doPrivileged(Native Method)
#> at javax.security.auth.Subject.doAs(Subject.java:422)
#> at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:373)
#> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#> at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `day` cannot be resolved. Did you mean one of the following? [`spark_catalog`.`omop531`.`observation_period`.`PERSON_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_ID`, `spark_catalog`.`omop531`.`observation_period`.`PERIOD_TYPE_CONCEPT_ID`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_END_DATE`, `spark_catalog`.`omop531`.`observation_period`.`OBSERVATION_PERIOD_START_DATE`].; line 2 pos 10
#> at org.apache.spark.sql.AnalysisException.copy(AnalysisException.scala:111)
#> at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:588)
#> ... 20 more
#> , Query: select obs***.
#> An error report has been created at /private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/RtmpchJtMO/reprex-ce096368e0c-legal-mink/errorReportSql.txt
disconnect(con)
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Sonoma 14.0
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> time zone: Europe/Amsterdam
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] DatabaseConnector_6.3.2
#>
#> loaded via a namespace (and not attached):
#> [1] vctrs_0.6.5 cli_3.6.2 knitr_1.45 rlang_1.1.3
#> [5] xfun_0.43 DBI_1.2.2 purrr_1.0.2 styler_1.10.3
#> [9] rJava_1.0-11 glue_1.7.0 bit_4.0.5 backports_1.5.0
#> [13] htmltools_0.5.8.1 fansi_1.0.6 rmarkdown_2.26 R.cache_0.16.0
#> [17] evaluate_0.23 fastmap_1.1.1 yaml_2.3.8 lifecycle_1.0.4
#> [21] compiler_4.3.1 SqlRender_1.18.0 fs_1.6.4 rstudioapi_0.16.0
#> [25] R.oo_1.26.0 R.utils_2.12.3 digest_0.6.35 utf8_1.2.4
#> [29] reprex_2.1.0 pillar_1.9.0 magrittr_2.0.3 checkmate_2.3.1
#> [33] R.methodsS3_1.8.2 tools_4.3.1 withr_3.0.0 bit64_4.0.5
Created on 2024-06-10 with reprex v2.1.0
Thanks for the fix!
develop branch works with odbc native query T or F and with DatabaseConnector.
remotes::install_github("ohdsi/SqlRender", "develop", force = T)
#> Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
#> Downloading GitHub repo ohdsi/SqlRender@develop
#> rlang (1.1.3 -> 1.1.4) [CRAN]
#> Installing 1 packages: rlang
#>
#> The downloaded binary packages are in
#> /var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T//Rtmp51F9cJ/downloaded_packages
#> ── R CMD build ─────────────────────────────────────────────────────────────────
#> * checking for file ‘/private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/Rtmp51F9cJ/remotesd1184bbca245/OHDSI-SqlRender-acdd6f3/DESCRIPTION’ ... OK
#> * preparing ‘SqlRender’:
#> * checking DESCRIPTION meta-information ... OK
#> * checking for LF line-endings in source and make files and shell scripts
#> * checking for empty or unneeded directories
#> * building ‘SqlRender_1.18.1.tar.gz’
ohdsisql <- "select observation_period_start_date,
dateadd(day, 1, observation_period_start_date) as next_day
from omop531.observation_period limit 5"
(sparksql <- SqlRender::translate(ohdsisql, "spark"))
#> [1] "select observation_period_start_date, \n dateadd(day, 1, observation_period_start_date) as next_day\n from omop531.observation_period limit 5"
#> attr(,"sqlDialect")
#> [1] "spark"
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = FALSE
)
result <- DBI::dbGetQuery(con, sparksql)
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = TRUE
)
result <- DBI::dbGetQuery(con, sparksql)
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = 1L
)
result <- DBI::dbGetQuery(con, sparksql)
DBI::dbDisconnect(con)
con <- DBI::dbConnect(
odbc::databricks(),
httpPath = Sys.getenv("DATABRICKS_HTTPPATH"),
useNativeQuery = 0L
)
result <- DBI::dbGetQuery(con, sparksql)
DBI::dbDisconnect(con)
library(DatabaseConnector)
connectionDetails <- createConnectionDetails(
dbms = "spark",
user = Sys.getenv('DATABRICKS_USER'),
password = Sys.getenv('DATABRICKS_TOKEN'),
connectionString = Sys.getenv('DATABRICKS_CONNECTION_STRING')
)
con <- connect(connectionDetails)
#> Connecting using Spark JDBC driver
disconnect(con)
Created on 2024-06-10 with reprex v2.1.0
well actually I get a different error with DatabaseConnector now but I don't think it is related to the sql. Perhaps my driver is not set up correctly.
remotes::install_github("ohdsi/SqlRender", "develop")
#> Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
#> Skipping install of 'SqlRender' from a github remote, the SHA1 (acdd6f36) has not changed since last install.
#> Use `force = TRUE` to force installation
ohdsisql <- "select observation_period_start_date,
dateadd(day, 1, observation_period_start_date) as next_day
from omop531.observation_period limit 5"
(sparksql <- SqlRender::translate(ohdsisql, "spark"))
#> [1] "select observation_period_start_date, \n dateadd(day, 1, observation_period_start_date) as next_day\n from omop531.observation_period limit 5"
#> attr(,"sqlDialect")
#> [1] "spark"
library(DatabaseConnector)
connectionDetails <- createConnectionDetails(
dbms = "spark",
user = Sys.getenv('DATABRICKS_USER'),
password = Sys.getenv('DATABRICKS_TOKEN'),
connectionString = Sys.getenv('DATABRICKS_CONNECTION_STRING')
)
con <- connect(connectionDetails)
#> Connecting using Spark JDBC driver
querySql(con, sparksql)
#> Error in `.createErrorReport()`:
#> ! Error executing SQL:
#> java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500540) Error caught in BackgroundFetcher. Foreground thread ID: 1. Background thread ID: 33. Error caught: null.
#> An error report has been created at /private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/RtmpuSZ4O9/reprex-d03d75fc7217-super-sable/errorReportSql.txt
disconnect(con)
Created on 2024-06-10 with reprex v2.1.0
Just for my own sanity, I ran the old syntax on my Databricks testing instance, and it works just fine. I have no explanation:
library(DatabaseConnector)
connectionDetails <- createConnectionDetails(
dbms = "spark",
user = Sys.getenv("CDM5_SPARK_USER"),
password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")),
connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING")
)
conn <- connect(connectionDetails)
querySql(conn, "select observation_period_start_date, \n DATE_ADD(day,1,observation_period_start_date) as next_day\n from eunomia.observation_period limit 5")
# OBSERVATION_PERIOD_START_DATE NEXT_DAY
# 1 1944-03-04 1944-03-05
# 2 1959-03-28 1959-03-29
# 3 1985-05-26 1985-05-27
# 4 1959-04-25 1959-04-26
# 5 2009-07-30 2009-07-31
(It may actually be the ODBC interface. I've seen differences in SQL dialect related to ODBC in the past)
Just for my own sanity, I ran the old syntax on my Databricks testing instance, and it works just fine. I have no explanation:
Hahaha so weird. I'm having trouble using the jdbc driver. Maybe it is due to some issue with Java version, R version and apple ARM chip.
library(DatabaseConnector)
connectionDetails <- createConnectionDetails(
dbms = "spark",
user = Sys.getenv("CDM5_SPARK_USER"),
password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")),
connectionString = Sys.getenv("CDM5_SPARK_CONNECTION_STRING")
)
conn <- connect(connectionDetails)
# connection works as does getTableNames
#> Connecting using Spark JDBC driver
getTableNames(conn, "eunomia")
#> [1] "care_site" "cdm_source" "cohort"
#> [4] "cohort_attribute" "concept" "concept_ancestor"
#> [7] "concept_class" "concept_relationship" "concept_synonym"
#> [10] "condition_era" "condition_occurrence" "cost"
#> [13] "death" "device_exposure" "domain"
#> [16] "dose_era" "drug_era" "drug_exposure"
#> [19] "drug_strength" "fact_relationship" "location"
#> [22] "measurement" "metadata" "note"
#> [25] "note_nlp" "observation" "observation_period"
#> [28] "payer_plan_period" "person" "procedure_occurrence"
#> [31] "provider" "relationship" "source_to_concept_map"
#> [34] "specimen" "visit_detail" "visit_occurrence"
#> [37] "vocabulary"
# but SQL does not work
querySql(conn, "select 1;")
#> Error in `.createErrorReport()`:
#> ! Error executing SQL:
#> java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500540) Error caught in BackgroundFetcher. Foreground thread ID: 1. Background thread ID: 33. Error caught: null.
#> An error report has been created at /private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/Rtmpc7xahJ/reprex-e1854bfd425c-fussy-agama/errorReportSql.txt
querySql(conn, "select observation_period_start_date, \n DATE_ADD(day,1,observation_period_start_date) as next_day\n from eunomia.observation_period limit 5")
#> Error in `.createErrorReport()`:
#> ! Error executing SQL:
#> java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500540) Error caught in BackgroundFetcher. Foreground thread ID: 1. Background thread ID: 33. Error caught: Could not initialize class com.databricks.client.jdbc42.internal.apache.arrow.memory.util.MemoryUtil.
#> An error report has been created at /private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/Rtmpc7xahJ/reprex-e1854bfd425c-fussy-agama/errorReportSql.txt
querySql(conn, "select observation_period_start_date, \n DATEADD(day,1,observation_period_start_date) as next_day\n from eunomia.observation_period limit 5")
#> Error in `.createErrorReport()`:
#> ! Error executing SQL:
#> java.sql.SQLException: [Databricks][DatabricksJDBCDriver](500540) Error caught in BackgroundFetcher. Foreground thread ID: 1. Background thread ID: 33. Error caught: Could not initialize class com.databricks.client.jdbc42.internal.apache.arrow.memory.util.MemoryUtil.
#> An error report has been created at /private/var/folders/xx/01v98b6546ldnm1rg1_bvk000000gn/T/Rtmpc7xahJ/reprex-e1854bfd425c-fussy-agama/errorReportSql.txt
disconnect(conn)
Created on 2024-06-11 with reprex v2.1.0
https://github.com/dbeaver/dbeaver/issues/17303#issuecomment-1232263953
odbc works fine though. But really weird that the SQL dialect and driver are not independent. Anyway thanks for the fix. Works fine for me now.
@ablack3 I'm experiencing the same issue where using odbc sql executes as expected but in database connector I'm getting the same error.
! Error executing SQL:
java.sql.SQLException: [Databricks][JDBCDriver](500540) Error caught in BackgroundFetcher. Foreground thread ID: 1. Background thread ID: 43. Error caught: Could not initialize class com.databricks.client.jdbc42.internal.apache.arrow.memory.util.MemoryUtil.
It seems adding EnableArrow=0;
to my connection string parameters fixed the issue on my side. I'm using JDK 17, and this came from a workaround suggested by Databricks folks.
Thanks @burrowse ! Do you think it would make sense for DatabaseConnector's connect()
function to enforce EnableArrow=0
?
The HADES website recommends running Java 8 (ideally Corretto), although I realize not everyone will be able to install that.
@schuemie Good question... I don't think it should be enforced in the connection string by default. It seems that while the proposed workaround came from Databricks, the community (in several threads) is calling for an update to support higher versions of Java, because disabling Arrow degrades performance and memory usage for some use cases.
Maybe instead adding a note to the Spark/Databricks section of the connection guide would help temporarily?
I think the SQL translation for DATEADD is incorrect on Spark. I think the function should be
DATEADD
instead of DATE_ADD
It looks like there are two functions that could be used:
https://docs.databricks.com/en/sql/language-manual/functions/date_add.html
https://docs.databricks.com/en/sql/language-manual/functions/dateadd.html
Created on 2024-06-01 with reprex v2.1.0
Session info
``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.3.1 (2023-06-16) #> os macOS Sonoma 14.0 #> system aarch64, darwin20 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Europe/Amsterdam #> date 2024-06-01 #> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> backports 1.5.0 2024-05-23 [1] CRAN (R 4.3.3) #> bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0) #> bit64 4.0.5 2020-08-30 [1] CRAN (R 4.3.0) #> blob 1.2.4 2023-03-17 [1] CRAN (R 4.3.0) #> checkmate 2.3.1 2023-12-04 [1] CRAN (R 4.3.1) #> cli 3.6.2 2023-12-11 [1] CRAN (R 4.3.1) #> DBI 1.2.2 2024-02-16 [1] CRAN (R 4.3.1) #> digest 0.6.35 2024-03-11 [1] CRAN (R 4.3.1) #> evaluate 0.23 2023-11-01 [1] CRAN (R 4.3.1) #> fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0) #> fs 1.6.4 2024-04-25 [1] CRAN (R 4.3.1) #> glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.1) #> hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0) #> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.3.1) #> knitr 1.45 2023-10-30 [1] CRAN (R 4.3.1) #> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.1) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0) #> odbc 1.4.2 2024-01-22 [1] CRAN (R 4.3.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0) #> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.3.0) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.0) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.0) #> R.oo 1.26.0 2024-01-24 [1] CRAN (R 4.3.1) #> R.utils 2.12.3 2023-11-18 [1] CRAN (R 4.3.1) #> Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.3.1) #> reprex 2.1.0 2024-01-11 [1] CRAN (R 4.3.1) #> rJava 1.0-11 2024-01-26 [1] CRAN (R 4.3.1) #> rlang 1.1.3 2024-01-10 [1] CRAN (R 4.3.1) #> rmarkdown 2.26 2024-03-05 [1] CRAN (R 4.3.1) #> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.3.1) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 
4.3.0) #> SqlRender 1.17.0 2024-03-20 [1] CRAN (R 4.3.1) #> styler 1.10.3 2024-04-07 [1] CRAN (R 4.3.1) #> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.1) #> withr 3.0.0 2024-01-16 [1] CRAN (R 4.3.1) #> xfun 0.43 2024-03-25 [1] CRAN (R 4.3.1) #> yaml 2.3.8 2023-12-11 [1] CRAN (R 4.3.1) #> #> [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library #> #> ────────────────────────────────────────────────────────────────────────────── ```