r-dbi / odbc

Connect to ODBC databases (using the DBI interface)
https://odbc.r-dbi.org/

DBI::dbWriteTable fails when useNativeQuery = TRUE #801

Closed · ablack3 closed this issue 1 month ago

ablack3 commented 4 months ago

I have a Databricks ODBC connection and would like to use DBI::dbWriteTable(). It looks like the connection defaults to useNativeQuery = TRUE, which passes queries to the server as-is instead of letting the driver translate them. With this option dbWriteTable() fails: the ? placeholders in the parameterized INSERT reach the server unbound.

If I set useNativeQuery = FALSE, dbWriteTable() works.

con <- DBI::dbConnect(
  odbc::databricks(),
  httpPath = Sys.getenv("DATABRICKS_HTTP_PATH")
)

DBI::dbWriteTable(con, "test10", data.frame(letter = "a", number = 1L))
#> Error: nanodbc/nanodbc.cpp:1771: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNBOUND_SQL_PARAMETER] org.apache.spark.sql.catalyst.ExtendedAnalysisException: [UNBOUND_SQL_PARAMETER] Found the unbound parameter: _50. Please, fix `args` and provide a mapping of the parameter to either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`. SQLSTATE: 42P02; line 2 pos 8
#>  at org.apache.spark.sql.hive.thriftserver.HiveThriftServerErrors$.runningQueryError(HiveThriftServerErrors.scala:48)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:706)
#>  at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#>  at com.databricks.unity.EmptyHandle$.runWith(UCSHandle.scala:128)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.org$apache$spark$sql$hive$thriftserver$SparkExecuteStatementOperation$$execute(SparkExecuteStatementOperation.scala:559)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.$anonfun$run$2(SparkExecuteStatementOperation.scala:403)
#>  at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
#>  at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:420)
#>  at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
#>  at com.databricks.spark.util.PublicDBLogging.withAttributionContext(DatabricksSparkUsageLogger.scala:27)
#>  at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
#>  at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
#>  at com.databricks.spark.util.PublicDBLogging.withAttributionTags(DatabricksSparkUsageLogger.scala:27)
#>  at com.databricks.spark.util.PublicDBLogging.withAttributionTags0(DatabricksSparkUsageLogger.scala:72)
#>  at com.databricks.spark.util.DatabricksSparkUsageLogger.withAttributionTags(DatabricksSparkUsageLogger.scala:172)
#>  at com.databricks.spark.util.UsageLogging.$anonfun$withAttributionTags$1(UsageLogger.scala:491)
#>  at com.databricks.spark.util.UsageLogging$.withAttributionTags(UsageLogger.scala:603)
#>  at com.databricks.spark.util.UsageLogging$.withAttributionTags(UsageLogger.scala:612)
#>  at com.databricks.spark.util.UsageLogging.withAttributionTags(UsageLogger.scala:491)
#>  at com.databricks.spark.util.UsageLogging.withAttributionTags$(UsageLogger.scala:489)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withAttributionTags(SparkExecuteStatementOperation.scala:67)
#>  at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.$anonfun$withLocalProperties$8(ThriftLocalProperties.scala:183)
#>  at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
#>  at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties(ThriftLocalProperties.scala:178)
#>  at org.apache.spark.sql.hive.thriftserver.ThriftLocalProperties.withLocalProperties$(ThriftLocalProperties.scala:71)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.withLocalProperties(SparkExecuteStatementOperation.scala:67)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:381)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2$$anon$3.run(SparkExecuteStatementOperation.scala:367)
#>  at java.security.AccessController.doPrivileged(Native Method)
#>  at javax.security.auth.Subject.doAs(Subject.java:422)
#>  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation$$anon$2.run(SparkExecuteStatementOperation.scala:415)
#>  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
#>  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
#>  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
#>  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
#>  at java.lang.Thread.run(Thread.java:750)
#> Caused by: org.apache.spark.sql.catalyst.ExtendedAnalysisException: [UNBOUND_SQL_PARAMETER] Found the unbound parameter: _50. Please, fix `args` and provide a mapping of the parameter to either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`. SQLSTATE: 42P02; line 2 pos 8
#>  at org.apache.spark.sql.catalyst.ExtendedAnalysisException.copyPlan(ExtendedAnalysisException.scala:91)
#>  at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.$anonfun$execute$1(SparkExecuteStatementOperation.scala:676)
#>  ... 35 more
#>  
#> <SQL> 'INSERT INTO `test10` (`letter`, `number`)
#> VALUES (?, ?)'
DBI::dbGetQuery(con, "select * from test10")
#> [1] letter number
#> <0 rows> (or 0-length row.names)
DBI::dbRemoveTable(con, "test10")
DBI::dbDisconnect(con)

con <- DBI::dbConnect(
  odbc::databricks(),
  httpPath = Sys.getenv("DATABRICKS_HTTP_PATH"),
  useNativeQuery = TRUE
)

DBI::dbWriteTable(con, "test10", data.frame(letter = "a", number = 1L))
#> Error: nanodbc/nanodbc.cpp:1771: 00000
#> [Simba][Hardy] (80) Syntax or semantic analysis error thrown in server while executing query. Error message from server: org.apache.hive.service.cli.HiveSQLException: Error running query: [UNBOUND_SQL_PARAMETER] org.apache.spark.sql.catalyst.ExtendedAnalysisException: [UNBOUND_SQL_PARAMETER] Found the unbound parameter: _50. Please, fix `args` and provide a mapping of the parameter to either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`. SQLSTATE: 42P02; line 2 pos 8
#>  ... (stack trace identical to the one above) ...
#>  
#> <SQL> 'INSERT INTO `test10` (`letter`, `number`)
#> VALUES (?, ?)'
DBI::dbGetQuery(con, "select * from test10")
#> [1] letter number
#> <0 rows> (or 0-length row.names)
DBI::dbRemoveTable(con, "test10")
DBI::dbDisconnect(con)

con <- DBI::dbConnect(
  odbc::databricks(),
  httpPath = Sys.getenv("DATABRICKS_HTTP_PATH"),
  useNativeQuery = FALSE
)

DBI::dbWriteTable(con, "test10", data.frame(letter = "a", number = 1L))
DBI::dbGetQuery(con, "select * from test10")
#>   letter number
#> 1      a      1
DBI::dbRemoveTable(con, "test10")
DBI::dbDisconnect(con)

Created on 2024-05-09 with reprex v2.1.0
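
One possible workaround if you need to keep useNativeQuery = TRUE (a sketch only; I haven't verified it against Databricks): render the INSERT with literal values via DBI::sqlAppendTable(), so no ? placeholders ever reach the server. dbCreateTable() and sqlAppendTable() are standard DBI functions; the table name test10 just mirrors the reprex above.

library(DBI)

con <- dbConnect(
  odbc::databricks(),
  httpPath = Sys.getenv("DATABRICKS_HTTP_PATH")
)

df <- data.frame(letter = "a", number = 1L)

# Plain CREATE TABLE; no bound parameters involved.
dbCreateTable(con, "test10", df)

# Renders the values as SQL literals, e.g.
# INSERT INTO `test10` (`letter`, `number`) VALUES ('a', 1)
sql <- sqlAppendTable(con, "test10", df)
dbExecute(con, sql)

dbGetQuery(con, "select * from test10")
dbRemoveTable(con, "test10")
dbDisconnect(con)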

hadley commented 4 months ago

This is as documented: with useNativeQuery = TRUE the driver passes queries to the server verbatim, so parameterized statements like the one dbWriteTable() generates aren't supported. I'm not sure if we can do better.

simonpcouch commented 1 month ago

Since this is functioning as documented and hasn't seen any recent activity, I'm going to go ahead and close.