Open suisenkotoba opened 8 months ago
Traceback:
Py4JJavaError: An error occurred while calling o86.sql.
: org.apache.spark.SparkUnsupportedOperationException: UPDATE TABLE is not supported temporarily.
at org.apache.spark.sql.errors.QueryExecutionErrors$.ddlUnsupportedTemporarilyError(QueryExecutionErrors.scala:1109)
at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:896)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
at org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:478)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:162)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:528)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:162)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:155)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:175)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:528)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:175)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
at org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:112)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:219)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:640)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:630)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:662)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:829)
The "UPDATE TABLE is not supported temporarily" error is an indication that you're performing an UPDATE against a non-Iceberg table. The error itself is coming from Spark. Against which catalog & table are you performing the UPDATE?
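A quick way to check which provider Spark resolves an identifier to (the table name below is a placeholder):

# Placeholder identifier -- substitute your own catalog/namespace/table.
# For an Iceberg table, the detailed output should include a Provider row
# with the value "iceberg".
spark.sql("DESCRIBE TABLE EXTENDED my_catalog.db.tbl").show(50, truncate=False)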
> you're performing an UPDATE against a non-Iceberg table.

Hmm, that's strange. The table is created like this:
ddl = f"""
CREATE TABLE IF NOT EXISTS {table_name} USING iceberg
TBLPROPERTIES ('format-version'='2',
  'write.parquet.compression-codec'='snappy',
  'write.delete.mode'='copy-on-write',
  'write.update.mode'='copy-on-write',
  'write.merge.mode'='copy-on-write',
  'write.metadata.delete-after-commit.enabled'=true,
  'write.metadata.previous-versions-max'=50,
  'history.expire.min-snapshots-to-keep'=50,
  'history.expire.max-snapshot-age-ms'=86400000,
  'write.distribution-mode'='hash',
  'write.merge.distribution-mode'='hash'
)
AS (
  SELECT {table_columns},
    TO_TIMESTAMP(timestamp/1000) start_at,
    is_current,
    CURRENT_TIMESTAMP() ingested_at
  FROM {{df}} )
"""
# PySpark's SQL formatter substitutes the {df} placeholder with the DataFrame
# passed as a keyword argument below.
spark.sql(ddl, df=df)
@nastra how can I make sure the table is an Iceberg table?
In your example you configured two catalogs. Which of those catalogs are you using when you're creating and updating that table?
this {catalog}, configured here:

.set(f'spark.sql.catalog.{catalog}', 'org.apache.iceberg.spark.SparkCatalog')
.set(f'spark.sql.catalog.{catalog}.type', 'hive')
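Worth noting: the Iceberg Spark documentation also configures the Iceberg SQL extensions, and a common cause of this planner error (especially on older Spark versions) is that they are missing from the session. A minimal sketch, with the catalog name as a placeholder:

from pyspark.sql import SparkSession

# Minimal session sketch; "my_catalog" is a placeholder name.
spark = (
    SparkSession.builder
    .appName("iceberg-update")
    # Registers Iceberg's analyzer/planner rules used by UPDATE/DELETE/MERGE.
    .config("spark.sql.extensions",
            "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.my_catalog", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.my_catalog.type", "hive")
    .getOrCreate()
)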
@suisenkotoba that catalog isn't enabled by default. Are you using the full identifier when updating (e.g. UPDATE <catalog>.<namespace>.<tableName> ...) or are you using USE <catalog> prior to the update statement?
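For illustration, the two ways of targeting a non-default catalog (all identifiers are placeholders):

# Option 1: fully qualify the table in the statement itself.
spark.sql("UPDATE my_catalog.db.tbl SET is_current = false WHERE id = 1")

# Option 2: switch the session's current catalog first, then use a shorter identifier.
spark.sql("USE my_catalog")
spark.sql("UPDATE db.tbl SET is_current = false WHERE id = 1")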
@nastra yes, I use the full identifier including the catalog name. How can I make sure the table is indeed an Iceberg table? I checked that the table has metadata tables like snapshots, partitions, data_files, etc.
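Two quick checks that can help confirm a table is actually served by an Iceberg catalog (identifiers are placeholders):

tbl = "my_catalog.db.tbl"  # placeholder identifier

# Iceberg tables report their table properties, e.g. format-version.
spark.sql(f"SHOW TBLPROPERTIES {tbl}").show(truncate=False)

# Metadata tables such as <table>.snapshots only resolve for Iceberg tables.
spark.sql(f"SELECT committed_at, operation FROM {tbl}.snapshots").show()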
FYI, I have the same issue using: spark-version: 3.5, scala-version: 2.12, iceberg-version: 1.5.0 or 1.5.2.
Using spark-version: 3.4 it works and I can update tables.
@jurossiar what's your catalog configuration? Are you using org.apache.iceberg.spark.SparkSessionCatalog?
Here is an example with the minimal configs:
from pyspark import SparkConf
from pyspark.sql import SparkSession
self.conf = (SparkConf()
.setMaster(self.spark_master).setAppName("Iceberg Rest Catalog")
### you can overwrite your full path here too, if needed (in this case it's indicated in the .env file)
# .set("spark.home", os.getenv("SPARK_HOME"))
### packages to be pulled from public artifactories and/or the local cache
.set("spark.jars.packages", f"org.apache.iceberg:iceberg-spark-runtime-{self.spark_version}_{self.scala_version}:{self.iceberg_version},software.amazon.awssdk:bundle:{self.awssdk_version},software.amazon.awssdk:url-connection-client:{self.awssdk_version}")
.set("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
### From this point on, the properties follow the spark.sql.catalog.<catalog_name>.* pattern:
.set("spark.sql.defaultCatalog", self.catalog_name)
.set(f"spark.sql.catalog.{self.catalog_name}", "org.apache.iceberg.spark.SparkCatalog")
.set(f"spark.sql.catalog.{self.catalog_name}.type",'rest')
.set(f"spark.sql.catalog.{self.catalog_name}.uri", self.catalog_url) #Catalog Url
.set(f"spark.sql.catalog.{self.catalog_name}.token", self.lakehouse_token)
.set(f"spark.sql.catalog.{self.catalog_name}.warehouse", self.warehouse)
.set(f"spark.sql.catalog.{self.catalog_name}.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
.set(f"spark.sql.catalog.{self.catalog_name}.s3.endpoint", self.aws_endpoint)
.set("spark.executorEnv.AWS_ACCESS_KEY_ID", self.aws_access_key_id)
.set("spark.executorEnv.AWS_SECRET_ACCESS_KEY", self.aws_secret_access_key)
.set("spark.executorEnv.AWS_REGION", self.aws_region)
)
self.spark = SparkSession.builder.config(conf=self.conf).getOrCreate()
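A quick sanity check right after creating the session confirms the settings actually took effect:

# Read the effective configuration back from the live session.
print(self.spark.version)  # should match the iceberg-spark-runtime Spark version
print(self.spark.conf.get("spark.sql.extensions"))
print(self.spark.conf.get(f"spark.sql.catalog.{self.catalog_name}"))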
@jurossiar can you please add some additional details on what SQL command you're running when this issue happens? Also I'm guessing you don't have any other catalogs set up and you're targeting that particular catalog you configured above via USE <catalog>?
Any simple update query fails using Spark 3.5 but works with Spark 3.4, e.g.:
self.spark.sql(f'update {namespace}.{table} set open = (extra == "open")')
self.spark.get_spark().sql(f"UPDATE {namespace}.{table} SET name = 'test name' WHERE name = 'experiment'")
The same error occurs with sparksql or using the catalog name explicitly.
%%sparksql
UPDATE test.table1
SET name = 'test name'
WHERE name = 'experiment'
Although I can add columns with ALTER TABLE and query them.
@nastra Any update on this? I am currently using Glue ETL and facing the same error: all operations work except the MERGE and UPDATE statements, which hit this issue. Spark 3.3.0, Iceberg 1.0.0.
Error:
Py4JJavaError: An error occurred while calling o76.sql.
: java.lang.UnsupportedOperationException: UPDATE TABLE is not supported temporarily.
at org.apache.spark.sql.errors.QueryExecutionErrors$.ddlUnsupportedTemporarilyError(QueryExecutionErrors.scala:891)
at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:893)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
at scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
at org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:72)
at org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:495)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:153)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:153)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:146)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:166)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:192)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:213)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:552)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:213)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:212)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:163)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:159)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$writePlans$5(QueryExecution.scala:298)
at org.apache.spark.sql.catalyst.plans.QueryPlan$.append(QueryPlan.scala:657)
at org.apache.spark.sql.execution.QueryExecution.writePlans(QueryExecution.scala:298)
at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:313)
at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:267)
at org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:246)
at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:100)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:96)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:615)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:177)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:615)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:591)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:96)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:83)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:81)
at org.apache.spark.sql.Dataset.
I'm wondering if you have any update on this issue? -> Previous comments: https://github.com/apache/iceberg/issues/9960#issuecomment-2197375635
I've just tried using spark-version: 3.5, scala-version: 2.12, iceberg-version: 1.6.1, and still get the same errors.
Steps:
Create the table:
%%sparksql
CREATE TABLE julian.tmp_julian (
user_id STRING,
access_type STRING,
open boolean
)
USING ICEBERG
LOCATION 's3a://<bucket>/tables/julian/tmp_julian'
Add a row:
%%sparksql
insert into julian.tmp_julian values ('a','B', false)
Update the row:
%%sparksql
update julian.tmp_julian set open = (access_type == "B")
Error:
{
"name": "Py4JJavaError",
"message": "An error occurred while calling o52.sql.
: org.apache.spark.SparkUnsupportedOperationException: UPDATE TABLE is not supported temporarily.
\tat org.apache.spark.sql.errors.QueryExecutionErrors$.ddlUnsupportedTemporarilyError(QueryExecutionErrors.scala:1109)
\tat org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:896)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
\tat scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
\tat org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
\tat scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
\tat scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
\tat scala.collection.Iterator.foreach(Iterator.scala:943)
\tat scala.collection.Iterator.foreach$(Iterator.scala:943)
\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
\tat scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
\tat scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
\tat scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
\tat scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
\tat org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
\tat org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:476)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:162)
\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
\tat org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:162)
\tat org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:155)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:175)
\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
\tat org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:175)
\tat org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
\tat org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
\tat org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
\tat org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:112)
\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
\tat org.apache.spark.sql.Dataset.<init>(Dataset.scala:219)
\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:640)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:630)
\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:662)
\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)
\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
\tat py4j.Gateway.invoke(Gateway.java:282)
\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
\tat py4j.commands.CallCommand.execute(CallCommand.java:79)
\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)
\tat java.base/java.lang.Thread.run(Thread.java:840)
",
"stack": "---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
Cell In[9], line 1
----> 1 get_ipython().run_cell_magic('sparksql', '', 'update julian.tmp_julian set open = (access_type == \"B\")\
')
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/IPython/core/interactiveshell.py:2541, in InteractiveShell.run_cell_magic(self, magic_name, line, cell)
2539 with self.builtin_trap:
2540 args = (magic_arg_s, cell)
-> 2541 result = fn(*args, **kwargs)
2543 # The code below prevents the output from being displayed
2544 # when using magics with decorator @output_can_be_silenced
2545 # when the last Python token in the expression is a ';'.
2546 if getattr(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, False):
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/sparksql_magic/sparksql.py:40, in SparkSql.sparksql(self, line, cell, local_ns)
37 print(\"active spark session is not found\")
38 return
---> 40 df = spark.sql(bind_variables(cell, user_ns))
41 if args.cache or args.eager:
42 print('cache dataframe with %s load' % ('eager' if args.eager else 'lazy'))
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/pyspark/sql/session.py:1440, in SparkSession.sql(self, sqlQuery, args, **kwargs)
1438 try:
1439 litArgs = {k: _to_java_column(lit(v)) for k, v in (args or {}).items()}
-> 1440 return DataFrame(self._jsparkSession.sql(sqlQuery, litArgs), self)
1441 finally:
1442 if len(kwargs) > 0:
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/py4j/java_gateway.py:1322, in JavaMember.__call__(self, *args)
1316 command = proto.CALL_COMMAND_NAME +\\
1317 self.command_header +\\
1318 args_command +\\
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
1326 if hasattr(temp_arg, \"_detach\"):
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/pyspark/errors/exceptions/captured.py:169, in capture_sql_exception.<locals>.deco(*a, **kw)
167 def deco(*a: Any, **kw: Any) -> Any:
168 try:
--> 169 return f(*a, **kw)
170 except Py4JJavaError as e:
171 converted = convert_exception(e.java_exception)
File ~/miniconda3/envs/dmf-library-dev/lib/python3.10/site-packages/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 \"An error occurred while calling {0}{1}{2}.\
\".
328 format(target_id, \".\", name), value)
329 else:
330 raise Py4JError(
331 \"An error occurred while calling {0}{1}{2}. Trace:\
{3}\
\".
332 format(target_id, \".\", name, value))
Py4JJavaError: An error occurred while calling o52.sql.
: org.apache.spark.SparkUnsupportedOperationException: UPDATE TABLE is not supported temporarily.
\tat org.apache.spark.sql.errors.QueryExecutionErrors$.ddlUnsupportedTemporarilyError(QueryExecutionErrors.scala:1109)
\tat org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:896)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$1(QueryPlanner.scala:63)
\tat scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
\tat org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$3(QueryPlanner.scala:78)
\tat scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:196)
\tat scala.collection.TraversableOnce$folder$1.apply(TraversableOnce.scala:194)
\tat scala.collection.Iterator.foreach(Iterator.scala:943)
\tat scala.collection.Iterator.foreach$(Iterator.scala:943)
\tat scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
\tat scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:199)
\tat scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:192)
\tat scala.collection.AbstractIterator.foldLeft(Iterator.scala:1431)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.$anonfun$plan$2(QueryPlanner.scala:75)
\tat scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
\tat org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
\tat org.apache.spark.sql.execution.SparkStrategies.plan(SparkStrategies.scala:70)
\tat org.apache.spark.sql.execution.QueryExecution$.createSparkPlan(QueryExecution.scala:476)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$sparkPlan$1(QueryExecution.scala:162)
\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
\tat org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:162)
\tat org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:155)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executedPlan$1(QueryExecution.scala:175)
\tat org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$2(QueryExecution.scala:202)
\tat org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:526)
\tat org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:202)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:201)
\tat org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:175)
\tat org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:168)
\tat org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:221)
\tat org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$explainString(QueryExecution.scala:266)
\tat org.apache.spark.sql.execution.QueryExecution.explainString(QueryExecution.scala:235)
\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:112)
\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
\tat org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
\tat org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
\tat org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
\tat org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
\tat org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
\tat org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
\tat org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
\tat org.apache.spark.sql.Dataset.<init>(Dataset.scala:219)
\tat org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
\tat org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:640)
\tat org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:630)
\tat org.apache.spark.sql.SparkSession.sql(SparkSession.scala:662)
\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
\tat java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
\tat java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
\tat java.base/java.lang.reflect.Method.invoke(Method.java:568)
\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
\tat py4j.Gateway.invoke(Gateway.java:282)
\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
\tat py4j.commands.CallCommand.execute(CallCommand.java:79)
\tat py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
\tat py4j.ClientServerConnection.run(ClientServerConnection.java:106)
\tat java.base/java.lang.Thread.run(Thread.java:840)
"
}
But it works with Spark 3.4.
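Since it works on 3.4 and fails on 3.5, one thing worth ruling out is a runtime/Spark version mismatch: the iceberg-spark-runtime artifact is built per Spark minor version, and pulling a 3.4 build onto a Spark 3.5 cluster can leave the UPDATE plan unrewritten, which surfaces as exactly this planner error. For Spark 3.5 / Scala 2.12 / Iceberg 1.6.1 the coordinate would be:

from pyspark import SparkConf

# The artifact name encodes the Spark and Scala versions; both must match the cluster.
conf = SparkConf().set(
    "spark.jars.packages",
    "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1",
)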
+1
Apache Iceberg version: 1.5.0 (latest release)
Query engine: Spark
Please describe the bug 🐞
Previously my pipeline was using Iceberg 1.3 on Dataproc (image version 2.1, which has Spark 3.3 and Scala 2.12). Then I upgraded the Dataproc image to version 2.2, which has Spark 3.5 and Scala 2.12, and moved to Iceberg 1.5. After upgrading, the pipeline encountered an error when updating a table:
org.apache.spark.SparkUnsupportedOperationException: UPDATE TABLE is not supported temporarily.
This is the Spark configuration I used: