databricks-demos / dbdemos

Demos to implement your Databricks Lakehouse

uc-05-upgrade error when accessing demo s3 bucket #70

Closed · barashe closed this issue 1 year ago

barashe commented 1 year ago

When running Cmd 5 in the 00-Upgrade-database-to-UC notebook:

%run ./_resources/00-setup $catalog=dbdemos

I get:

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
File <command-2354161675129384>:51
     48 df.repartition(3).write.mode('overwrite').format("delta").saveAsTable("hive_metastore.uc_database_to_upgrade.users")
     50 #Note: this requires hard-coded external location.
---> 51 df.repartition(3).write.mode('overwrite').format("delta").save(external_location_path+"/transactions")

File /databricks/spark/python/pyspark/instrumentation_utils.py:48, in _wrap_function.<locals>.wrapper(*args, **kwargs)
     46 start = time.perf_counter()
     47 try:
---> 48     res = func(*args, **kwargs)
     49     logger.log_success(
     50         module_name, class_name, function_name, time.perf_counter() - start, signature
     51     )
     52     return res

File /databricks/spark/python/pyspark/sql/readwriter.py:1397, in DataFrameWriter.save(self, path, format, mode, partitionBy, **options)
   1395     self._jwrite.save()
   1396 else:
-> 1397     self._jwrite.save(path)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
   1315 command = proto.CALL_COMMAND_NAME +\
   1316     self.command_header +\
   1317     args_command +\
   1318     proto.END_COMMAND_PART
   1320 answer = self.gateway_client.send_command(command)
-> 1321 return_value = get_return_value(
   1322     answer, self.gateway_client, self.target_id, self.name)
   1324 for temp_arg in temp_args:
   1325     temp_arg._detach()

File /databricks/spark/python/pyspark/errors/exceptions.py:228, in capture_sql_exception.<locals>.deco(*a, **kw)
    226 def deco(*a: Any, **kw: Any) -> Any:
    227     try:
--> 228         return f(*a, **kw)
    229     except Py4JJavaError as e:
    230         converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
    324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325 if answer[1] == REFERENCE_TYPE:
--> 326     raise Py4JJavaError(
    327         "An error occurred while calling {0}{1}{2}.\n".
    328         format(target_id, ".", name), value)
    329 else:
    330     raise Py4JError(
    331         "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332         format(target_id, ".", name, value))

Py4JJavaError: An error occurred while calling o655.save.
: java.nio.file.AccessDeniedException: s3a://databricks-e2demofieldengwest/external_location_uc_upgrade/transactions: getFileStatus on s3a://databricks-e2demofieldengwest/external_location_uc_upgrade/transactions: com.amazonaws.services.s3.model.AmazonS3Exception: Forbidden; request: HEAD https://databricks-e2demofieldengwest.s3.us-west-2.amazonaws.com external_location_uc_upgrade/transactions {} Hadoop 3.3.4, aws-sdk-java/1.12.189 Linux/5.15.0-1039-aws OpenJDK_64-Bit_Server_VM/25.362-b09 java/1.8.0_362 scala/2.12.14 kotlin/1.6.0 vendor/Azul_Systems,_Inc. cfg/retry-mode/legacy com.amazonaws.services.s3.model.GetObjectMetadataRequest; Request ID: 7XW71GWXA3FZBM1X, Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=, Cloud Provider: AWS, Instance ID: i-08204b432528ff063 credentials-provider: com.amazonaws.auth.AnonymousAWSCredentials credential-header: no-credential-header signature-present: false (Service: Amazon S3; Status Code: 403; Error Code: 403 Forbidden; Request ID: 7XW71GWXA3FZBM1X; S3 Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=; Proxy: null), S3 Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=:403 Forbidden
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:296)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:201)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3941)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:3875)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.getFileStatus(S3AFileSystem.java:3769)
    at com.databricks.common.filesystem.LokiFileSystem.getFileStatus(LokiFileSystem.scala:116)
    at com.databricks.sql.acl.fs.CredentialScopeFileSystem.getFileStatus(CredentialScopeFileSystem.scala:290)
    at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1760)
    at com.databricks.sql.transaction.tahoe.DeltaValidation$.validateDeltaWrite(DeltaValidation.scala:166)
    at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:281)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:250)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
    at py4j.Gateway.invoke(Gateway.java:306)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
    at java.lang.Thread.run(Thread.java:750)
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: Forbidden; request: HEAD https://databricks-e2demofieldengwest.s3.us-west-2.amazonaws.com external_location_uc_upgrade/transactions {} Hadoop 3.3.4, aws-sdk-java/1.12.189 Linux/5.15.0-1039-aws OpenJDK_64-Bit_Server_VM/25.362-b09 java/1.8.0_362 scala/2.12.14 kotlin/1.6.0 vendor/Azul_Systems,_Inc. cfg/retry-mode/legacy com.amazonaws.services.s3.model.GetObjectMetadataRequest; Request ID: 7XW71GWXA3FZBM1X, Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=, Cloud Provider: AWS, Instance ID: i-08204b432528ff063 credentials-provider: com.amazonaws.auth.AnonymousAWSCredentials credential-header: no-credential-header signature-present: false (Service: Amazon S3; Status Code: 403; Error Code: 403 Forbidden; Request ID: 7XW71GWXA3FZBM1X; S3 Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=; Proxy: null), S3 Extended Request ID: iBzUesdhw/9yoLK1emq0fpXfcGyiNJV6aXvAZNIF3csJVXfbuPC3rD842SMthwlfDt573PmlfUs=
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1862)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1415)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1154)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:811)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:779)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:753)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:713)
    at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:695)
    at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:559)
    at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:539)
    at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5453)
    at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5400)
    at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1372)
    at shaded.databricks.org.apache.hadoop.fs.s3a.EnforcingDatabricksS3Client.getObjectMetadata(EnforcingDatabricksS3Client.scala:222)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$getObjectMetadata$5(S3AFileSystem.java:2321)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:417)
    at shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:378)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2315)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2291)
    at shaded.databricks.org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3930)
    ... 20 more
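
The write that fails is the one using the hard-coded external_location_path from the setup notebook. As a sketch, the same write pointed at a bucket this workspace can actually access would look like this (the S3 URI below is a placeholder, not part of the demo):

# Hypothetical override: replace the demo's hard-coded path with a bucket this
# workspace has credentials for (the URI below is only a placeholder).
external_location_path = "s3a://<your-accessible-bucket>/external_location_uc_upgrade"

df.repartition(3).write.mode("overwrite").format("delta") \
    .save(external_location_path + "/transactions")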

I am using the cluster created automatically by the demo:

{
    "num_workers": 0,
    "cluster_name": "dbdemos-uc-05-upgrade-eran",
    "spark_version": "12.2.x-cpu-ml-scala2.12",
    "spark_conf": {
        "spark.databricks.cluster.profile": "singleNode",
        "spark.master": "local[*]",
        "spark.databricks.dataLineage.enabled": "true"
    },
    "aws_attributes": {
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "zone_id": "us-east-1f",
        "spot_bid_price_percent": 100,
        "ebs_volume_count": 0
    },
    "node_type_id": "i3.xlarge",
    "driver_node_type_id": "i3.xlarge",
    "ssh_public_keys": [],
    "custom_tags": {
        "ResourceClass": "SingleNode",
        "demo": "uc-05-upgrade",
        "project": "dbdemos"
    },
    "spark_env_vars": {},
    "autotermination_minutes": 60,
    "enable_elastic_disk": false,
    "cluster_source": "API",
    "init_scripts": [],
    "single_user_name": "user@company.com",
    "enable_local_disk_encryption": false,
    "data_security_mode": "SINGLE_USER",
    "runtime_engine": "STANDARD",
    "cluster_id": "0905-102905-cyj0cnzq"
}
QuentinAmbard commented 1 year ago

Hey, yes, for this demo to work you need an external location to be created first. I'll add more details in the main notebook to make this clearer.
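
For anyone else hitting this, a minimal sketch of that setup, assuming a storage credential already exists and wraps an IAM role with access to the bucket (the location name, bucket URI, and credential name below are placeholders, and this has to run on UC-enabled compute with the privilege to create external locations):

# Placeholder names: adjust the location name, bucket URI, and storage
# credential to your environment before running.
spark.sql("""
  CREATE EXTERNAL LOCATION IF NOT EXISTS uc_upgrade_location
  URL 's3://<your-bucket>/external_location_uc_upgrade'
  WITH (STORAGE CREDENTIAL `<your_storage_credential>`)
""")

# Grant the privileges the demo needs on that location.
spark.sql("""
  GRANT CREATE EXTERNAL TABLE, READ FILES, WRITE FILES
  ON EXTERNAL LOCATION uc_upgrade_location TO `account users`
""")

The hard-coded external_location_path in _resources/00-setup would then need to point at that same URL (or a path under it) so the write in Cmd 5 resolves against the external location.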