Open NicholasFiorentini opened 5 days ago
I have also just tried upgrading our runtime to 15.4 LTS and I am experiencing the same problem: the LTS image ships with a release-candidate Python.
➜ databricks (update-dbx-image-15.4) docker run databricksruntime/python:15.4-LTS /databricks/python3/bin/python --version
Python 3.11.0rc1
Just for sanity, I checked the expected runtime from a vanilla 15.4 cluster (non-dockerized) 😱
Cluster JSON:
{
"cluster_id": "***",
"creator_user_name": "***",
"driver": {
"private_ip": "***",
"node_id": "***",
"instance_id": "i-***",
"start_timestamp": 1726564676211,
"node_aws_attributes": {
"is_spot": false
},
"node_attributes": {
"is_spot": false
},
"host_private_ip": "***"
},
"executors": [
{
"private_ip": "***",
"node_id": "***",
"instance_id": "i-***",
"start_timestamp": 1726564676170,
"node_aws_attributes": {
"is_spot": true
},
"node_attributes": {
"is_spot": true
},
"host_private_ip": "***"
}
],
"spark_context_id": ***,
"driver_healthy": true,
"jdbc_port": 10000,
"cluster_name": "Unity Cluster 15.4 LTS",
"spark_version": "15.4.x-scala2.12",
"spark_conf": {
"spark.databricks.delta.schema.autoMerge.enabled": "true",
"spark.databricks.unityCatalog.volumes.enabled": "true",
"spark.sql.streaming.metricsEnabled": "true",
"spark.ui.prometheus.enabled": "true",
"spark.executor.processTreeMetrics.enabled": "true"
},
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"instance_profile_arn": "arn:aws:iam::***:instance-profile/***",
"spot_bid_price_percent": 100,
"ebs_volume_type": "GENERAL_PURPOSE_SSD",
"ebs_volume_count": 3,
"ebs_volume_size": 100
},
"node_type_id": "m-fleet.2xlarge",
"driver_node_type_id": "m-fleet.2xlarge",
"custom_tags": {
"team": "***"
},
"cluster_log_conf": {
"s3": {
"destination": "s3://***",
"region": "***",
"enable_encryption": true,
"canned_acl": "bucket-owner-full-control"
}
},
"autotermination_minutes": 60,
"enable_elastic_disk": false,
"disk_spec": {
"disk_type": {
"ebs_volume_type": "GENERAL_PURPOSE_SSD"
},
"disk_count": 3,
"disk_size": 100
},
"cluster_source": "UI",
"single_user_name": "***",
"policy_id": "***",
"enable_local_disk_encryption": false,
"instance_source": {
"node_type_id": "m-fleet.2xlarge"
},
"driver_instance_source": {
"node_type_id": "m-fleet.2xlarge"
},
"data_security_mode": "SINGLE_USER",
"runtime_engine": "STANDARD",
"effective_spark_version": "15.4.x-scala2.12",
"state": "RUNNING",
"state_message": "",
"start_time": 1725530772611,
"last_state_loss_time": 1726564768418,
"last_activity_time": 1726564725412,
"last_restarted_time": 1726564768503,
"autoscale": {
"min_workers": 1,
"max_workers": 2,
"target_workers": 1
},
"cluster_memory_mb": 65536,
"cluster_cores": 16,
"default_tags": {
"Vendor": "Databricks",
"Creator": "***",
"ClusterName": "Unity Cluster 15.4 LTS",
"ClusterId": "***"
},
"cluster_log_status": {
"last_attempted": 1726565051254
},
"pinned_by_user_name": "***",
"init_scripts_safe_mode": false,
"spec": {
"cluster_name": "Cluster 15.4 LTS",
"spark_version": "15.4.x-scala2.12",
"spark_conf": {
"spark.databricks.delta.schema.autoMerge.enabled": "true",
"spark.databricks.unityCatalog.volumes.enabled": "true",
"spark.sql.streaming.metricsEnabled": "true",
"spark.ui.prometheus.enabled": "true",
"spark.executor.processTreeMetrics.enabled": "true"
},
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"instance_profile_arn": "arn:aws:iam::***:instance-profile/***",
"spot_bid_price_percent": 100,
"ebs_volume_type": "GENERAL_PURPOSE_SSD",
"ebs_volume_count": 3,
"ebs_volume_size": 100
},
"node_type_id": "m-fleet.2xlarge",
"driver_node_type_id": "m-fleet.2xlarge",
"custom_tags": {
"team": "***",
},
"cluster_log_conf": {
"s3": {
"destination": "s3://***",
"region": "***",
"enable_encryption": true,
"canned_acl": "bucket-owner-full-control"
}
},
"autotermination_minutes": 60,
"enable_elastic_disk": false,
"single_user_name": "***",
"policy_id": "***",
"enable_local_disk_encryption": false,
"data_security_mode": "SINGLE_USER",
"runtime_engine": "STANDARD",
"effective_spark_version": "14.3.x-scala2.12",
"autoscale": {
"min_workers": 1,
"max_workers": 2
},
"apply_policy_default_values": false
}
}
As a side note, how come at the very end of the spec we can see
"effective_spark_version": "14.3.x-scala2.12",
even though earlier we see
"effective_spark_version": "15.4.x-scala2.12",
@jakubbaron I noticed that. I think Databricks APIs set that. I'm trying with a cluster without any custom cluster policy.
BTW, that effective_spark_version
is undocumented: https://docs.databricks.com/api/workspace/clusters/get
@jakubbaron Confirmed with an unrestricted cluster.
{
"cluster_name": "Cluster",
"spark_version": "15.4.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
},
"node_type_id": "r6id.xlarge",
"autotermination_minutes": 120,
"single_user_name": "***",
"data_security_mode": "SINGLE_USER",
"runtime_engine": "PHOTON",
"autoscale": {
"min_workers": 2,
"max_workers": 8
}
}
{
"cluster_id": "***",
"creator_user_name": "***",
"driver": {
"private_ip": "***",
"node_id": "***",
"instance_id": "i-***",
"start_timestamp": 1726566021224,
"node_aws_attributes": {
"is_spot": false
},
"node_attributes": {
"is_spot": false
},
"host_private_ip": "***"
},
"executors": [
{
"private_ip": "***",
"node_id": "***",
"instance_id": "i-0a8b3a308c50fcb3c",
"start_timestamp": 1726566021196,
"node_aws_attributes": {
"is_spot": true
},
"node_attributes": {
"is_spot": true
},
"host_private_ip": "***"
},
{
"private_ip": "***",
"node_id": "***",
"instance_id": "i-***",
"start_timestamp": 1726566021169,
"node_aws_attributes": {
"is_spot": true
},
"node_attributes": {
"is_spot": true
},
"host_private_ip": "***"
}
],
"spark_context_id": ***,
"driver_healthy": true,
"jdbc_port": 10000,
"cluster_name": "Cluster",
"spark_version": "15.4.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
},
"node_type_id": "r6id.xlarge",
"driver_node_type_id": "r6id.xlarge",
"autotermination_minutes": 120,
"enable_elastic_disk": false,
"disk_spec": {
"disk_count": 0
},
"cluster_source": "UI",
"single_user_name": "***",
"enable_local_disk_encryption": false,
"instance_source": {
"node_type_id": "r6id.xlarge"
},
"driver_instance_source": {
"node_type_id": "r6id.xlarge"
},
"data_security_mode": "SINGLE_USER",
"runtime_engine": "PHOTON",
"effective_spark_version": "15.4.x-photon-scala2.12",
"state": "RUNNING",
"state_message": "",
"start_time": 1726565887122,
"last_state_loss_time": 0,
"last_activity_time": 1726566183422,
"last_restarted_time": 1726566162280,
"autoscale": {
"min_workers": 2,
"max_workers": 8,
"target_workers": 2
},
"cluster_memory_mb": 98304,
"cluster_cores": 12,
"default_tags": {
"Vendor": "Databricks",
"Creator": "***",
"ClusterName": "Cluster",
"ClusterId": "***"
},
"init_scripts_safe_mode": false,
"spec": {
"cluster_name": "Cluster",
"spark_version": "15.4.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
},
"node_type_id": "r6id.xlarge",
"autotermination_minutes": 120,
"single_user_name": "***",
"data_security_mode": "SINGLE_USER",
"runtime_engine": "PHOTON",
"autoscale": {
"min_workers": 2,
"max_workers": 8
}
}
}
It appears that the issue is caused down the line of ubuntu's repositories
As of Ubuntu 24.04's release, Python 3.11 may not be available directly through the official Ubuntu repositories or through Deadsnakes PPA
source: https://askubuntu.com/a/1512163
I'm not expecting the LTS to be shipped with a Python release candidate.
To replicate:
docker run --rm -it --platform linux/amd64 databricksruntime/standard:15.4-LTS /bin/bash
/databricks/python3/bin/python3 --version
Python 3.11.0rc1
/databricks/python-lsp/bin/python3 --version
.