databricks / containers

Sample base images for Databricks Container Services

Databricks 15.4 LTS is shipped with Python 3.11.0rc1 #201

Open · NicholasFiorentini opened this issue 5 days ago

NicholasFiorentini commented 5 days ago

I'm not expecting the LTS to be shipped with a Python release candidate.

To replicate (a fuller version check is sketched after these steps):

  1. docker run --rm -it --platform linux/amd64 databricksruntime/standard:15.4-LTS /bin/bash
  2. /databricks/python3/bin/python3 --version
    • The output is Python 3.11.0rc1
  3. Same with /databricks/python-lsp/bin/python3 --version.
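A fuller check, printing the complete sys.version string to confirm it really is the rc1 build (a sketch against the same image):

docker run --rm --platform linux/amd64 databricksruntime/standard:15.4-LTS \
  /databricks/python3/bin/python3 -c "import sys; print(sys.version)"

This should print something like 3.11.0rc1 (main, ...) rather than a final 3.11.x release.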
jakubbaron commented 4 days ago

I have also just tried upgrading our runtime to 15.4 LTS and I am seeing the same problem: the LTS image ships with a release-candidate Python.

➜  databricks (update-dbx-image-15.4) docker run databricksruntime/python:15.4-LTS  /databricks/python3/bin/python --version
Python 3.11.0rc1
NicholasFiorentini commented 4 days ago

Just for sanity, I checked the runtime reported by a vanilla (non-dockerized) 15.4 cluster 😱 (screenshot not included here)

Cluster JSON:

{
    "cluster_id": "***",
    "creator_user_name": "***",
    "driver": {
        "private_ip": "***",
        "node_id": "***",
        "instance_id": "i-***",
        "start_timestamp": 1726564676211,
        "node_aws_attributes": {
            "is_spot": false
        },
        "node_attributes": {
            "is_spot": false
        },
        "host_private_ip": "***"
    },
    "executors": [
        {
            "private_ip": "***",
            "node_id": "***",
            "instance_id": "i-***",
            "start_timestamp": 1726564676170,
            "node_aws_attributes": {
                "is_spot": true
            },
            "node_attributes": {
                "is_spot": true
            },
            "host_private_ip": "***"
        }
    ],
    "spark_context_id": ***,
    "driver_healthy": true,
    "jdbc_port": 10000,
    "cluster_name": "Unity Cluster 15.4 LTS",
    "spark_version": "15.4.x-scala2.12",
    "spark_conf": {
        "spark.databricks.delta.schema.autoMerge.enabled": "true",
        "spark.databricks.unityCatalog.volumes.enabled": "true",
        "spark.sql.streaming.metricsEnabled": "true",
        "spark.ui.prometheus.enabled": "true",
        "spark.executor.processTreeMetrics.enabled": "true"
    },
    "aws_attributes": {
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "zone_id": "auto",
        "instance_profile_arn": "arn:aws:iam::***:instance-profile/***",
        "spot_bid_price_percent": 100,
        "ebs_volume_type": "GENERAL_PURPOSE_SSD",
        "ebs_volume_count": 3,
        "ebs_volume_size": 100
    },
    "node_type_id": "m-fleet.2xlarge",
    "driver_node_type_id": "m-fleet.2xlarge",
    "custom_tags": {
        "team": "***"
    },
    "cluster_log_conf": {
        "s3": {
            "destination": "s3://***",
            "region": "***",
            "enable_encryption": true,
            "canned_acl": "bucket-owner-full-control"
        }
    },
    "autotermination_minutes": 60,
    "enable_elastic_disk": false,
    "disk_spec": {
        "disk_type": {
            "ebs_volume_type": "GENERAL_PURPOSE_SSD"
        },
        "disk_count": 3,
        "disk_size": 100
    },
    "cluster_source": "UI",
    "single_user_name": "***",
    "policy_id": "***",
    "enable_local_disk_encryption": false,
    "instance_source": {
        "node_type_id": "m-fleet.2xlarge"
    },
    "driver_instance_source": {
        "node_type_id": "m-fleet.2xlarge"
    },
    "data_security_mode": "SINGLE_USER",
    "runtime_engine": "STANDARD",
    "effective_spark_version": "15.4.x-scala2.12",
    "state": "RUNNING",
    "state_message": "",
    "start_time": 1725530772611,
    "last_state_loss_time": 1726564768418,
    "last_activity_time": 1726564725412,
    "last_restarted_time": 1726564768503,
    "autoscale": {
        "min_workers": 1,
        "max_workers": 2,
        "target_workers": 1
    },
    "cluster_memory_mb": 65536,
    "cluster_cores": 16,
    "default_tags": {
        "Vendor": "Databricks",
        "Creator": "***",
        "ClusterName": "Unity Cluster 15.4 LTS",
        "ClusterId": "***"
    },
    "cluster_log_status": {
        "last_attempted": 1726565051254
    },
    "pinned_by_user_name": "***",
    "init_scripts_safe_mode": false,
    "spec": {
        "cluster_name": "Cluster 15.4 LTS",
        "spark_version": "15.4.x-scala2.12",
        "spark_conf": {
            "spark.databricks.delta.schema.autoMerge.enabled": "true",
            "spark.databricks.unityCatalog.volumes.enabled": "true",
            "spark.sql.streaming.metricsEnabled": "true",
            "spark.ui.prometheus.enabled": "true",
            "spark.executor.processTreeMetrics.enabled": "true"
        },
        "aws_attributes": {
            "first_on_demand": 1,
            "availability": "SPOT_WITH_FALLBACK",
            "zone_id": "auto",
            "instance_profile_arn": "arn:aws:iam::***:instance-profile/***",
            "spot_bid_price_percent": 100,
            "ebs_volume_type": "GENERAL_PURPOSE_SSD",
            "ebs_volume_count": 3,
            "ebs_volume_size": 100
        },
        "node_type_id": "m-fleet.2xlarge",
        "driver_node_type_id": "m-fleet.2xlarge",
        "custom_tags": {
            "team": "***",
        },
        "cluster_log_conf": {
            "s3": {
                "destination": "s3://***",
                "region": "***",
                "enable_encryption": true,
                "canned_acl": "bucket-owner-full-control"
            }
        },
        "autotermination_minutes": 60,
        "enable_elastic_disk": false,
        "single_user_name": "***",
        "policy_id": "***",
        "enable_local_disk_encryption": false,
        "data_security_mode": "SINGLE_USER",
        "runtime_engine": "STANDARD",
        "effective_spark_version": "14.3.x-scala2.12",
        "autoscale": {
            "min_workers": 1,
            "max_workers": 2
        },
        "apply_policy_default_values": false
    }
}
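
For reference, a sketch of pulling the same cluster JSON via the Clusters REST API (the workspace host, token, and cluster ID below are placeholders):

curl -s -H "Authorization: Bearer $DATABRICKS_TOKEN" \
  "https://<workspace-host>/api/2.1/clusters/get?cluster_id=<cluster-id>"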
jakubbaron commented 4 days ago

On a side note, how come at the very end, in the spec block, we see

"effective_spark_version": "14.3.x-scala2.12",

even though earlier we see

"effective_spark_version": "15.4.x-scala2.12",
NicholasFiorentini commented 4 days ago

@jakubbaron I noticed that too. I think the Databricks API sets it. I'm retrying with a cluster that has no custom cluster policy.

NicholasFiorentini commented 4 days ago

BTW, effective_spark_version isn't documented in the Clusters API reference: https://docs.databricks.com/api/workspace/clusters/get

NicholasFiorentini commented 4 days ago

@jakubbaron Confirmed with an unrestricted cluster.


Create request:

{
    "cluster_name": "Cluster",
    "spark_version": "15.4.x-scala2.12",
    "aws_attributes": {
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "zone_id": "auto",
        "spot_bid_price_percent": 100,
        "ebs_volume_count": 0
    },
    "node_type_id": "r6id.xlarge",
    "autotermination_minutes": 120,
    "single_user_name": "***",
    "data_security_mode": "SINGLE_USER",
    "runtime_engine": "PHOTON",
    "autoscale": {
        "min_workers": 2,
        "max_workers": 8
    }
}
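
(The cluster above was created through the UI; an equivalent API call would be a POST of this payload to the cluster-create endpoint. A sketch, with the host as a placeholder and the payload saved as create.json:)

curl -s -X POST \
  -H "Authorization: Bearer $DATABRICKS_TOKEN" \
  -H "Content-Type: application/json" \
  --data @create.json \
  "https://<workspace-host>/api/2.1/clusters/create"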

Get response:

{
    "cluster_id": "***",
    "creator_user_name": "***",
    "driver": {
        "private_ip": "***",
        "node_id": "***",
        "instance_id": "i-***",
        "start_timestamp": 1726566021224,
        "node_aws_attributes": {
            "is_spot": false
        },
        "node_attributes": {
            "is_spot": false
        },
        "host_private_ip": "***"
    },
    "executors": [
        {
            "private_ip": "***",
            "node_id": "***",
            "instance_id": "i-0a8b3a308c50fcb3c",
            "start_timestamp": 1726566021196,
            "node_aws_attributes": {
                "is_spot": true
            },
            "node_attributes": {
                "is_spot": true
            },
            "host_private_ip": "***"
        },
        {
            "private_ip": "***",
            "node_id": "***",
            "instance_id": "i-***",
            "start_timestamp": 1726566021169,
            "node_aws_attributes": {
                "is_spot": true
            },
            "node_attributes": {
                "is_spot": true
            },
            "host_private_ip": "***"
        }
    ],
    "spark_context_id": ***,
    "driver_healthy": true,
    "jdbc_port": 10000,
    "cluster_name": "Cluster",
    "spark_version": "15.4.x-scala2.12",
    "aws_attributes": {
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "zone_id": "auto",
        "spot_bid_price_percent": 100,
        "ebs_volume_count": 0
    },
    "node_type_id": "r6id.xlarge",
    "driver_node_type_id": "r6id.xlarge",
    "autotermination_minutes": 120,
    "enable_elastic_disk": false,
    "disk_spec": {
        "disk_count": 0
    },
    "cluster_source": "UI",
    "single_user_name": "***",
    "enable_local_disk_encryption": false,
    "instance_source": {
        "node_type_id": "r6id.xlarge"
    },
    "driver_instance_source": {
        "node_type_id": "r6id.xlarge"
    },
    "data_security_mode": "SINGLE_USER",
    "runtime_engine": "PHOTON",
    "effective_spark_version": "15.4.x-photon-scala2.12",
    "state": "RUNNING",
    "state_message": "",
    "start_time": 1726565887122,
    "last_state_loss_time": 0,
    "last_activity_time": 1726566183422,
    "last_restarted_time": 1726566162280,
    "autoscale": {
        "min_workers": 2,
        "max_workers": 8,
        "target_workers": 2
    },
    "cluster_memory_mb": 98304,
    "cluster_cores": 12,
    "default_tags": {
        "Vendor": "Databricks",
        "Creator": "***",
        "ClusterName": "Cluster",
        "ClusterId": "***"
    },
    "init_scripts_safe_mode": false,
    "spec": {
        "cluster_name": "Cluster",
        "spark_version": "15.4.x-scala2.12",
        "aws_attributes": {
            "first_on_demand": 1,
            "availability": "SPOT_WITH_FALLBACK",
            "zone_id": "auto",
            "spot_bid_price_percent": 100,
            "ebs_volume_count": 0
        },
        "node_type_id": "r6id.xlarge",
        "autotermination_minutes": 120,
        "single_user_name": "***",
        "data_security_mode": "SINGLE_USER",
        "runtime_engine": "PHOTON",
        "autoscale": {
            "min_workers": 2,
            "max_workers": 8
        }
    }
}
jakubbaron commented 4 days ago

It appears the issue originates further downstream, in Ubuntu's package repositories:

As of Ubuntu 24.04's release, Python 3.11 may not be available directly through the official Ubuntu repositories or through Deadsnakes PPA

source: https://askubuntu.com/a/1512163
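
Assuming the 15.4 base images build on Ubuntu 22.04 (jammy), a quick sketch to check what the distro's python3.11 package actually resolves to:

docker run --rm ubuntu:22.04 bash -c "apt-get update -qq && apt-cache policy python3.11"

On jammy the candidate is a release-candidate build (something like 3.11.0~rc1), which would explain where the rc1 interpreter comes from.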