GoogleCloudPlatform / vertex-ai-samples

Notebooks, code samples, sample apps, and other resources that demonstrate how to use, develop and manage machine learning and generative AI workflows using Google Cloud Vertex AI.
https://cloud.google.com/vertex-ai
Apache License 2.0

Customer states that the notebook does not work because the container never becomes ready #3485

Open. aalferez123 opened this issue 2 weeks ago.

aalferez123 commented 2 weeks ago

notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb

Expected Behavior

The model deploys successfully and the endpoint running Stable Diffusion can be hit with prediction requests.
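
For context, once the deployment succeeds the endpoint would be exercised with a predict call along these lines (a minimal sketch; the endpoint resource name and the instance payload expected by the custom TorchServe handler are assumptions, not copied from the notebook):

from google.cloud import aiplatform

aiplatform.init(project="YOUR_PROJECT_ID", location="us-central1")

# Hypothetical endpoint resource name; substitute the endpoint the notebook creates.
endpoint = aiplatform.Endpoint(
    "projects/YOUR_PROJECT_ID/locations/us-central1/endpoints/ENDPOINT_ID"
)

# The {"prompt": ...} instance schema is an assumption about the custom
# TorchServe handler; the actual handler may expect a different shape.
response = endpoint.predict(instances=[{"prompt": "a photo of a corgi on a beach"}])
print(response.predictions)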

Actual Behavior

FailedPrecondition: 400 Model server never became ready. Please validate that your model file or container configuration are valid. Model server logs can be found at

Steps to Reproduce the Problem

1. Run the notebook through the model deployment cell.

2. Wait for the deployment to complete; it fails with the FailedPrecondition error above.

Specifications

notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb
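
A note on the failure mode: "Model server never became ready" generally means Vertex AI's health checks against the container failed before the deployment timeout. With custom TorchServe images, a common cause is a mismatch between the health/predict routes and port registered on the Model resource and the ones TorchServe actually listens on. A minimal sketch of making those explicit at upload time (the image URI, predict route, and port values here are placeholders, not the notebook's actual configuration):

from google.cloud import aiplatform

aiplatform.init(project="YOUR_PROJECT_ID", location="us-central1")

model = aiplatform.Model.upload(
    display_name="dreambooth-stablediffusion",
    # Placeholder image URI; use the TorchServe image the notebook builds.
    serving_container_image_uri=(
        "us-central1-docker.pkg.dev/YOUR_PROJECT/YOUR_REPO/torchserve-sd:latest"
    ),
    # Vertex AI probes the health route to decide the server is ready;
    # /ping is TorchServe's standard health endpoint.
    serving_container_health_route="/ping",
    # Placeholder model name in the predict route.
    serving_container_predict_route="/predictions/stable_diffusion",
    # Must match the port in TorchServe's inference_address setting.
    serving_container_ports=[8080],
)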

aalferez123 commented 2 weeks ago

Here is a short snapshot of the error:

---------------------------------------------------------------------------
FailedPrecondition                        Traceback (most recent call last)
Cell In[19], line 1
----> 1 model.deploy(
      2     endpoint=endpoint,
      3     deployed_model_display_name=MODEL_DISPLAY_NAME,
      4     machine_type="n1-standard-8",
      5     accelerator_type="NVIDIA_TESLA_P100",
      6     accelerator_count=1,
      7     traffic_percentage=100,
      8     deploy_request_timeout=1200,
      9     sync=True,
     10 )

File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:4876, in Model.deploy(self, endpoint, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_metadata, explanation_parameters, metadata, encryption_spec_key_name, network, sync, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, private_service_connect_config, deployment_resource_pool)
   4865         raise ValueError(
   4866             "Traffic splitting is not yet supported for PSA based PrivateEndpoint. "
   4867             "Try calling deploy() without providing `traffic_split`. "
   4868             "A maximum of one model can be deployed to each private Endpoint."
   4869         )
   4871 explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
   4872     explanation_metadata=explanation_metadata,
   4873     explanation_parameters=explanation_parameters,
   4874 )
-> 4876 return self._deploy(
   4877     endpoint=endpoint,
   4878     deployed_model_display_name=deployed_model_display_name,
   4879     traffic_percentage=traffic_percentage,
   4880     traffic_split=traffic_split,
   4881     machine_type=machine_type,
   4882     min_replica_count=min_replica_count,
   4883     max_replica_count=max_replica_count,
   4884     accelerator_type=accelerator_type,
   4885     accelerator_count=accelerator_count,
   4886     tpu_topology=tpu_topology,
   4887     service_account=service_account,
   4888     explanation_spec=explanation_spec,
   4889     metadata=metadata,
   4890     encryption_spec_key_name=encryption_spec_key_name
   4891     or initializer.global_config.encryption_spec_key_name,
   4892     network=network,
   4893     sync=sync,
   4894     deploy_request_timeout=deploy_request_timeout,
   4895     autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
   4896     autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
   4897     enable_access_logging=enable_access_logging,
   4898     disable_container_logging=disable_container_logging,
   4899     private_service_connect_config=private_service_connect_config,
   4900     deployment_resource_pool=deployment_resource_pool,
   4901 )

File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/base.py:863, in optional_sync.<locals>.optional_run_in_thread.<locals>.wrapper(*args, **kwargs)
    861     if self:
    862         VertexAiResourceNounWithFutureManager.wait(self)
--> 863     return method(*args, **kwargs)
    865 # callbacks to call within the Future (in same Thread)
    866 internal_callbacks = []

File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:5069, in Model._deploy(self, endpoint, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_spec, metadata, encryption_spec_key_name, network, sync, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, private_service_connect_config, deployment_resource_pool)
   5057         endpoint = PrivateEndpoint.create(
   5058             display_name=display_name,
   5059             network=network,
   (...)
   5064             private_service_connect_config=private_service_connect_config,
   5065         )
   5067 _LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint)
-> 5069 endpoint._deploy_call(
   5070     endpoint.api_client,
   5071     endpoint.resource_name,
   5072     self,
   5073     endpoint._gca_resource.traffic_split,
   5074     network=network or endpoint.network,
   5075     deployed_model_display_name=deployed_model_display_name,
   5076     traffic_percentage=traffic_percentage,
   5077     traffic_split=traffic_split,
   5078     machine_type=machine_type,
   5079     min_replica_count=min_replica_count,
   5080     max_replica_count=max_replica_count,
   5081     accelerator_type=accelerator_type,
   5082     accelerator_count=accelerator_count,
   5083     tpu_topology=tpu_topology,
   5084     service_account=service_account,
   5085     explanation_spec=explanation_spec,
   5086     metadata=metadata,
   5087     deploy_request_timeout=deploy_request_timeout,
   5088     autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
   5089     autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
   5090     enable_access_logging=enable_access_logging,
   5091     disable_container_logging=disable_container_logging,
   5092     deployment_resource_pool=deployment_resource_pool,
   5093 )
   5095 _LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)
   5097 endpoint._sync_gca_resource()

File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:1827, in Endpoint._deploy_call(cls, api_client, endpoint_resource_name, model, endpoint_resource_traffic_split, network, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_spec, metadata, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, deployment_resource_pool)
   1815 operation_future = api_client.deploy_model(
   1816     endpoint=endpoint_resource_name,
   1817     deployed_model=deployed_model,
   (...)
   1820     timeout=deploy_request_timeout,
   1821 )
   1823 _LOGGER.log_action_started_against_resource_with_lro(
   1824     "Deploy", "model", cls, operation_future
   1825 )
-> 1827 operation_future.result(timeout=None)

File /opt/conda/lib/python3.10/site-packages/google/api_core/future/polling.py:261, in PollingFuture.result(self, timeout, retry, polling)
    256 self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
    258 if self._exception is not None:
    259     # pylint: disable=raising-bad-type
    260     # Pylint doesn't recognize that this is valid in this case.
--> 261     raise self._exception
    263 return self._result

FailedPrecondition: 400 Model server never became ready. Please validate that your model file or container configuration are valid. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=934458780151&resource=aiplatform.googleapis.com%2FDeploymentResourcePool&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FDeploymentResourcePool%22%0Aresource.labels.deployment_resource_pool_id%3D%22internal_756661912002887680%22%0Aresource.labels.location%3D%22us-central1%22.
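
The URL at the end of the error encodes a Cloud Logging filter for the deployment's resource pool. The same entries can be read programmatically, which is handy when the container is crash-looping at startup. A minimal sketch using the google-cloud-logging client, with the project number and resource pool ID copied from the URL above:

from google.cloud import logging as cloud_logging

client = cloud_logging.Client(project="934458780151")

# Mirrors the filter embedded in the error's log-viewer URL.
log_filter = (
    'resource.type="aiplatform.googleapis.com/DeploymentResourcePool" '
    'resource.labels.deployment_resource_pool_id="internal_756661912002887680" '
    'resource.labels.location="us-central1"'
)

# Newest entries first; the model server's stdout/stderr should show why it
# never passed the readiness check.
for entry in client.list_entries(filter_=log_filter, order_by=cloud_logging.DESCENDING):
    print(entry.timestamp, entry.payload)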