Open aalferez123 opened 2 weeks ago
Here is a short snapshot of the error:
---------------------------------------------------------------------------
FailedPrecondition Traceback (most recent call last)
Cell In[19], line 1
----> 1 model.deploy(
2 endpoint=endpoint,
3 deployed_model_display_name=MODEL_DISPLAY_NAME,
4 machine_type="n1-standard-8",
5 accelerator_type="NVIDIA_TESLA_P100",
6 accelerator_count=1,
7 traffic_percentage=100,
8 deploy_request_timeout=1200,
9 sync=True,
10 )
File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:4876, in Model.deploy(self, endpoint, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_metadata, explanation_parameters, metadata, encryption_spec_key_name, network, sync, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, private_service_connect_config, deployment_resource_pool)
4865 raise ValueError(
4866 "Traffic splitting is not yet supported for PSA based PrivateEndpoint. "
4867 "Try calling deploy() without providing `traffic_split`. "
4868 "A maximum of one model can be deployed to each private Endpoint."
4869 )
4871 explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
4872 explanation_metadata=explanation_metadata,
4873 explanation_parameters=explanation_parameters,
4874 )
-> 4876 return self._deploy(
4877 endpoint=endpoint,
4878 deployed_model_display_name=deployed_model_display_name,
4879 traffic_percentage=traffic_percentage,
4880 traffic_split=traffic_split,
4881 machine_type=machine_type,
4882 min_replica_count=min_replica_count,
4883 max_replica_count=max_replica_count,
4884 accelerator_type=accelerator_type,
4885 accelerator_count=accelerator_count,
4886 tpu_topology=tpu_topology,
4887 service_account=service_account,
4888 explanation_spec=explanation_spec,
4889 metadata=metadata,
4890 encryption_spec_key_name=encryption_spec_key_name
4891 or initializer.global_config.encryption_spec_key_name,
4892 network=network,
4893 sync=sync,
4894 deploy_request_timeout=deploy_request_timeout,
4895 autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
4896 autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
4897 enable_access_logging=enable_access_logging,
4898 disable_container_logging=disable_container_logging,
4899 private_service_connect_config=private_service_connect_config,
4900 deployment_resource_pool=deployment_resource_pool,
4901 )
File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/base.py:863, in optional_sync.<locals>.optional_run_in_thread.<locals>.wrapper(*args, **kwargs)
861 if self:
862 VertexAiResourceNounWithFutureManager.wait(self)
--> 863 return method(*args, **kwargs)
865 # callbacks to call within the Future (in same Thread)
866 internal_callbacks = []
File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:5069, in Model._deploy(self, endpoint, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_spec, metadata, encryption_spec_key_name, network, sync, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, private_service_connect_config, deployment_resource_pool)
5057 endpoint = PrivateEndpoint.create(
5058 display_name=display_name,
5059 network=network,
(...)
5064 private_service_connect_config=private_service_connect_config,
5065 )
5067 _LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint)
-> 5069 endpoint._deploy_call(
5070 endpoint.api_client,
5071 endpoint.resource_name,
5072 self,
5073 endpoint._gca_resource.traffic_split,
5074 network=network or endpoint.network,
5075 deployed_model_display_name=deployed_model_display_name,
5076 traffic_percentage=traffic_percentage,
5077 traffic_split=traffic_split,
5078 machine_type=machine_type,
5079 min_replica_count=min_replica_count,
5080 max_replica_count=max_replica_count,
5081 accelerator_type=accelerator_type,
5082 accelerator_count=accelerator_count,
5083 tpu_topology=tpu_topology,
5084 service_account=service_account,
5085 explanation_spec=explanation_spec,
5086 metadata=metadata,
5087 deploy_request_timeout=deploy_request_timeout,
5088 autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
5089 autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
5090 enable_access_logging=enable_access_logging,
5091 disable_container_logging=disable_container_logging,
5092 deployment_resource_pool=deployment_resource_pool,
5093 )
5095 _LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)
5097 endpoint._sync_gca_resource()
File /opt/conda/lib/python3.10/site-packages/google/cloud/aiplatform/models.py:1827, in Endpoint._deploy_call(cls, api_client, endpoint_resource_name, model, endpoint_resource_traffic_split, network, deployed_model_display_name, traffic_percentage, traffic_split, machine_type, min_replica_count, max_replica_count, accelerator_type, accelerator_count, tpu_topology, service_account, explanation_spec, metadata, deploy_request_timeout, autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle, enable_access_logging, disable_container_logging, deployment_resource_pool)
1815 operation_future = api_client.deploy_model(
1816 endpoint=endpoint_resource_name,
1817 deployed_model=deployed_model,
(...)
1820 timeout=deploy_request_timeout,
1821 )
1823 _LOGGER.log_action_started_against_resource_with_lro(
1824 "Deploy", "model", cls, operation_future
1825 )
-> 1827 operation_future.result(timeout=None)
File /opt/conda/lib/python3.10/site-packages/google/api_core/future/polling.py:261, in PollingFuture.result(self, timeout, retry, polling)
256 self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
258 if self._exception is not None:
259 # pylint: disable=raising-bad-type
260 # Pylint doesn't recognize that this is valid in this case.
--> 261 raise self._exception
263 return self._result
FailedPrecondition: 400 Model server never became ready. Please validate that your model file or container configuration are valid. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=934458780151&resource=aiplatform.googleapis.com%2FDeploymentResourcePool&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FDeploymentResourcePool%22%0Aresource.labels.deployment_resource_pool_id%3D%22internal_756661912002887680%22%0Aresource.labels.location%3D%22us-central1%22.
notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb
Expected Behavior
The model deploys successfully, and the endpoint running Stable Diffusion can be queried.
Actual Behavior
FailedPrecondition: 400 Model server never became ready. Please validate that your model file or container configuration are valid. Model server logs can be found at
Steps to Reproduce the Problem
1. Deploy the notebook.
Specifications
notebooks/community/vertex_endpoints/torchserve/dreambooth_stablediffusion.ipynb