Current Behavior

Following the example pipeline on https://docs.towhee.io/Triton%20Server/triton/ with nvcr.io/nvidia/tritonserver:22.07-py3 works well. However, if a newer version of Triton Server (23.xx onward) is used, the pipeline fails with the following error:
AssertionError: Tensor is stored in GPU and cannot be converted to NumPy., Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/towhee/runtime/nodes/node.py", line 158, in _call
    return True, self._op(*inputs), None
  File "/root/.towhee/operators/image-text-embedding/clip/versions/main/clip.py", line 126, in __call__
    result = self.inference_single_data(single_data)
  File "/root/.towhee/operators/image-text-embedding/clip/versions/main/clip.py", line 112, in inference_single_data
    vec = self._inference_from_image(data)
  File "/usr/local/lib/python3.10/dist-packages/towhee/types/arg.py", line 33, in wrapper
    return func(*new_args, **kwargs)
  File "/root/.towhee/operators/image-text-embedding/clip/versions/main/clip.py", line 142, in _inference_from_image
    image_features = self.model(inputs['pixel_values'])
  File "/usr/local/lib/python3.10/dist-packages/towhee/serve/triton/triton_client.py", line 58, in __call__
    outputs = torch.tensor(pb_utils.get_output_tensor_by_name(inference_response, self._output_names[0]).as_numpy())
c_python_backend_utils.TritonModelException: Tensor is stored in GPU and cannot be converted to NumPy.
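The traceback shows the exception being raised inside Towhee's Triton client wrapper (towhee/serve/triton/triton_client.py), which unconditionally calls .as_numpy() on the BLS response tensor; .as_numpy() is only valid for tensors in CPU memory. A minimal sketch of a guard at that call site, reusing the names from the traceback (inference_response, self._output_names); Tensor.is_cpu() and the DLPack round-trip are documented Python-backend APIs:

import torch
from torch.utils.dlpack import from_dlpack

# Hypothetical patch around the failing call in triton_client.py:
out = pb_utils.get_output_tensor_by_name(inference_response, self._output_names[0])
if out.is_cpu():
    # CPU tensor: safe to convert via NumPy, as the current code does.
    outputs = torch.tensor(out.as_numpy())
else:
    # GPU tensor: .as_numpy() raises, so hand it to torch via DLPack
    # (data stays on the device; call .cpu() if host memory is needed).
    outputs = from_dlpack(out.to_dlpack())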
This is the model.py that the pipeline generated based on the instructions:
#coding=utf-8
# Copyright 2023 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: skip-file

import logging
from pathlib import Path

import numpy as np
import dill as pickle

from towhee.serve.triton.bls.python_backend_wrapper import pb_utils
from towhee.runtime.runtime_pipeline import RuntimePipeline
from towhee.utils.serializer import to_triton_data, from_triton_data

logger = logging.getLogger()


class TritonPythonModel:
    '''
    Pipeline Model
    '''

    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        input0 = {'name': 'INPUT0', 'data_type': 'TYPE_STRING', 'dims': [1]}
        output0 = {'name': 'OUTPUT0', 'data_type': 'TYPE_STRING', 'dims': [1]}

        auto_complete_model_config.set_max_batch_size(8)
        auto_complete_model_config.add_input(input0)
        auto_complete_model_config.add_output(output0)
        return auto_complete_model_config

    def initialize(self, args):
        self._load_pipeline()

    def _load_pipeline(self, fpath=None) -> str:
        if fpath is None:
            fpath = str(Path(__file__).parent.resolve() / 'pipe.pickle')
        with open(fpath, 'rb') as f:
            dag_repr = pickle.load(f)
            self.pipe = RuntimePipeline(dag_repr)
            self.pipe.preload()

    def _get_result(self, q):
        ret = []
        while True:
            data = q.get()
            if data is None:
                break
            ret.append(data)
        return ret

    def execute(self, requests):
        responses = []
        batch_inputs = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()  ##### ERROR IS RAISED HERE.
            for item in in_0:
                arg = item[0]
                inputs = from_triton_data(arg)
                batch_inputs.append(inputs)
            results = self.pipe.batch(batch_inputs)
            batch_inputs = []
            outputs = []
            for q in results:
                ret = self._get_result(q)
                outputs.append(ret)
            ret_str = to_triton_data(outputs)
            out_tensor_0 = pb_utils.Tensor('OUTPUT0', np.array([ret_str], np.object_))
            responses.append(pb_utils.InferenceResponse([out_tensor_0]))
        return responses

    def finalize(self):
        pass
The specific error is:
c_python_backend_utils.TritonModelException: Tensor is stored in GPU and cannot be converted to NumPy.
What changed between tritonserver:22.07-py3 and 23.xx that would cause this error?
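For reference, the generated model.py above only unpacks the serialized request strings; the GPU tensor comes out of the nested BLS call that the clip operator issues against the converted sub-model. If patching the wrapper is acceptable, recent Triton releases (roughly 23.04 onward) let a BLS request ask for CPU-resident output tensors via preferred_memory. A sketch of that workaround, with placeholder model and tensor names:

# Hypothetical BLS request asking Triton to place output tensors in CPU
# memory so that .as_numpy() keeps working; model/tensor names are placeholders.
infer_request = pb_utils.InferenceRequest(
    model_name='clip_image_sub_model',
    requested_output_names=['OUTPUT0'],
    inputs=[input_tensor],
    preferred_memory=pb_utils.PreferredMemory(
        pb_utils.TRITONSERVER_MEMORY_CPU, 0),
)
inference_response = infer_request.exec()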
Expected Behavior
No response
Steps To Reproduce
1. Run the pipeline creator as specified in https://docs.towhee.io/Triton%20Server/triton/, but change the Triton Server Docker base image to nvcr.io/nvidia/tritonserver:23.12-py3.
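2. Send any request through the deployed pipeline; the first inference triggers the error. A minimal sketch of the client call, assuming the triton_client helper shown on the same docs page (server URL and input image are placeholders):

# Hypothetical client call; adjust the URL and input to your deployment.
from towhee import triton_client

client = triton_client.Client(url='localhost:8000')
res = client('https://github.com/towhee-io/towhee/raw/main/towhee_logo.png')
print(res)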
Environment
Anything else?
No response