Closed — hong8c closed this issue 2 years ago
Can you post the log of triton server?
@byshiue Below is the log of the triton server. CUDA_VISIBLE_DEVICES=0 mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=${WORKSPACE}/all_models/t5/ root@052f181138ba:/home/ktlab# I0817 22:28:14.474518 1286 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fcbb4000000' with size 268435456 I0817 22:28:14.474993 1286 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0817 22:28:14.479374 1286 model_repository_manager.cc:1206] loading: fastertransformer:1 I0817 22:28:14.545351 1286 libfastertransformer.cc:1478] TRITONBACKEND_Initialize: fastertransformer I0817 22:28:14.545374 1286 libfastertransformer.cc:1488] Triton TRITONBACKEND API version: 1.10 I0817 22:28:14.545378 1286 libfastertransformer.cc:1494] 'fastertransformer' TRITONBACKEND API version: 1.10 I0817 22:28:14.545406 1286 libfastertransformer.cc:1526] TRITONBACKEND_ModelInitialize: fastertransformer (version 1) W0817 22:28:14.546628 1286 libfastertransformer.cc:160] model configuration: { "name": "fastertransformer", "platform": "", "backend": "fastertransformer", "version_policy": { "latest": { "num_versions": 1 } }, "max_batch_size": 1024, "input": [ { "name": "input_ids", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "sequence_length", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "runtime_top_k", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "runtime_top_p", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": 
"beam_search_diversity_rate", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "temperature", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "len_penalty", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "repetition_penalty", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "random_seed", "data_type": "TYPE_UINT64", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "is_return_log_probs", "data_type": "TYPE_BOOL", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "max_output_len", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "beam_width", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "start_id", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "end_id", "data_type": "TYPE_UINT32", "format": "FORMAT_NONE", "dims": [ 1 ], "reshape": { "shape": [] }, "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "bad_words_list", 
"data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ 2, -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true }, { "name": "stop_words_list", "data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ 2, -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": true } ], "output": [ { "name": "output_ids", "data_type": "TYPE_UINT32", "dims": [ -1, -1 ], "label_filename": "", "is_shape_tensor": false }, { "name": "sequence_length", "data_type": "TYPE_UINT32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false }, { "name": "cum_log_probs", "data_type": "TYPE_FP32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false }, { "name": "output_log_probs", "data_type": "TYPE_FP32", "dims": [ -1, -1 ], "label_filename": "", "is_shape_tensor": false } ], "batch_input": [], "batch_output": [], "optimization": { "priority": "PRIORITY_DEFAULT", "input_pinned_memory": { "enable": true }, "output_pinned_memory": { "enable": true }, "gather_kernel_buffer_threshold": 0, "eager_batching": false }, "instance_group": [ { "name": "fastertransformer_0", "kind": "KIND_CPU", "count": 1, "gpus": [], "secondary_devices": [], "profile": [], "passive": false, "host_policy": "" } ], "default_model_filename": "t5", "cc_model_filenames": {}, "metric_tags": {}, "parameters": { "pipeline_para_size": { "string_value": "1" }, "tensor_para_size": { "string_value": "1" }, "data_type": { "string_value": "fp32" }, "enable_custom_all_reduce": { "string_value": "0" }, "model_type": { "string_value": "T5" }, "model_checkpoint_path": { "string_value": "/workspace/all_models/t5/fastertransformer/1/1-gpu" } }, "model_warmup": [] } I0817 22:28:14.546652 1286 libfastertransformer.cc:218] Instance group type: KIND_CPU count: 1 I0817 22:28:14.546659 1286 libfastertransformer.cc:248] Sequence Batching: disabled I0817 22:28:14.546837 1286 libfastertransformer.cc:420] Before Loading Weights: after allocation : free: 44.20 GB, total: 44.56 GB, 
used: 0.36 GB I0817 22:28:14.868437 1286 libfastertransformer.cc:430] After Loading Weights: W0817 22:28:14.868487 1286 libfastertransformer.cc:478] skipping model configuration auto-complete for 'fastertransformer': not supported for fastertransformer backend W0817 22:28:14.869351 1286 libfastertransformer.cc:651] Model name t5 W0817 22:28:14.869361 1286 libfastertransformer.cc:661] Use COUPLED (classic) API. W0817 22:28:14.869370 1286 libfastertransformer.cc:756] Get input name: input_ids, type: TYPE_UINT32, shape: [-1] W0817 22:28:14.869375 1286 libfastertransformer.cc:756] Get input name: sequence_length, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869379 1286 libfastertransformer.cc:756] Get input name: runtime_top_k, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869383 1286 libfastertransformer.cc:756] Get input name: runtime_top_p, type: TYPE_FP32, shape: [1] W0817 22:28:14.869387 1286 libfastertransformer.cc:756] Get input name: beam_search_diversity_rate, type: TYPE_FP32, shape: [1] W0817 22:28:14.869390 1286 libfastertransformer.cc:756] Get input name: temperature, type: TYPE_FP32, shape: [1] W0817 22:28:14.869393 1286 libfastertransformer.cc:756] Get input name: len_penalty, type: TYPE_FP32, shape: [1] W0817 22:28:14.869396 1286 libfastertransformer.cc:756] Get input name: repetition_penalty, type: TYPE_FP32, shape: [1] W0817 22:28:14.869400 1286 libfastertransformer.cc:756] Get input name: random_seed, type: TYPE_UINT64, shape: [1] W0817 22:28:14.869403 1286 libfastertransformer.cc:756] Get input name: is_return_log_probs, type: TYPE_BOOL, shape: [1] W0817 22:28:14.869407 1286 libfastertransformer.cc:756] Get input name: max_output_len, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869411 1286 libfastertransformer.cc:756] Get input name: beam_width, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869415 1286 libfastertransformer.cc:756] Get input name: start_id, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869418 1286 libfastertransformer.cc:756] Get input 
name: end_id, type: TYPE_UINT32, shape: [1] W0817 22:28:14.869422 1286 libfastertransformer.cc:756] Get input name: bad_words_list, type: TYPE_INT32, shape: [2, -1] W0817 22:28:14.869426 1286 libfastertransformer.cc:756] Get input name: stop_words_list, type: TYPE_INT32, shape: [2, -1] W0817 22:28:14.869432 1286 libfastertransformer.cc:798] Get output name: output_ids, type: TYPE_UINT32, shape: [-1, -1] W0817 22:28:14.869435 1286 libfastertransformer.cc:798] Get output name: sequence_length, type: TYPE_UINT32, shape: [-1] W0817 22:28:14.869439 1286 libfastertransformer.cc:798] Get output name: cum_log_probs, type: TYPE_FP32, shape: [-1] W0817 22:28:14.869442 1286 libfastertransformer.cc:798] Get output name: output_log_probs, type: TYPE_FP32, shape: [-1, -1] after allocation : free: 43.84 GB, total: 44.56 GB, used: 0.72 GB I0817 22:28:14.869527 1286 libfastertransformer.cc:451] Before Loading Model: after allocation : free: 43.84 GB, total: 44.56 GB, used: 0.72 GB [WARNING] gemm_config.in is not found; using default GEMM algo I0817 22:28:15.652354 1286 libfastertransformer.cc:465] After Loading Model: I0817 22:28:15.652459 1286 libfastertransformer.cc:712] Model instance is created on GPU [ 0 ] I0817 22:28:15.652471 1286 libfastertransformer.cc:1590] TRITONBACKEND_ModelInstanceInitialize: fastertransformer_0 (count 1) (instance_id 0) after allocation : free: 43.43 GB, total: 44.56 GB, used: 1.14 GB I0817 22:28:15.652614 1286 model_repository_manager.cc:1352] successfully loaded 'fastertransformer' version 1 I0817 22:28:15.652693 1286 server.cc:559] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0817 22:28:15.652731 1286 server.cc:586] +-------------------+-----------------------------------------------------------+-----------------------------------------------------------+ | Backend | Path | Config | +-------------------+-----------------------------------------------------------+-----------------------------------------------------------+ | fastertransformer | /opt/tritonserver/backends/fastertransformer/libtriton_fa | {"cmdline":{"auto-complete-config":"true","min-compute-ca | stertransformer.so | pability":"6.000000","backend-directory":"/opt/tritonserv | er/backends","default-max-batch-size":"4"}} |
---|
+-------------------+-----------------------------------------------------------+-----------------------------------------------------------+
I0817 22:28:15.652755 1286 server.cc:629] +-------------------+---------+--------+ | Model | Version | Status | +-------------------+---------+--------+ | fastertransformer | 1 | READY | +-------------------+---------+--------+
I0817 22:28:15.671403 1286 metrics.cc:650] Collecting metrics for GPU 0: NVIDIA A40 I0817 22:28:15.671723 1286 tritonserver.cc:2176] +----------------------------------+--------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+--------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.24.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_con | | | figuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /workspace/all_models/t5/ | | model_control_mode | MODE_NONE | | strict_model_config | 0 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | +----------------------------------+--------------------------------------------------------------------------------------------------------+
I0817 22:28:15.672591 1286 grpc_server.cc:4608] Started GRPCInferenceService at 0.0.0.0:8001 I0817 22:28:15.672820 1286 http_server.cc:3312] Started HTTPService at 0.0.0.0:8000 I0817 22:28:15.720115 1286 http_server.cc:178] Started Metrics Service at 0.0.0.0:8002
The model_checkpoint_path
in your log is
"model_checkpoint_path": {
"string_value": "/workspace/all_models/t5/fastertransformer/1/1-gpu"
}
, not
"model_checkpoint_path": {
"string_value": "./all_models/t5/fastertransformer/1/1-gpu"
}
Can you check whether you have set the model_checkpoint_path
correctly?
@byshiue Thanks. The issue is resolved.
Description
Reproduced Steps