zhangjiawei5911 opened this issue 10 months ago
Could you share the command-line(s) to reproduce the issue, please?
> Could you share the command-line(s) to reproduce the issue, please?

This error occurs when I run my `run.py` file. Here is its code:
```python
import json
import os

import tensorrt as trt
import torch
from transformers import AutoTokenizer

import tensorrt_llm
from tensorrt_llm import logger
from tensorrt_llm._utils import trt_dtype_to_torch
from tensorrt_llm.models import BertForQuestionAnswering, BertModel
from tensorrt_llm.runtime import Session, TensorInfo

from build import get_engine_name  # helper from the example's build.py

# parse_arguments() is defined elsewhere in run.py, following the example.

if __name__ == '__main__':
    tokenizer = AutoTokenizer.from_pretrained("./BertModel")
    args = parse_arguments()
    tensorrt_llm.logger.set_level(args.log_level)

    # Read the engine metadata written by build.py.
    config_path = os.path.join(args.engine_dir, 'config.json')
    with open(config_path, 'r') as f:
        config = json.load(f)
    dtype = config['builder_config']['precision']
    world_size = config['builder_config']['tensor_parallel']
    assert world_size == tensorrt_llm.mpi_world_size(), \
        f'Engine world size ({world_size}) != Runtime world size ({tensorrt_llm.mpi_world_size()})'
    model_name = config['builder_config']['name']

    runtime_rank = tensorrt_llm.mpi_rank() if world_size > 1 else 0
    runtime_mapping = tensorrt_llm.Mapping(world_size,
                                           runtime_rank,
                                           tp_size=world_size)
    torch.cuda.set_device(runtime_rank % runtime_mapping.gpus_per_node)

    serialize_path = get_engine_name(model_name, dtype, world_size,
                                     runtime_rank)
    serialize_path = os.path.join(args.engine_dir, serialize_path)

    stream = torch.cuda.current_stream().cuda_stream
    logger.info(f'Loading engine from {serialize_path}')
    with open(serialize_path, 'rb') as f:
        engine_buffer = f.read()
    logger.info('Creating session from engine')
    session = Session.from_serialized_engine(engine_buffer)

    # Build (query, title + '[SEP]' + content) sentence pairs from a TSV file.
    texts = [[], []]
    with open("rt_data_10w") as fd:
        for line in fd:
            line = line.lower()
            lt = line.strip('\r\n').split('\t')
            query = lt[0]
            title = lt[2]
            content = lt[3]
            title_content = title + '[SEP]' + content
            texts[0].append(query)
            texts[1].append(title_content)

    device = "cuda"
    inputs = tokenizer(*texts,
                       padding=True,
                       truncation='longest_first',
                       return_tensors="pt",
                       max_length=512).to(device)
    input_ids = inputs["input_ids"]
    attention_mask = inputs['attention_mask']
    token_type_ids = inputs['token_type_ids']

    batch_size = 256
    length = len(input_ids)
    for index in range(0, length, batch_size):
        input_ids_tmp = input_ids[index:index + batch_size].int().cuda()
        token_type_ids_tmp = token_type_ids[index:index + batch_size].int().cuda()
        input_lengths = 512 * torch.ones((batch_size, ),
                                         dtype=torch.int32,
                                         device='cuda')
        inputs_tmp = {
            'input_ids': input_ids_tmp,
            'input_lengths': input_lengths,
            'token_type_ids': token_type_ids_tmp
        }
        output_info = session.infer_shapes([
            TensorInfo('input_ids', trt.DataType.INT32, input_ids_tmp.shape),
            TensorInfo('input_lengths', trt.DataType.INT32,
                       input_lengths.shape),
            TensorInfo('token_type_ids', trt.DataType.INT32,
                       token_type_ids_tmp.shape),
        ])
        for t in output_info:
            print("t.shape: {}".format(t.shape))
        # Allocate output buffers with the shapes TensorRT inferred.
        outputs = {
            t.name: torch.empty(tuple(t.shape),
                                dtype=trt_dtype_to_torch(t.dtype),
                                device='cuda')
            for t in output_info
        }
        if model_name == BertModel.__name__:
            output_name = 'hidden_states'
        elif model_name == BertForQuestionAnswering.__name__:
            output_name = 'logits'
        else:
            assert False, f"Unknown BERT model {model_name}"
        assert output_name in outputs, \
            f'{output_name} not found in outputs, check if build.py set the name correctly'
        # NOTE: this binds the whole-dataset `inputs` dict from the tokenizer,
        # not the per-batch `inputs_tmp` built above.
        ok = session.run(inputs, outputs, stream)
        assert ok, "Runtime execution failed"
        torch.cuda.synchronize()
        res = outputs[output_name]
```
I think there might be an issue with my output_info and outputs settings.
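A quick way to check that would be to print what `infer_shapes` reports against what actually gets bound; a minimal debugging sketch, reusing the names from the snippet above:

```python
# Debugging sketch: compare the output shapes TensorRT inferred with the
# shapes of the tensors actually bound in session.run().
for t in output_info:
    print(f"engine output {t.name}: shape={tuple(t.shape)}")
for name, tensor in inputs_tmp.items():
    print(f"per-batch input {name}: shape={tuple(tensor.shape)}")
# `inputs` is the whole-dataset dict from the tokenizer; its shapes differ
# from the batch-sized `inputs_tmp` that infer_shapes was given.
for name, tensor in inputs.items():
    print(f"full-dataset input {name}: shape={tuple(tensor.shape)}")
```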
Would you please try our latest code base to see if the issue still exists?
And do you still have any further issues or questions now? If not, we'll close this soon.
My original model is a four-class BERT model. I created the engines successfully, but got this error during inference:
AssertionError: Runtime execution failed
Could you please tell me how to solve this problem?
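From the snippet above, one plausible cause (an assumption, not a confirmed diagnosis) is that the loop builds the per-batch `inputs_tmp` but then passes the whole-dataset `inputs` to `session.run`, and that `input_lengths` is hard-coded to shape `(batch_size,)` and value 512 even when the final batch is smaller or the padded length is under 512. A minimal sketch of the loop with those two points adjusted, reusing the names from the snippet:

```python
for index in range(0, length, batch_size):
    input_ids_tmp = input_ids[index:index + batch_size].int().cuda()
    token_type_ids_tmp = token_type_ids[index:index + batch_size].int().cuda()
    # Size input_lengths to the actual (possibly partial) batch and to the
    # padded sequence length instead of hard-coding batch_size and 512.
    cur_batch, cur_seq_len = input_ids_tmp.shape
    input_lengths = torch.full((cur_batch, ), cur_seq_len,
                               dtype=torch.int32, device='cuda')
    inputs_tmp = {
        'input_ids': input_ids_tmp,
        'input_lengths': input_lengths,
        'token_type_ids': token_type_ids_tmp,
    }
    output_info = session.infer_shapes([
        TensorInfo('input_ids', trt.DataType.INT32, input_ids_tmp.shape),
        TensorInfo('input_lengths', trt.DataType.INT32, input_lengths.shape),
        TensorInfo('token_type_ids', trt.DataType.INT32,
                   token_type_ids_tmp.shape),
    ])
    outputs = {
        t.name: torch.empty(tuple(t.shape),
                            dtype=trt_dtype_to_torch(t.dtype),
                            device='cuda')
        for t in output_info
    }
    # Bind the per-batch dict, not the whole-dataset `inputs`.
    ok = session.run(inputs_tmp, outputs, stream)
    assert ok, "Runtime execution failed"
    torch.cuda.synchronize()
    res = outputs[output_name]
```

With the bindings batch-sized consistently, `session.run` then sees the same shapes that `infer_shapes` was given, which is the usual precondition for the call to succeed.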