File "/home//JetStream/benchmarks/benchmark_serving.py", line 778, in <module>
main(parsed_args)
File "/home//JetStream/benchmarks/benchmark_serving.py", line 574, in main
benchmark_result, request_outputs = asyncio.run(
File "/usr/lib/python3.10/asyncio/runners.py", line 44, in run
return loop.run_until_complete(main)
File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
return future.result()
File "/home//JetStream/benchmarks/benchmark_serving.py", line 453, in benchmark
metrics = calculate_metrics(
File "/home//JetStream/benchmarks/benchmark_serving.py", line 343, in calculate_metrics
per_token_latencies.append(outputs[i].latency / output_len)
ZeroDivisionError: float division by zero
Command:
python benchmarks/benchmark_serving.py --tokenizer /home//data/tokenizer.model --num-prompts 300 --dataset-path /home//data/ShareGPT_V3_unfiltered_cleaned_split.json --dataset sharegpt --save-request-outputs
Logs: