I found that on a PVC GPU, fp16 output is nondeterministic: running the same LLM model twice produces different outputs. Below is a script to reproduce the issue:
import os

import pytest
import torch
# Imported for its side effect only: registers the 'xpu' device backend
# with torch so `.to('xpu')` works below.
import intel_extension_for_pytorch  # noqa: F401

from transformers import AutoModelForCausalLM, AutoTokenizer

# All model execution happens on the Intel XPU (PVC) device.
device = 'xpu'
# Base prompt; it is concatenated three times at encode time to build a
# longer input sequence.
prompt = "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun"
@pytest.mark.parametrize('Model, Tokenizer, model_path', [
    (AutoModelForCausalLM, AutoTokenizer, "meta/Llama-2-7b-chat-hf"),
])
def test_model(Model, Tokenizer, model_path):
    """Run the same fp16 model twice on XPU and compare the logits.

    Fails when more than 1% of logit elements differ beyond a 1e-3
    rtol/atol tolerance between two otherwise-identical runs, which
    demonstrates the fp16 nondeterminism reported for PVC.
    """
    with torch.inference_mode():
        tokenizer = Tokenizer.from_pretrained(model_path, trust_remote_code=True)
        # Triple the prompt so the sequence is longer (more matmul work,
        # more opportunity for nondeterminism to surface).
        input_ids = tokenizer.encode(prompt + prompt + prompt,
                                     return_tensors="pt").to(device)
        print(input_ids.shape)

        def _run_once():
            # Fresh load each time so both runs start from identical
            # checkpoint weights.
            model = Model.from_pretrained(model_path, trust_remote_code=True)
            model = model.half().to(device)
            logits = model(input_ids).logits
            torch.xpu.synchronize()
            model.to('cpu')  # move weights off the GPU between runs
            return logits

        logits_base_model = _run_once()
        logits_optimized_model = _run_once()

        # Count mismatching elements on-device instead of materializing a
        # Python list and calling .count(False) — same result, far cheaper.
        tol = 1e-03
        close = torch.isclose(logits_optimized_model, logits_base_model,
                              rtol=tol, atol=tol)
        num_false = close.numel() - int(close.sum().item())
        percent_false = num_false / logits_optimized_model.numel()
        print(percent_false)
        assert percent_false < 1e-02
After running `pytest logits.py -v -s`, the test will fail.
Describe the bug
I found that on a PVC GPU, fp16 output is nondeterministic: running the same LLM model twice produces different outputs. Below is a script to reproduce the issue:
After running `pytest logits.py -v -s`, the test will fail.

Versions