I have a workflow where I create a PyTorch model, export it to ONNX, and then use QNN to convert it into a .so model library. The model is a simple forward model with a single linear layer.
However, when I test the model with the same input across all three versions (PyTorch, ONNX, and QNN), the output from the QNN model differs significantly from the outputs of the PyTorch and ONNX models, which are almost identical to each other.
I have three scripts:
1. simple_model.py to build the PyTorch model and export it to ONNX.
2. run_debug.sh to run the QNN commands and compile the model (sketched after simple_model.py below).
3. compare_output_diff.py to compare the outputs of all three models.
simple_model.py
import torch
import torch.nn as nn


class SimpleForwardModel(nn.Module):
    def __init__(self, hidden_size=8):
        super(SimpleForwardModel, self).__init__()
        self.hidden_size = hidden_size
        self.linear = nn.Linear(hidden_size, hidden_size)

    def forward(self, input_tensor):
        return self.linear(input_tensor)


if __name__ == "__main__":
    # Instantiate the model
    model = SimpleForwardModel()

    # Create a dummy input of shape (batch, sequence, hidden)
    batch_size = 1
    seq_length = 128
    hidden_size = 8
    dummy_input = torch.randn(batch_size, seq_length, hidden_size, dtype=torch.float32)

    # Save the full model to simple.pt
    torch.save(model, 'simple.pt')
    print("Model saved to simple.pt")

    # Export to ONNX with dynamic batch and sequence dimensions
    torch.onnx.export(model, dummy_input, "simple.onnx",
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={'input': {0: 'batch_size', 1: 'sequence_length'},
                                    'output': {0: 'batch_size', 1: 'sequence_length'}})
    print("Model exported to simple.onnx")
compare_output_diff.py
import argparse
import os
import subprocess

import numpy as np
import onnxruntime as ort
import torch

# Needed so torch.load can unpickle the SimpleForwardModel class
from simple_model import *

torch.manual_seed(0)


def save_as_rawtensor(array, filename):
    # qnn-net-run expects raw little-endian float32 bytes with no header
    with open(filename, 'wb') as f:
        f.write(array.tobytes())


def generate_qnn_inputs(sample):
    input = np.array(sample['tokens'], dtype=np.float32)

    # Create a directory for the input files if it doesn't exist
    os.makedirs('qnn_inputs', exist_ok=True)

    # Save the input as a .rawtensor file
    print(f"input: {input.dtype}")
    print(f"input: {input.shape}")
    save_as_rawtensor(input, 'qnn_inputs/input.rawtensor')

    # Create the input list file mapping the tensor name to the raw file
    with open('qnn_input_list.txt', 'w') as f:
        f.write("input:=qnn_inputs/input.rawtensor\n")


# Runs the equivalent of:
# qnn-net-run --input_list qnn_input_list.txt \
#   --backend ${QNN_SDK_ROOT}/lib/x86_64-linux-clang/libQnnCpu.so \
#   --model ../minilm-L6-v2/model_libs/x86_64-linux-clang/libminilm-l6-v2.so
def run_qnn_net_run(model_path):
    qnn_sdk_root = os.environ.get('QNN_SDK_ROOT')
    if not qnn_sdk_root:
        raise EnvironmentError("QNN_SDK_ROOT environment variable is not set")
    cmd = [
        "qnn-net-run",
        "--input_list", "qnn_input_list.txt",
        "--backend", f"{qnn_sdk_root}/lib/x86_64-linux-clang/libQnnCpu.so",
        "--model", model_path
    ]
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        print("Command executed successfully")
        print("Output:", result.stdout)
    except subprocess.CalledProcessError as e:
        print("Error executing command:")
        print("Return code:", e.returncode)
        print("Error output:", e.stderr)
        raise


def get_qnn_embedding(model_path, sample):
    generate_qnn_inputs(sample)
    run_qnn_net_run(model_path)

    # The raw output is saved to output/Result_0/output.raw; load it and
    # convert it to a numpy array
    with open('output/Result_0/output.raw', 'rb') as f:
        result = f.read()
    result = np.frombuffer(result, dtype=np.float32)
    print(result.shape)
    return result.reshape(1, sample['tokens'].shape[1], -1)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--pt_model_path", type=str, default="simple.pt")
    parser.add_argument("--onnx_model_path", type=str, default="simple.fixed.onnx")
    parser.add_argument("--qnn_model_path", type=str, default="simple_model_qnn.so")
    return parser.parse_args()


def load_pt_model(model_path):
    return torch.load(model_path)


def load_onnx_model(model_path):
    return ort.InferenceSession(model_path)


def generate_random_input(batch_size=1, seq_length=128):
    return torch.randn(batch_size, seq_length, 8).to(torch.float32)


def run_pt_model(model, input):
    with torch.no_grad():
        outputs = model(input)
    return outputs[0].numpy()


def run_onnx_model(session, input):
    ort_inputs = {
        'input': input.numpy()
    }
    ort_outputs = session.run(None, ort_inputs)
    return ort_outputs[0]


def run_qnn_model(model_path, input):
    sample = {
        'tokens': input  # assuming batch size is 1
    }
    return get_qnn_embedding(model_path, sample)


def compare_outputs(pt_output, onnx_output, qnn_output):
    print(f"pt_output: {pt_output.shape}")
    print(f"onnx_output: {onnx_output.shape}")
    print(f"qnn_output: {qnn_output.shape}")
    pt_onnx_diff = np.abs(pt_output - onnx_output).mean()
    pt_qnn_diff = np.abs(pt_output - qnn_output).mean()
    print(f"Mean difference between PyTorch and ONNX: {pt_onnx_diff}")
    print(f"Mean difference between PyTorch and QNN: {pt_qnn_diff}")


def main():
    args = parse_args()

    # Load models
    pt_model = load_pt_model(args.pt_model_path)
    print(f"torch model: {pt_model}")
    onnx_model = load_onnx_model(args.onnx_model_path)

    # Generate random input
    input = generate_random_input()

    # Run all three models on the same input
    pt_output = run_pt_model(pt_model, input)
    onnx_output = run_onnx_model(onnx_model, input)
    qnn_output = run_qnn_model(args.qnn_model_path, input)

    # Compare outputs
    compare_outputs(pt_output, onnx_output, qnn_output)


if __name__ == "__main__":
    main()
When I run bash run_debug.sh, every step completes successfully and I get this result:
Mean difference between PyTorch and ONNX: 2.6827365218196064e-08
Mean difference between PyTorch and QNN: 0.662140965461731
It is not related to the structure of the network: I have tried several other architectures and they all give similar results.
Is there something wrong with how I compile the QNN model, or with the way I create the input and run it?
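For reference, here is a quick sanity check that the raw files at least have the expected sizes (1 x 128 x 8 float32 values, i.e. 4096 bytes each):

# Size check: both raw files should hold exactly 1 * 128 * 8 float32
# values, i.e. 1 * 128 * 8 * 4 bytes.
import os

expected_bytes = 1 * 128 * 8 * 4
for path in ("qnn_inputs/input.rawtensor", "output/Result_0/output.raw"):
    size = os.path.getsize(path)
    status = "OK" if size == expected_bytes else "MISMATCH"
    print(f"{path}: {size} bytes (expected {expected_bytes}) -> {status}")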