Examples for using ONNX Runtime for machine learning inferencing.
MIT License
1.07k
stars
312
forks
source link
RUNTIME_EXCEPTION : Non-zero status code returned while running Mul node. Name:'/time_proj/Mul' Status Message: /onnxruntime_src/onnxruntime/core/providers/cpu/math/element_wise_ops.h:540 void onnxruntime::BroadcastIterator::Init(ptrdiff_t, ptrdiff_t) axis == 1 || axis == largest was false. Attempting to broadcast an axis by a dimension other than 1. 2 by 160 #367
def convert_unet_onnx(unet: torch.nn.Module, onnx_path: Path, num_channels: int = 4, width: int = 1, height: int = 1):
    """
    Export a U-Net PyTorch model to ONNX.

    Builds example inputs and traces ``unet`` with ``torch.onnx.export``.
    Nothing is exported when ``onnx_path`` already exists.

    The example inputs determine the input shapes recorded in the exported
    graph, because no dynamic axes are declared:

    * ``latent_model_input``: ``(2, num_channels, height, width)``
    * ``t``: a 0-d (scalar) float32 tensor
    * ``encoder_hidden_states``: ``(2, 77, 1024)``

    Parameters:
        unet (torch.nn.Module): UNet PyTorch model
        onnx_path (Path): File for storing onnx model
        num_channels (int, optional, 4): number of input channels
        width (int, optional, 1): input (latent) width
        height (int, optional, 1): input (latent) height
    Returns:
        None
    """
    if onnx_path.exists():
        return

    # prepare inputs
    encoder_hidden_state = torch.ones((2, 77, 1024))
    # Latents use NCHW layout, so height comes before width. The previous
    # (width, height) order was only correct for square inputs.
    latents_shape = (2, num_channels, height, width)
    latents = torch.randn(latents_shape)
    # 0-d tensor: the timestep is traced as a scalar input.
    t = torch.from_numpy(np.array(1, dtype=np.float32))

    # model size > 2Gb, it will be represented as onnx with external data
    # files, so it is stored in a separate directory to avoid cluttering the
    # current directory
    onnx_path.parent.mkdir(exist_ok=True, parents=True)
    unet.eval()
    with torch.no_grad():
        torch.onnx.export(
            unet,
            (latents, t, encoder_hidden_state),
            str(onnx_path),
            input_names=['latent_model_input', 't', 'encoder_hidden_states'],
            output_names=['out_sample'],
            #use_external_data_format=True,
            #onnx_shape_inference=False,
        )
    print('U-Net successfully converted to ONNX')
# Export the U-Net to ONNX only when the OpenVINO IR file is not present yet.
if not UNET_OV_PATH.exists():
    convert_unet_onnx(unet, UNET_ONNX_PATH, width=96, height=96)
# NOTE(review): indentation was lost in this snippet; the message may have
# belonged to an `else` branch originally - confirm against the notebook.
print(f"U-Net will be loaded from {UNET_OV_PATH}")
gc.collect()
Then I try to quantize the U-Net, but:
import gc
import onnx
import torch
import numpy as np
import onnxruntime
from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, CalibrationMethod, QuantFormat
)
gc.collect()
RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running Mul node. Name:'/time_proj/Mul' Status Message: /onnxruntime_src/onnxruntime/core/providers/cpu/math/element_wise_ops.h:540 void onnxruntime::BroadcastIterator::Init(ptrdiff_t, ptrdiff_t) axis == 1 || axis == largest was false. Attempting to broadcast an axis by a dimension other than 1. 2 by 160
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("echarlaix/stable-diffusion-v1-5-inc-int8-dynamic").to("cpu")

# for reducing memory consumption get all components from pipeline independently
text_encoder = pipe.text_encoder
text_encoder.eval()
unet = pipe.unet
unet.eval()
vae = pipe.vae
vae.eval()

conf = pipe.scheduler.config
# The pipeline wrapper is no longer needed once its components are extracted.
del pipe

# try to export as onnx
import numpy as np

UNET_ONNX_PATH = sd2_1_model_dir / 'unet/unet.onnx'
UNET_OV_PATH = UNET_ONNX_PATH.parents[1] / 'unet.xml'
def convert_unet_onnx(unet:torch.nn.Module, onnx_path:Path, num_channels:int = 4, width:int = 1, height:int = 1): """ Convert Unet model to ONNX, then IR format. Function accepts pipeline, prepares example inputs for ONNX conversion via torch.export, Parameters: unet (torch.nn.Module): UNet PyTorch model onnx_path (Path): File for storing onnx model num_channels (int, optional, 4): number of input channels width (int, optional, 64): input width height (int, optional, 64): input height Returns: None """ if not onnx_path.exists():
prepare inputs
# Export the U-Net to ONNX only when the OpenVINO IR file is not present yet.
if not UNET_OV_PATH.exists():
    convert_unet_onnx(unet, UNET_ONNX_PATH, width=96, height=96)
# Both branches dropped the U-Net reference, so release it once here.
del unet
gc.collect()
Then I try to quantize the U-Net, but: import gc import onnx import torch import numpy as np import onnxruntime from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, CalibrationMethod, QuantFormat
Define a custom CalibrationDataReader class
class UNetDataReader(CalibrationDataReader):
    """Calibration data reader for the exported U-Net ONNX model.

    NOTE(review): only the constructor is visible in this snippet.
    ``load_model`` and ``generate_calibration_data`` are called below but
    their definitions are not shown - confirm they exist on the class.
    """

    def __init__(self, model_path: str):
        # Was spelled ``init`` (the dunder underscores were lost), which
        # Python never calls on construction, so this setup would not run.
        self.model_path = model_path
        self.input_names = None
        self.enum_data = None
        self.load_model()
        self.generate_calibration_data()
# Define paths for the input and quantized models
# Earlier candidates that were immediately overwritten:
#   '/content/sd2.1/unet/unet.onnx'
#   '/content/unet_fp16.onnx'
model_path = '/content/sd2.12/unet1/unet.onnx'
quantized_model_path = '/content/unetint8/unet.onnx'

# Create a calibration data reader
data_reader = UNetDataReader(model_path)
gc.collect()

# Perform static quantization
quantize_static(
    model_input=model_path,
    model_output=quantized_model_path,
    calibration_data_reader=data_reader,
    activation_type=QuantType.QInt8,
    weight_type=QuantType.QInt8,
    use_external_data_format=True,
    calibrate_method=CalibrationMethod.MinMax,
    quant_format=QuantFormat.QDQ,
)
gc.collect()

# Reported failure:
# RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running Mul node. Name:'/time_proj/Mul' Status Message: /onnxruntime_src/onnxruntime/core/providers/cpu/math/element_wise_ops.h:540 void onnxruntime::BroadcastIterator::Init(ptrdiff_t, ptrdiff_t) axis == 1 || axis == largest was false. Attempting to broadcast an axis by a dimension other than 1. 2 by 160