When using nncase 1.7.1 to convert the shufflenet_v2_x0_5 model from TorchVision, quantization hangs at step "4.3. Quantize graph..." and the process does not respond to Ctrl+C. If quantization is not used, the FP32 kmodel is generated successfully.
generated onnx model named shufflenet_v2_x0_5_224.onnx
generated simplified onnx model named shufflenet_v2_x0_5_224_simplify.onnx
1. Import graph...
2. Optimize target independent...
3. Optimize target dependent...
4.1. Add quantize annotation...
4.2. Run calibration...
4.2.1. Collecting ranges...
4.3. Quantize graph...
Test Code
import argparse
import cv2
import numpy as np
import torch
import onnx
from onnxsim import simplify
import nncase
from pathlib import Path
import torchvision
import torchvision.models as models
def parse_args():
    """Parse the command-line options of the nncase PTQ debug script.

    Returns:
        argparse.Namespace: the parsed options (``mode``, ``cuda``, ``model``,
        ``weight``, ``img_size``, ``input``, ``output``, ``opset``,
        ``batch_size``, ``dynamic``, ``method`` and ``target``).
    """
    parser = argparse.ArgumentParser(description='NNCase PTQ Debug')

    # basic
    parser.add_argument('--mode', default='image', type=str,
                        help='Use the data from image, video or camera')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='Use cuda')

    # model
    parser.add_argument('-m', '--model', default='shufflenet_v2_x0_5')
    parser.add_argument('--weight',
                        default='weights/yolo_nano_plus/yolo_nano_plus_best.pth',
                        type=str, help='Trained state_dict file path to open')
    parser.add_argument('-size', '--img_size', default=224, type=int,
                        help='img_size')
    parser.add_argument("--input", default="images", type=str,
                        help="input node name of onnx model")
    parser.add_argument("--output", default="output", type=str,
                        help="output node name of onnx model")
    parser.add_argument("-o", "--opset", default=11, type=int,
                        help="onnx opset version")
    parser.add_argument("--batch-size", type=int, default=1,
                        help="batch size")
    parser.add_argument("--dynamic", action="store_true",
                        help="whether the input shape should be dynamic or not")
    parser.add_argument('--method', default='no_clip',
                        choices=['no_clip', 'l2', 'kld_m0', 'kld_m1', 'kld_m2', 'cdf'],
                        help='calibrate method')
    # The option string must not contain trailing whitespace: argparse derives
    # the attribute name from it, and '--target ' would be stored as the
    # attribute 'target ' (with a space), leaving `args.target` undefined.
    parser.add_argument('--target', default='k210')

    return parser.parse_args()
def preproc(img, input_size, transpose=True):
    """Resize *img* keeping its aspect ratio and paste it onto a padded canvas.

    The canvas has the size given by ``input_size`` (height, width) and is
    filled with the value 114; the resized image is written into its top-left
    corner. The canvas is then converted from BGR to RGB and, when
    ``transpose`` is true, reordered from HWC to CHW.

    Note: the colour conversion is applied unconditionally, so in practice a
    3-channel image is expected.

    Args:
        img: image array as returned by ``cv2.imread``.
        input_size: ``[height, width]`` of the output canvas.
        transpose: whether to move the channel axis to the front.

    Returns:
        A ``(padded_image, ratio)`` pair, where ``ratio`` is the scale factor
        that was applied to the input image.
    """
    target_h, target_w = input_size[0], input_size[1]
    src_h, src_w = img.shape[0], img.shape[1]

    # A colour input gets a 3-channel canvas; otherwise the canvas has exactly
    # the shape given by input_size.
    canvas_shape = (target_h, target_w, 3) if len(img.shape) == 3 else input_size
    canvas = np.full(canvas_shape, 114, dtype=np.uint8)

    # Largest scale at which the whole image still fits inside the canvas.
    r = min(target_h / src_h, target_w / src_w)
    new_h, new_w = int(src_h * r), int(src_w * r)

    # cv2.resize takes the destination size as (width, height).
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    canvas[:new_h, :new_w] = resized.astype(np.uint8)

    canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    if transpose:
        canvas = canvas.transpose((2, 0, 1))
    return np.ascontiguousarray(canvas), r
def read_images(imgs_dir: str, test_size: list) -> tuple:
    """Load every readable image in *imgs_dir* as a batch of calibration data.

    Each image is passed through ``preproc`` with ``transpose=True`` and the
    results are stacked into a single array.

    Args:
        imgs_dir: directory that contains the calibration images.
        test_size: ``[height, width]`` every image is resized/padded to.

    Returns:
        A ``(count, data)`` pair: the number of images that were loaded and
        the raw bytes of the stacked array.

    Raises:
        ValueError: if the directory contains no readable image.
    """
    samples = []
    # Sorted so that the calibration data is identical from run to run,
    # independent of the order in which the OS enumerates the directory.
    for path in sorted(Path(imgs_dir).iterdir()):
        if not path.is_file():
            continue
        img = cv2.imread(str(path))
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded (e.g. a stray text file); skip such entries.
            continue
        sample, _ = preproc(img, test_size, True)  # sample is CHW, RGB
        samples.append(sample)

    if not samples:
        raise ValueError('no readable images found in {}'.format(imgs_dir))

    batch = np.stack(samples)
    return len(samples), batch.tobytes()
def read_model_file(model_file):
    """Return the complete content of *model_file* as bytes."""
    with open(model_file, mode='rb') as handle:
        return handle.read()
def run():
    """Export a TorchVision model to ONNX and compile it to an int8 kmodel.

    Steps, in order:
      1. parse the command-line options and load the pretrained model;
      2. export the model to ONNX and simplify it with onnx-simplifier;
      3. configure nncase and import the simplified ONNX model;
      4. enable post-training quantization, using the images under
         ``./data/demo/images`` as calibration data;
      5. compile and write the resulting kmodel to disk.

    All generated files are written to the current working directory and are
    named after the model and the image size.
    """
    args = parse_args()

    # use cuda
    device = torch.device("cuda" if args.cuda else "cpu")

    model = models.__dict__[args.model](pretrained=True).to(device)
    model.eval()
    print('Finished loading model!')

    # Build every output file name once instead of re-concatenating it at
    # each use site.
    stem = '{}_{}'.format(args.model, args.img_size)
    onnx_path = stem + '.onnx'
    simplified_path = stem + '_simplify.onnx'
    kmodel_path = stem + '_simplify_int8.kmodel'

    # The dummy input has to live on the same device as the model, otherwise
    # tracing fails when --cuda is given.
    dummy_input = torch.randn(
        args.batch_size, 3, args.img_size, args.img_size
    ).to(device)

    # Use the public export entry point rather than the private
    # torch.onnx._export helper.
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        input_names=[args.input],
        output_names=[args.output],
        dynamic_axes={args.input: {0: 'batch'},
                      args.output: {0: 'batch'}} if args.dynamic else None,
        opset_version=args.opset,
    )
    print("generated onnx model named {}".format(onnx_path))

    input_shapes = {args.input: list(dummy_input.shape)} if args.dynamic else None

    # use onnx-simplifier to remove redundant nodes from the model.
    onnx_model = onnx.load(onnx_path)
    model_simp, check = simplify(onnx_model,
                                 dynamic_input_shape=args.dynamic,
                                 input_shapes=input_shapes)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, simplified_path)
    print("generated simplified onnx model named {}".format(simplified_path))

    # compile_options
    compile_options = nncase.CompileOptions()
    # Fall back to 'k210' when the parsed options carry no `target` attribute.
    compile_options.target = getattr(args, 'target', 'k210')
    compile_options.dump_ir = True
    compile_options.dump_asm = True
    compile_options.dump_dir = 'tmp'
    compile_options.swapRB = False  # legacy use RGB
    compile_options.input_range = [0, 1]
    compile_options.mean = [0.485, 0.456, 0.406]
    compile_options.std = [0.229, 0.224, 0.225]
    # compile_options.swapRB = True  # new model use BGR
    # compile_options.input_range = [0, 255]
    # compile_options.mean = [0, 0, 0]
    # compile_options.std = [1, 1, 1]
    compile_options.input_type = 'uint8'
    compile_options.input_layout = 'NCHW'
    compile_options.input_shape = [1, 3, args.img_size, args.img_size]
    compile_options.quant_type = 'int8'  # uint8 or int8

    # compiler
    compiler = nncase.Compiler(compile_options)

    # import_options
    import_options = nncase.ImportOptions()

    # import
    model_content = read_model_file(simplified_path)
    compiler.import_onnx(model_content, import_options)

    # ptq
    ptq_opt = nncase.PTQTensorOptions()
    ptq_opt.calibrate_method = args.method
    ptq_opt.samples_count, tensor_data = read_images(
        "./data/demo/images", [args.img_size, args.img_size])
    ptq_opt.set_tensor_data(tensor_data)
    compiler.use_ptq(ptq_opt)

    # compile
    compiler.compile()

    # kmodel
    kmodel = compiler.gencode_tobytes()
    # Leaving the `with` block flushes and closes the file.
    with open(kmodel_path, 'wb') as f:
        f.write(kmodel)
# Run the conversion pipeline only when this file is executed as a script.
if __name__ == '__main__':
    run()
Describe the bug
When using nncase 1.7.1 to convert the shufflenet_v2_x0_5 model from TorchVision, quantization hangs at step "4.3. Quantize graph..." and the process does not respond to Ctrl+C. If quantization is not used, the FP32 kmodel is generated successfully.
Test Code