kendryte / nncase

Open deep learning compiler stack for Kendryte AI accelerators ✨
Apache License 2.0
753 stars 183 forks source link

nncase 1.7.1 onnx quantization bug #617

Closed xuke225 closed 2 years ago

xuke225 commented 2 years ago

Describe the bug

When using nncase 1.7.1 to convert the shufflenet_v2_x0_5 model from TorchVision, quantization hangs at step "4.3. Quantize graph..." and the process does not respond to Ctrl+C. If quantization is not used, the FP32 kmodel is generated successfully.

generated onnx model named shufflenet_v2_x0_5_224.onnx
generated simplified onnx model named shufflenet_v2_x0_5_224_simplify.onnx
1. Import graph...
2. Optimize target independent...
3. Optimize target dependent...
4.1. Add quantize annotation...
4.2. Run calibration...
4.2.1. Collecting ranges...
4.3. Quantize graph...

Test Code

import argparse
import cv2
import numpy as np
import torch
import onnx
from onnxsim import simplify
import nncase
from pathlib import Path

import torchvision
import torchvision.models as models

def parse_args():
    """Parse the command-line options of the PTQ debug script.

    Options cover the run mode/device, the torchvision model to export,
    the ONNX export settings (node names, opset, batch size, dynamic batch),
    the PTQ calibration method and the compile target.

    Returns:
        argparse.Namespace: the parsed options, read from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='NNCase PTQ Debug')

    # basic
    parser.add_argument('--mode', default='image',
                        type=str, help='Use the data from image, video or camera')
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='Use cuda')

    # model
    parser.add_argument('-m', '--model', default='shufflenet_v2_x0_5')
    parser.add_argument('--weight', default='weights/yolo_nano_plus/yolo_nano_plus_best.pth', type=str, help='Trained state_dict file path to open')
    parser.add_argument('-size', '--img_size', default=224, type=int, help='img_size')

    # onnx export
    parser.add_argument("--input", default="images", type=str, help="input node name of onnx model")
    parser.add_argument("--output", default="output", type=str, help="output node name of onnx model")
    parser.add_argument("-o", "--opset", default=11, type=int, help="onnx opset version")
    parser.add_argument("--batch-size", type=int, default=1, help="batch size")
    parser.add_argument("--dynamic", action="store_true", help="whether the input shape should be dynamic or not")

    # quantization / compilation
    parser.add_argument('--method', default='no_clip', choices=['no_clip', 'l2', 'kld_m0', 'kld_m1', 'kld_m2', 'cdf'], help='calibrate method')
    # The option string must not carry trailing whitespace: argparse derives
    # the attribute name from it, and 'target ' is not reachable as args.target.
    parser.add_argument('--target', default='k210')

    return parser.parse_args()

def preproc(img, input_size, transpose=True):
    """Letterbox an image into a fixed-size canvas filled with the value 114.

    The image is scaled by the largest ratio that keeps it inside
    ``input_size`` (aspect ratio preserved), pasted into the top-left corner
    of the canvas, and its channel order is swapped with
    ``cv2.COLOR_BGR2RGB``.

    Args:
        img: source image array; presumably BGR as produced by
            ``cv2.imread`` -- confirm against the caller.
        input_size: ``(height, width)`` of the output canvas.
        transpose: when true, reorder the axes with ``(2, 0, 1)``.

    Returns:
        tuple: ``(padded_img, r)`` -- the contiguous uint8 canvas and the
        scale ratio that was applied to the image.
    """
    # A 3-dimensional input gets an explicit 3-channel canvas; otherwise the
    # canvas takes exactly the requested shape.
    if len(img.shape) == 3:
        canvas_shape = (input_size[0], input_size[1], 3)
    else:
        canvas_shape = input_size
    padded_img = np.full(canvas_shape, 114, dtype=np.uint8)

    src_h, src_w = img.shape[0], img.shape[1]
    r = min(input_size[0] / src_h, input_size[1] / src_w)
    scaled_h = int(src_h * r)
    scaled_w = int(src_w * r)

    # cv2.resize takes its target size as (width, height).
    resized_img = cv2.resize(
        img, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR
    ).astype(np.uint8)
    padded_img[:scaled_h, :scaled_w] = resized_img

    padded_img = cv2.cvtColor(padded_img, cv2.COLOR_BGR2RGB)
    if transpose:
        padded_img = padded_img.transpose((2, 0, 1))
    return np.ascontiguousarray(padded_img), r

def read_images(imgs_dir: str, test_size: list):
    """Load the images of a directory as one block of calibration data.

    Files are visited in sorted order so that repeated runs produce the same
    byte stream. Entries that are not regular files, or that OpenCV cannot
    decode, are skipped with a message instead of crashing the run.

    Args:
        imgs_dir: directory containing the calibration images.
        test_size: ``[height, width]`` every image is letterboxed to.

    Returns:
        tuple: ``(count, data)`` -- the number of images loaded and the raw
        bytes of the stacked, preprocessed images.

    Raises:
        ValueError: if the directory yields no readable image.
    """
    imgs = []
    for p in sorted(Path(imgs_dir).iterdir()):
        if not p.is_file():
            continue
        img = cv2.imread(str(p))
        if img is None:
            # cv2.imread signals an unreadable/unsupported file by returning
            # None rather than raising.
            print('skipping unreadable image file: {}'.format(p))
            continue
        img, _ = preproc(img, test_size, True)  # transposed, RGB
        imgs.append(img)

    if not imgs:
        raise ValueError('no readable images found in {}'.format(imgs_dir))

    imgs = np.stack(imgs)
    return len(imgs), imgs.tobytes()

def read_model_file(model_file):
    """Return the complete binary content of ``model_file`` as bytes."""
    return Path(model_file).read_bytes()

def run():
    """Export a torchvision model to ONNX and compile it to an int8 kmodel.

    Steps:
        1. Load the pretrained torchvision model selected by ``--model``.
        2. Export it to ``<model>_<size>.onnx`` and simplify it with onnxsim
           into ``<model>_<size>_simplify.onnx``.
        3. Import the simplified model into nncase, calibrate with the images
           under ``./data/demo/images`` and compile with PTQ.
        4. Write the result to ``<model>_<size>_simplify_int8.kmodel``.
    """
    args = parse_args()

    device = torch.device("cuda" if args.cuda else "cpu")

    model = models.__dict__[args.model](pretrained=True).to(device)
    model.eval()

    print('Finished loading model!')

    # The tracing input has to live on the same device as the model,
    # otherwise the export fails when --cuda is given.
    dummy_input = torch.randn(args.batch_size, 3, args.img_size, args.img_size).to(device)

    # Build every output file name once from the shared stem.
    base_name = str(args.model) + '_' + str(args.img_size)
    onnx_path = base_name + '.onnx'
    simplified_path = base_name + '_simplify.onnx'
    kmodel_path = base_name + '_simplify_int8.kmodel'

    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        input_names=[args.input],
        output_names=[args.output],
        dynamic_axes={args.input: {0: 'batch'},
                      args.output: {0: 'batch'}} if args.dynamic else None,
        opset_version=args.opset,
    )

    print("generated onnx model named {}".format(onnx_path))

    input_shapes = {args.input: list(dummy_input.shape)} if args.dynamic else None

    # Use onnx-simplifier to remove redundant nodes from the exported model.
    onnx_model = onnx.load(onnx_path)
    model_simp, check = simplify(onnx_model, dynamic_input_shape=args.dynamic, input_shapes=input_shapes)
    assert check, "Simplified ONNX model could not be validated"
    onnx.save(model_simp, simplified_path)
    print("generated simplified onnx model named {}".format(simplified_path))

    # compile_options
    compile_options = nncase.CompileOptions()
    # Fall back to 'k210' when the parsed namespace exposes no `target` attribute.
    compile_options.target = getattr(args, 'target', 'k210')
    compile_options.dump_ir = True
    compile_options.dump_asm = True
    compile_options.dump_dir = 'tmp'

    # Enable the preprocessing stage. Without it, PTQ hung at
    # "4.3. Quantize graph..." for this model (nncase issue #617).
    # NOTE(review): per the nncase docs the swapRB/input_range/mean/std/
    # input_type/input_layout/input_shape options below are only honoured
    # when preprocess is True -- confirm for the nncase version in use.
    compile_options.preprocess = True

    compile_options.swapRB = False  # legacy use RGB
    compile_options.input_range = [0, 1]
    compile_options.mean = [0.485, 0.456, 0.406]
    compile_options.std = [0.229, 0.224, 0.225]

    # Alternative settings for models that take BGR input in [0, 255]:
    # compile_options.swapRB = True  # new model use BGR
    # compile_options.input_range = [0, 255]
    # compile_options.mean = [0, 0, 0]
    # compile_options.std = [1, 1, 1]

    compile_options.input_type = 'uint8'
    compile_options.input_layout = 'NCHW'
    compile_options.input_shape = [1, 3, args.img_size, args.img_size]
    compile_options.quant_type = 'int8'  # uint8 or int8

    # compiler
    compiler = nncase.Compiler(compile_options)

    # import_options
    import_options = nncase.ImportOptions()

    # import
    model_content = read_model_file(simplified_path)
    compiler.import_onnx(model_content, import_options)

    # ptq: calibrate with the preprocessed demo images
    ptq_opt = nncase.PTQTensorOptions()
    ptq_opt.calibrate_method = args.method
    ptq_opt.samples_count, tensor_data = read_images("./data/demo/images", [args.img_size, args.img_size])
    ptq_opt.set_tensor_data(tensor_data)
    compiler.use_ptq(ptq_opt)

    # compile
    compiler.compile()

    # kmodel (the file is flushed and closed when the with-block exits)
    kmodel = compiler.gencode_tobytes()
    with open(kmodel_path, 'wb') as f:
        f.write(kmodel)

# Script entry point: run the export/compile pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    run()
curioyang commented 2 years ago

@xuke225 Add `compile_options.preprocess = True` to your script.

xuke225 commented 2 years ago

@curioyang Thanks a lot, the problem is solved!