PINTO0309 / PINTO_model_zoo

A repository for storing models that have been inter-converted between various frameworks. Supported frameworks are TensorFlow, PyTorch, ONNX, OpenVINO, TFJS, TFTRT, TensorFlowLite (Float32/16/INT8), EdgeTPU, CoreML.
https://qiita.com/PINTO
MIT License
3.59k stars 572 forks source link

Inconsistent output from quantized tflite and float32 tflite #263

Closed zye1996 closed 2 years ago

zye1996 commented 2 years ago

Issue Type

Performance

OS

Ubuntu

OS architecture

x86_64

Programming Language

Python

Framework

TensorFlowLite

Model name and Weights/Checkpoints URL

https://github.com/PINTO0309/PINTO_model_zoo/tree/main/194_face_recognizer_fast

Description

The outputs of the float32 TFLite model and the fully quantized uint8 TFLite model are quite different. I am not sure whether this is expected for a face recognition model.

Relevant Log Output

No response

URL or source code for simple inference testing code


def get_quant_int8_output(interpreter, output_index):
    """Read an output tensor and dequantize it if it is 8-bit quantized.

    Args:
        interpreter: Object exposing ``get_tensor(index)`` and
            ``get_output_details()``; the calling script passes a TFLite
            interpreter.
        output_index: Tensor index of the output to read. It is matched
            against the ``'index'`` field of the output details.

    Returns:
        For ``uint8`` or ``int8`` tensors, ``(q - zero_points) * scales``
        using the quantization parameters of the matching output. Tensors of
        any other dtype are returned unchanged.

    Raises:
        ValueError: If the tensor is 8-bit quantized but no output detail
            entry has ``output_index`` as its index.
    """
    feature = interpreter.get_tensor(output_index)
    # Full-integer models may emit either uint8 or int8; anything else is
    # handed back untouched.
    if feature.dtype not in (np.uint8, np.int8):
        return feature

    detail = next(
        (d for d in interpreter.get_output_details() if d['index'] == output_index),
        None,
    )
    if detail is None:
        # Falling back to another output's parameters would silently produce
        # wrong values, so fail loudly instead.
        raise ValueError(
            f"no output details found for tensor index {output_index}"
        )

    params = detail["quantization_parameters"]
    # Widen to int32 before subtracting so neither 8-bit type can wrap around.
    return (feature.astype(np.int32) - params["zero_points"]) * params["scales"]

# Load the test image and resize it to 112x112.
image = plt.imread("Wu_Peng_0001.jpg")
image = cv2.resize(image, (112, 112))
# Float32 copy of the same pixel values; no scaling or mean subtraction is
# applied here.
image_float = image.astype(np.float32)

# --- Float32 model: feed the float image with a leading batch axis. ---
model_tf = tflite.Interpreter("194_face_recognizer_fast/saved_model_face_recognizer_fast/model_float32.tflite")
model_tf.allocate_tensors()
# NOTE: despite the names, input_details / output_details hold the tensor
# *index* of the first input / output, not the details dicts.
input_details = model_tf.get_input_details()[0]['index']
model_tf.set_tensor(input_details, image_float[None, ...])
output_details = model_tf.get_output_details()[0]['index']
model_tf.invoke()
embedding_tf = model_tf.get_tensor(output_details)

# --- Full-integer quantized model: feed the resized image as loaded (not
# the float32 copy). ---
# NOTE(review): presumably `image` is uint8 here and matches the model's
# input dtype -- confirm against get_input_details().
model_tf_uint8 = tflite.Interpreter("194_face_recognizer_fast/saved_model_face_recognizer_fast/model_full_integer_quant.tflite") 
model_tf_uint8.allocate_tensors()
# input_details / output_details are rebound to the quantized model's indices.
input_details = model_tf_uint8.get_input_details()[0]['index']
model_tf_uint8.set_tensor(input_details, image[None, ...])
output_details = model_tf_uint8.get_output_details()[0]['index']
model_tf_uint8.invoke()
# Dequantize the output via the helper for comparison with embedding_tf.
embedding_tf_uint8 = get_quant_int8_output(interpreter=model_tf_uint8, output_index=output_details)
PINTO0309 commented 2 years ago
import tensorflow as tf
import time
import numpy as np
from pprint import pprint

H=112
W=112

############################################################
from openvino.inference_engine import IECore
# Read the FP32 OpenVINO network (model .xml + weights .bin) and load it on
# the CPU device.
ie = IECore()
net = ie.read_network(
    model='openvino/FP32/face_recognizer_fast.xml',
    weights='openvino/FP32/face_recognizer_fast.bin'
)
# Name of the first entry in the network's input info; used as the feed key.
input_blob = next(iter(net.input_info))
exec_net = ie.load_network(network=net, device_name='CPU')

roop = 1  # number of timed inference iterations
e = 0.0  # accumulated inference time in seconds
result = None
# All-ones dummy input of shape (1, 3, H, W).
inp = np.ones((1,3,H,W), dtype=np.float32)
for _ in range(roop):
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    s = time.perf_counter()
    result = exec_net.infer(inputs={input_blob: inp})
    e += (time.perf_counter() - s)
print('OpenVINO output @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
print(f'elapsed time: {e/roop*1000}ms')
print(f'shape: {result["fc1"].shape}')
pprint(result['fc1'])

############################################################

# Load the float32 TFLite model with 4 interpreter threads.
interpreter = tf.lite.Interpreter(model_path='model_float32.tflite', num_threads=4)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

roop = 1  # number of timed inference iterations
e = 0.0  # accumulated time in seconds
result = None
# All-ones dummy input of shape (1, H, W, 3).
inp = np.ones((1,H,W,3), dtype=np.float32)
for _ in range(roop):
    # The timed region covers set_tensor + invoke + get_tensor.
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    s = time.perf_counter()
    interpreter.set_tensor(input_details[0]['index'], inp)
    interpreter.invoke()
    result = interpreter.get_tensor(output_details[0]['index'])
    e += (time.perf_counter() - s)
print('tflite output @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
print(f'elapsed time: {e/roop*1000}ms')
print(f'shape: {result.shape}')
pprint(result)
OpenVINO output @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
elapsed time: 7.77435302734375ms
shape: (1, 128)
array([[-0.30791214, -0.00172161, -0.32203954, -0.4465644 ,  0.5506708 ,
         0.37454242,  0.12765971, -0.34852087, -0.55883896,  0.05936675,
         0.1695568 , -0.2994346 , -0.32539135, -0.03171316,  0.1584038 ,
         0.06941229, -0.7254103 ,  0.22967511, -0.40271038,  0.03725373,
         0.17559841, -0.24316749,  0.20910957, -0.38083535,  0.2821527 ,
         0.64665926,  0.14585382, -0.01600966, -0.12683704, -0.16275045,
         0.046739  , -0.3108144 ,  0.42539626, -0.04507577, -0.01005561,
         0.33453947,  0.05367449,  0.09337044, -0.11997464, -0.25770757,
        -0.07461177, -0.6524699 ,  0.08730686, -0.06027391,  0.41346878,
        -0.31559795,  0.03768324,  0.41175154, -0.23365799, -0.17392859,
         0.44504082, -0.07452258,  0.12696928,  0.01730857, -0.4296226 ,
         0.19510299, -0.10751891, -0.04601097, -0.39241505,  0.27709037,
        -0.15694436, -0.2031919 , -0.5533962 , -0.04073964,  0.0481807 ,
        -0.5410749 ,  0.26406363,  0.00634025,  0.23910517,  0.5124856 ,
        -0.00100496,  0.41695806, -0.29561228,  0.1865741 , -0.02398361,
         0.06689882, -0.00815826,  0.20982082, -0.1156534 , -0.24292208,
         0.30511013,  0.08169898, -0.06288522,  0.05135342, -0.21524626,
         0.0141438 , -0.2397377 , -0.13586396,  0.29874226,  0.10599164,
        -0.38982812,  0.22435834,  0.1403134 ,  0.26996595, -0.27892074,
         0.45670548, -0.22077468,  0.05652796, -0.6642338 ,  0.6857936 ,
         0.18472582, -0.11868192, -0.20869748, -0.2998895 , -0.02720285,
         0.6399674 ,  0.05989804,  0.29125848,  0.06604294, -0.13117297,
        -0.01385057, -0.50441426, -0.14871584, -0.41186276, -0.15124089,
         0.27266443, -0.25697875, -0.33428097, -0.10980904,  0.14820883,
         0.3153484 ,  0.37297386,  0.07718931,  0.1712934 ,  0.18546768,
        -0.59192634,  0.33165112,  0.02169564]], dtype=float32)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
tflite output @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
elapsed time: 7.680654525756836ms
shape: (1, 128)
array([[-0.30791512, -0.00172003, -0.32203746, -0.44656596,  0.55067044,
         0.37454244,  0.12766045, -0.34852353, -0.55884385,  0.05936857,
         0.16955893, -0.29943365, -0.32538933, -0.03171276,  0.15840298,
         0.06941136, -0.7254181 ,  0.22967474, -0.40271398,  0.03725343,
         0.17559725, -0.24316718,  0.20910642, -0.38083786,  0.28215402,
         0.6466574 ,  0.14585344, -0.01601134, -0.12683675, -0.16274995,
         0.04673719, -0.3108138 ,  0.4253984 , -0.04507551, -0.01005552,
         0.33454174,  0.05367091,  0.0933719 , -0.11997414, -0.257707  ,
        -0.07460804, -0.6524724 ,  0.08730798, -0.0602743 ,  0.4134705 ,
        -0.31559613,  0.03768187,  0.41174823, -0.23365946, -0.17392643,
         0.44504157, -0.07452238,  0.12697074,  0.01730801, -0.42962405,
         0.19510286, -0.10752033, -0.04601185, -0.39241552,  0.27709046,
        -0.15694287, -0.20319283, -0.55339617, -0.04074154,  0.04818019,
        -0.5410755 ,  0.26406595,  0.00633784,  0.2391063 ,  0.51248306,
        -0.00100307,  0.41696066, -0.29561004,  0.18657266, -0.0239817 ,
         0.06689788, -0.00816114,  0.20982026, -0.11565492, -0.24292044,
         0.30510852,  0.08169975, -0.06288571,  0.0513528 , -0.21524292,
         0.01414456, -0.23973477, -0.13586488,  0.29874215,  0.10599194,
        -0.389825  ,  0.22436565,  0.14031234,  0.2699634 , -0.27892292,
         0.45670462, -0.22077489,  0.05652812, -0.6642385 ,  0.6857954 ,
         0.18472388, -0.11868376, -0.20869698, -0.29988924, -0.02720007,
         0.639966  ,  0.05989749,  0.29125947,  0.06604305, -0.13117433,
        -0.01385185, -0.50442   , -0.14871615, -0.41186142, -0.15123957,
         0.27266696, -0.25697806, -0.33427885, -0.10980733,  0.14821012,
         0.31534806,  0.3729769 ,  0.07718804,  0.17129503,  0.1854662 ,
        -0.591922  ,  0.33165163,  0.02169659]], dtype=float32)

The Float32 outputs before and after conversion (OpenVINO vs. TFLite) are almost identical, so the discrepancy comes from quantization rather than from the model conversion itself. Please adjust the quantization parameters to suit your needs: https://github.com/PINTO0309/PINTO_model_zoo/blob/main/194_face_recognizer_fast/convert_script.txt

PINTO0309 commented 2 years ago

Closed due to lack of progress for an extended period of time.