tensorflow / model-optimization

A toolkit to optimize ML models for deployment for Keras and TensorFlow, including quantization and pruning.
https://www.tensorflow.org/model_optimization
Apache License 2.0

Quantize naive !!! #956

Open STAROFWIND opened 2 years ago

STAROFWIND commented 2 years ago

Hi all, I am working on quantization. I have a .h5 model and want to convert its weights from float32 to int8 or float16. This seems to be "post-training quantization". How can I do that without converting to tflite (i.e., still saving the model as .h5)?
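To make the question concrete, here is the naive float16 route I first considered (a minimal sketch; the file paths are placeholders). As far as I can tell, Keras variables are created as float32, so `set_weights` casts the rounded values straight back to float32; the file neither shrinks nor stores real float16 weights:

```python
# Naive sketch (placeholder paths): round an .h5 model's weights to float16.
# Keras variables are float32, so set_weights() casts the rounded values
# back to float32 -- this simulates fp16 precision but does not shrink the file.
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model("model.h5")  # placeholder path
model.set_weights([w.astype(np.float16) for w in model.get_weights()])
model.save("model_fp16.h5")  # placeholder path
```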

I tried:

```python
import os, argparse, json, cv2

# Import necessary items from Keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, UpSampling2D
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

# Import local packages
import tensorflow_model_optimization as tfmot
import tensorflow as tf
from tensorflow.keras.models import model_from_json

DEBUG = False

LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

def apply_quantization(skip_layers):
    def wrapper(layer):
        if type(layer) in skip_layers:
            print(layer.name)
            return layer
        else:
            return tfmot.quantization.keras.quantize_annotate_layer(layer)
    return wrapper

# def wrapper(layer):
#     if type(layer) in skip_layers:
#         print(layer.name)
#         return tfmot.quantization.keras.quantize_annotate_layer(layer)
#     else:
#         return layer
# return wrapper

class DefaultQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]

    def get_activations_and_quantizers(self, layer):
        return [(layer.activation, MovingAverageQuantizer(num_bits=4, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        return [tfmot.quantization.keras.quantizers.MovingAverageQuantizer(
            num_bits=4, per_axis=False, symmetric=False, narrow_range=False)]

    def get_config(self):
        return {}

"""
# Configure how to quantize weights.
def get_weights_and_quantizers(self, layer):
    return [(layer.kernel, LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False))]

# Configure how to quantize activations.
def get_activations_and_quantizers(self, layer):
    return [
        (layer.activation, MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]

def set_quantize_weights(self, layer, quantize_weights):
    # Add this line for each item returned in `get_weights_and_quantizers`
    # , in the same order
    layer.kernel = quantize_weights[0]

def set_quantize_activations(self, layer, quantize_activations):
    # Add this line for each item returned in `get_activations_and_quantizers`
    # , in the same order.
    layer.activation = quantize_activations[0]

# Configure how to quantize outputs (may be equivalent to activations).
def get_output_quantizers(self, layer):
    return []

def get_config(self):
    return {}
"""

if __name__ == '__main__':

    input_shape = (320, 320, 3)

    with open("path/to/1_model_quantize.json") as f:
        json_model = f.read()
    model = model_from_json(json_model)
    model.load_weights("path to h5")
    model.summary()

    # Quantize
    quantize_model = tfmot.quantization.keras.quantize_model
    # q_aware stands for quantization aware.
    # q_aware_model = quantize_model(model)
    q_aware_model = tf.keras.models.clone_model(
        model, clone_function=apply_quantization(skip_layers=[BatchNormalization]))
    with tfmot.quantization.keras.quantize_scope({'DefaultQuantizeConfig': DefaultQuantizeConfig}):
        quant_aware_model = tfmot.quantization.keras.quantize_apply(q_aware_model)

    # `quantize_apply` requires a recompile.
    quant_aware_model.compile(optimizer='Adam', loss='mean_squared_error',
                              metrics=['mean_squared_error', 'accuracy'])
    quantize_file = "save quantize .h5"
    quant_aware_model.summary()
    tf.keras.models.save_model(quant_aware_model, quantize_file, include_optimizer=False)
```
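One thing I am unsure about: in the script above, `DefaultQuantizeConfig` is only registered in `quantize_scope` and never attached to any layer. The TFMOT comprehensive guide passes the config to `quantize_annotate_layer` directly, so perhaps the wrapper should look more like this sketch (note it assumes every non-skipped layer has `kernel` and `activation` attributes, which only holds for layers like `Conv2D` or `Dense`):

```python
# Sketch following the TFMOT comprehensive guide: attach the custom
# QuantizeConfig at annotation time instead of only registering it in
# quantize_scope. Assumes non-skipped layers have `kernel`/`activation`.
def apply_quantization_with_config(skip_layers):
    def wrapper(layer):
        if type(layer) in skip_layers:
            return layer
        return tfmot.quantization.keras.quantize_annotate_layer(
            layer, quantize_config=DefaultQuantizeConfig())
    return wrapper
```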

The result is that all layers are still in float32. Thank you so much.
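For what it's worth, this is how I checked the dtypes. From what I understand of QAT, `quantize_apply` wraps layers (e.g. in `QuantizeWrapperV2` in recent versions) but keeps the underlying variables in float32, because quantization is simulated during training rather than applied to storage:

```python
# Inspect the quantize-aware model: layers get wrapped, but the stored
# variables stay float32 since QAT only simulates quantization.
for layer in quant_aware_model.layers:
    print(layer.__class__.__name__, [w.dtype.name for w in layer.weights])
```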