16x8 Quantization fails for RNN model - Max and min for dynamic tensors should be recorded during calibration

Black3rror commented 10 months ago

Doing 16x8 quantization on RNN models fails.

Code to reproduce the issue (also available as a gist that reproduces it on Google Colab):

import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot

def create_model():
  """Build the Embedding -> SimpleRNN -> Dense model used in this repro.

  The input shape is fully fixed (batch_size=1, sequence_length=10) through
  batch_input_shape so that the model can be converted later.

  Returns:
    The (uncompiled) tf.keras Sequential model.
  """
  # RNN models need both batch_size and sequence_length fixed to be converted
  # later; e.g., batch_input_shape=[None, 10] will throw an error. This is just
  # a limitation of RNNs: for FC or CNN models batch_size=None is fine.
  embedding = tf.keras.layers.Embedding(
    input_dim=5,
    output_dim=16,
    batch_input_shape=[1, 10],
  )

  recurrent = tf.keras.layers.SimpleRNN(
    units=8,
    return_sequences=True,
    stateful=False,
  )

  head = tf.keras.layers.Dense(5)

  return tf.keras.models.Sequential([embedding, recurrent, head])

# Build the model and print its layer summary.
model = create_model()
model.summary()

# Export the model to disk; the converter below loads it from this same path.
model.save("/content/model/")

# 200 random sequences of length 10 (the model's fixed sequence length), drawn
# with randint(0, 5) to match the Embedding's input_dim=5, then cast to float32.
representative_data = np.random.randint(0, 5, (200, 10)).astype(np.float32)

def representative_dataset():
  """Yield calibration inputs one sample at a time from representative_data.

  Each yielded item is a one-element list: the single sample with a leading
  batch axis of size 1 added, as the first (and only) input of the model.
  """
  for row in representative_data:
    # Prepend the batch axis (batch_size = 1) and wrap in the per-input list.
    yield [row[np.newaxis, ...]]

# 16x8 quantization - Fail
# Load the SavedModel written earlier in this script and request the ops set
# whose name denotes int16 activations with int8 weights.
converter = tf.lite.TFLiteConverter.from_saved_model("/content/model/")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]
# Commenting the following line will remove the error
# (per the traceback, the failure is raised inside calibrate_and_quantize,
# which is fed by this representative dataset).
converter.representative_dataset = representative_dataset
# This call raises the RuntimeError shown in the traceback below.
tflite_quant_model = converter.convert()

Error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
[<ipython-input-11-b8b7cfec032a>](https://localhost:8080/#) in <cell line: 7>()
      5 # Commenting the following line will remove the error
      6 converter.representative_dataset = representative_dataset
----> 7 tflite_quant_model = converter.convert()

10 frames
[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in wrapper(self, *args, **kwargs)
    960   def wrapper(self, *args, **kwargs):
    961     # pylint: disable=protected-access
--> 962     return self._convert_and_export_metrics(convert_func, *args, **kwargs)
    963     # pylint: enable=protected-access
    964 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
    938     self._save_conversion_params_metric()
    939     start_time = time.process_time()
--> 940     result = convert_func(self, *args, **kwargs)
    941     elapsed_time_ms = (time.process_time() - start_time) * 1000
    942     if result:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in convert(self)
   1245           graph_def)
   1246 
-> 1247     return self._convert_from_saved_model(graph_def)
   1248 
   1249 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _convert_from_saved_model(self, graph_def)
   1129 
   1130     result = _convert_saved_model(**converter_kwargs)
-> 1131     return self._optimize_tflite_model(
   1132         result, quant_mode, quant_io=self.experimental_new_quantizer)
   1133 

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _optimize_tflite_model(self, model, quant_mode, quant_io)
    897         q_allow_float = quant_mode.is_allow_float()
    898         q_variable_quantization = quant_mode.enable_mlir_variable_quantization
--> 899         model = self._quantize(model, q_in_type, q_out_type, q_activations_type,
    900                                q_bias_type, q_allow_float,
    901                                q_variable_quantization)

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/lite.py](https://localhost:8080/#) in _quantize(self, result, input_type, output_type, activations_type, bias_type, allow_float, enable_variable_quantization)
    652           enable_variable_quantization=enable_variable_quantization)
    653     else:
--> 654       return calibrate_quantize.calibrate_and_quantize(
    655           self.representative_dataset.input_gen,
    656           input_type,

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    213       except Exception as error:
    214         report_error_message(str(error))
--> 215         raise error from None  # Re-throws the exception.
    216 
    217     return wrapper

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/convert_phase.py](https://localhost:8080/#) in wrapper(*args, **kwargs)
    203     def wrapper(*args, **kwargs):
    204       try:
--> 205         return func(*args, **kwargs)
    206       except ConverterError as converter_error:
    207         if converter_error.errors:

[/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/optimize/calibrator.py](https://localhost:8080/#) in calibrate_and_quantize(self, dataset_gen, input_type, output_type, allow_float, activations_type, bias_type, resize_input, disable_per_channel)
    174     """
    175     self._feed_tensors(dataset_gen, resize_input)
--> 176     return self._calibrator.QuantizeModel(
    177         np.dtype(input_type.as_numpy_dtype()).num,
    178         np.dtype(output_type.as_numpy_dtype()).num, allow_float,

RuntimeError: Max and min for dynamic tensors should be recorded during calibration: Failed for tensor arg1
Empty min/max for tensor arg1

cdh4696 commented 10 months ago

@yyoon Could you please check? Thanks!

yyoon commented 10 months ago

@tucan9389 could you take a look? Not sure if RNN is supported at all.

Black3rror commented 5 months ago

Any update?

HajarAva commented 2 months ago

Hello, I'm facing the same problem. Is there any update on how to deal with this error?

tensorflow / model-optimization

16x8 Quantization fails for RNN model - Max and min for dynamic tensors should be recorded during calibration #1090