google-ai-edge / ai-edge-torch

Supporting PyTorch models with the Google AI Edge TFLite runtime.
Apache License 2.0

error with concatenation when converting QAT model to tflite model using EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8 #391

Open gaikwadrahul8 opened 4 days ago

gaikwadrahul8 commented 4 days ago

1. System information

Windows, conda environment tf212 (apparently TensorFlow 2.12, per the paths in the traceback below).

I am attempting to convert a QAT model trained with int8 weights and int16 activations to a TFLite model using tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8. Unfortunately, conversion fails with this opset. Minimal code to reproduce the error:

import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Two inputs feeding ReLUs whose outputs are concatenated on the last axis.
inp1 = tf.keras.Input(shape=[2, 4, 8], batch_size=1, name='input1')
inp2 = tf.keras.Input(shape=[2, 4, 8], batch_size=1, name='input2')
r1 = tf.keras.layers.ReLU()(inp1)
r2 = tf.keras.layers.ReLU()(inp2)
c1 = tf.keras.layers.Concatenate(axis=-1)([r1, r2])

# QAT scheme: int8 weights, int16 activations.
scheme_16_8 = tfmot.quantization.keras.experimental.default_n_bit.DefaultNBitQuantizeScheme(
    disable_per_axis=False, num_bits_weight=8, num_bits_activation=16)

test_model = tf.keras.Model(inputs=[inp1, inp2], outputs=c1)
annotated_model = tf.keras.models.clone_model(test_model)
ann_model = tfmot.quantization.keras.quantize_annotate_model(annotated_model)
q_model = tfmot.quantization.keras.quantize_apply(ann_model, scheme=scheme_16_8)

converter = tf.lite.TFLiteConverter.from_keras_model(q_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Conversion succeeds if TFLITE_BUILTINS is used here instead (see below).
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
quantized_tflite_model = converter.convert()

This yields the following error:

---------------------------------------------------------------------------
ConverterError                            Traceback (most recent call last)
Cell In[11], line 5
      3 converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]#tf.lite.OpsSet.TFLITE_BUILTINS]
      4 #converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
----> 5 quantized_tflite_model = converter.convert()
      7 file_name = 'test_model.tflite'
      9     # Save the model.

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\lite.py:962, in _export_metrics.<locals>.wrapper(self, *args, **kwargs)
    959 @functools.wraps(convert_func)
    960 def wrapper(self, *args, **kwargs):
    961   # pylint: disable=protected-access
--> 962   return self._convert_and_export_metrics(convert_func, *args, **kwargs)

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\lite.py:940, in TFLiteConverterBase._convert_and_export_metrics(self, convert_func, *args, **kwargs)
    938 self._save_conversion_params_metric()
    939 start_time = time.process_time()
--> 940 result = convert_func(self, *args, **kwargs)
    941 elapsed_time_ms = (time.process_time() - start_time) * 1000
    942 if result:

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\lite.py:1373, in TFLiteKerasModelConverterV2.convert(self)
   1360 @_export_metrics
   1361 def convert(self):
   1362   """Converts a keras model based on instance variables.
   1363 
   1364   Returns:
   (...)
   1371       Invalid quantization parameters.
   1372   """
-> 1373   saved_model_convert_result = self._convert_as_saved_model()
   1374   if saved_model_convert_result:
   1375     return saved_model_convert_result

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\lite.py:1355, in TFLiteKerasModelConverterV2._convert_as_saved_model(self)
   1352   graph_def, input_tensors, output_tensors = (
   1353       self._convert_keras_to_saved_model(temp_dir))
   1354   if self.saved_model_dir:
-> 1355     return super(TFLiteKerasModelConverterV2,
   1356                  self).convert(graph_def, input_tensors, output_tensors)
   1357 finally:
   1358   shutil.rmtree(temp_dir, True)

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\lite.py:1166, in TFLiteConverterBaseV2.convert(self, graph_def, input_tensors, output_tensors)
   1161   logging.info("Using new converter: If you encounter a problem "
   1162                "please file a bug. You can opt-out "
   1163                "by setting experimental_new_converter=False")
   1165 # Converts model.
-> 1166 result = _convert_graphdef(
   1167     input_data=graph_def,
   1168     input_tensors=input_tensors,
   1169     output_tensors=output_tensors,
   1170     **converter_kwargs)
   1172 return self._optimize_tflite_model(
   1173     result, self._quant_mode, quant_io=self.experimental_new_quantizer)

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\convert_phase.py:212, in convert_phase.<locals>.actual_decorator.<locals>.wrapper(*args, **kwargs)
    210   else:
    211     report_error_message(str(converter_error))
--> 212   raise converter_error from None  # Re-throws the exception.
    213 except Exception as error:
    214   report_error_message(str(error))

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\convert_phase.py:205, in convert_phase.<locals>.actual_decorator.<locals>.wrapper(*args, **kwargs)
    202 @functools.wraps(func)
    203 def wrapper(*args, **kwargs):
    204   try:
--> 205     return func(*args, **kwargs)
    206   except ConverterError as converter_error:
    207     if converter_error.errors:

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\convert.py:817, in convert_graphdef(input_data, input_tensors, output_tensors, **kwargs)
    814   else:
    815     model_flags.output_arrays.append(util.get_tensor_name(output_tensor))
--> 817 data = convert(
    818     model_flags.SerializeToString(),
    819     conversion_flags.SerializeToString(),
    820     input_data.SerializeToString(),
    821     debug_info_str=debug_info.SerializeToString() if debug_info else None,
    822     enable_mlir_converter=enable_mlir_converter)
    823 return data

File ~\.conda\envs\tf212\lib\site-packages\tensorflow\lite\python\convert.py:322, in convert(model_flags_str, conversion_flags_str, input_data_str, debug_info_str, enable_mlir_converter)
    320     for error_data in _metrics_wrapper.retrieve_collected_errors():
    321       converter_error.append_error(error_data)
--> 322     raise converter_error
    324 return _run_deprecated_conversion_binary(model_flags_str,
    325                                          conversion_flags_str, input_data_str,
    326                                          debug_info_str)

ConverterError: C:\Users\derry\.conda\envs\tf212\lib\site-packages\keras\layers\merging\concatenate.py:134:0: error: 'tfl.concatenation' op operand #0 must be tensor of 32-bit float or 64-bit signless integer or 32-bit signless integer or 16-bit signless integer or 8-bit signless integer or QI8 type or QUI8 type or 8-bit unsigned integer or 1-bit signless integer values, but got 'tensor<1x2x4x8x!quant.uniform<i16:f32, 1.8310826276035706E-4>>'
<unknown>:0: note: loc(fused["StatefulPartitionedCall:", "StatefulPartitionedCall"]): called from

The error message lists the element types tfl.concatenation accepts, and quantized 16-bit (QI16) tensors are not among them (only signless 16-bit integers), so the int16-activation QAT model fails op verification at the concatenation. Running the same process with the default int8 scheme converts without errors:

# Same annotated model, quantized with the default int8 scheme.
q_model_2 = tfmot.quantization.keras.quantize_apply(ann_model)

converter = tf.lite.TFLiteConverter.from_keras_model(q_model_2)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
quantized_tflite_model2 = converter.convert()
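
For reference, the post-training route to 16x8 quantization (a representative dataset on the float model, rather than QAT) is the path the TensorFlow docs describe for this opset. Below is a minimal sketch against the same float test_model, using an illustrative random calibration set; I have not confirmed whether it sidesteps the concatenation error:

import numpy as np

def representative_dataset():
    # Illustrative random calibration data; shapes match the two model inputs.
    for _ in range(10):
        yield [
            np.random.rand(1, 2, 4, 8).astype(np.float32),
            np.random.rand(1, 2, 4, 8).astype(np.float32),
        ]

converter = tf.lite.TFLiteConverter.from_keras_model(test_model)  # float model, no QAT
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
ptq_tflite_model = converter.convert()

If this path also fails on tfl.concatenation, that would point at the converter's 16x8 handling of the op rather than at the QAT tooling.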
gaikwadrahul8 commented 3 days ago

This issue, originally reported by @DerryFitz, has been moved to this dedicated repository for ai-edge-torch to improve issue tracking and prioritization. To ensure continuity, we have created this new issue on your behalf.

We appreciate your understanding and look forward to your continued involvement.