problem with quantizing the BN layer

lovodkin93 commented 3 years ago

Hello, I am trying to perform a QAT on a ResNet50 network with BN layers, and I keep getting the following error:

ValueError: Shape must be rank 4 but is rank 5 for '{{node batch_normalization_8/FusedBatchNormV3}} = FusedBatchNormV3[T=DT_FLOAT, U=DT_FLOAT, data_format="NHWC", epsilon=0.001, exponential_avg_factor=1, is_training=false](Placeholder, batch_normalization_8/ReadVariableOp, batch_normalization_8/ReadVariableOp_1, batch_normalization_8/FusedBatchNormV3/ReadVariableOp, batch_normalization_8/FusedBatchNormV3/ReadVariableOp_1)' with input shapes: [1,?,7,7,64], [64], [64], [64], [64].

I tried to isolate each of the BN layers, and it appears they all cause the same error.

Here is the code I am trying to run:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import datetime as dt
import h5py
import math
import numpy as np
import pandas as pd
import scipy as sci
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers 
from tensorflow.keras import activations
from tensorflow.keras import Model

from tensorflow.keras.layers import ZeroPadding2D,Add,Dense,Flatten,AveragePooling2D,Conv2D,BatchNormalization,MaxPooling2D,Activation,Input,Dropout

from IPython.display import SVG
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import model_to_dot
from tensorflow.python.keras.regularizers import Regularizer
import tensorflow_model_optimization as tfmot

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the third GPUtry:
  tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

def res_identity(x, filters): 
  #renet block where dimension doesnot change.
  #The skip connection is just simple identity conncection
  #we will have 3 blocks and then input will be added

  x_skip = x # this will be used for addition with the residual block 
  f1, f2 = filters

  #first block 
  x = Conv2D(f1, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer = Reg)(x)
  x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)

  #second block # bottleneck (but size kept same with padding)
  x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same', kernel_regularizer = Reg)(x)
  x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)

  # third block activation used after adding the input
  x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer = Reg)(x)
  x = BatchNormalization()(x)
  # x = Activation(activations.relu)(x)

  # add the input 
  x = Add()([x, x_skip])
  x = tf.keras.activations.relu(x)

  return x

def res_conv(x, s, filters):
  '''
  here the input size changes''' 
  x_skip = x
  f1, f2 = filters

  # first block
  x = Conv2D(f1, kernel_size=(1, 1), strides=(s, s), padding='valid', kernel_regularizer = Reg)(x)
  # when s = 2 then it is like downsizing the feature map
  x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)

  # second block
  x = Conv2D(f1, kernel_size=(3, 3), strides=(1, 1), padding='same', kernel_regularizer = Reg)(x)
  x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)

  #third block
  x = Conv2D(f2, kernel_size=(1, 1), strides=(1, 1), padding='valid', kernel_regularizer = Reg)(x)
  x = BatchNormalization()(x)

  # shortcut 
  x_skip = Conv2D(f2, kernel_size=(1, 1), strides=(s, s), padding='valid', kernel_regularizer = Reg)(x_skip)
  x_skip = BatchNormalization()(x_skip)

  # add 
  x = Add()([x, x_skip])
  x = Activation(activations.relu)(x)

  return x

def resnet50():

  input_im = Input(shape=(32, 32, 3)) 
  x = ZeroPadding2D(padding=(3, 3))(input_im)

  # 1st stage
  # here we perform maxpooling, see the figure above

  x = Conv2D(64, kernel_size=(7, 7), strides=(2, 2))(x)
  x = BatchNormalization()(x)
  x = Activation(activations.relu)(x)
  x = MaxPooling2D((3, 3), strides=(2, 2))(x)

  #2nd stage 
  # frm here on only conv block and identity block, no pooling

  x = res_conv(x, s=1, filters=(64, 256))
  x = res_identity(x, filters=(64, 256))
  x = res_identity(x, filters=(64, 256))
  x = Dropout(0.15)(x)
  # 3rd stage

  x = res_conv(x, s=2, filters=(128, 512))
  x = res_identity(x, filters=(128, 512))
  x = res_identity(x, filters=(128, 512))
  x = res_identity(x, filters=(128, 512))
  x = Dropout(0.15)(x)
  # 4th stage

  x = res_conv(x, s=2, filters=(256, 1024))
  x = res_identity(x, filters=(256, 1024))
  x = res_identity(x, filters=(256, 1024))
  x = res_identity(x, filters=(256, 1024))
  x = res_identity(x, filters=(256, 1024))
  x = res_identity(x, filters=(256, 1024))
  x = Dropout(0.15)(x)
  # 5th stage

  x = res_conv(x, s=2, filters=(512, 2048))
  x = res_identity(x, filters=(512, 2048))
  x = res_identity(x, filters=(512, 2048))

  # ends with average pooling and dense connection

  x = AveragePooling2D((2, 2), padding='same')(x)

  x = Flatten()(x)
  x = Dropout(0.1)(x)
  x = Dense(10, activation='softmax', kernel_initializer='he_normal', kernel_regularizer = Reg)(x) #multi-class

  # define the model 

  model = Model(inputs=input_im, outputs=x, name='Resnet50')

  return model

def main():
  num_classes = 10

  (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

  train_images = train_images.reshape((50000, 32, 32, 3)).astype("float32")
  test_images = test_images.reshape((10000, 32, 32, 3)).astype("float32")

  # Normalize pixel values to be between -1 and 1
  train_images, test_images = train_images / 127.5 - 1, test_images / 127.5 - 1

  train_labels = tf.keras.utils.to_categorical(train_labels, num_classes)
  test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)

  resNet50 = resnet50()

  ######################## mixed precision ######################  
  resNet50 = tfmot.quantization.keras.quantize_model(resNet50)
  ###############################################################

  resNet50.compile(
      tf.keras.optimizers.Adam(lr=0.01, decay=0.0001),
      loss="categorical_crossentropy",
      metrics=["accuracy"],
  )

  history = resNet50.fit(train_images, train_labels, batch_size = 32, epochs=100, callbacks=callbacks, validation_split=0.15)

if __name__ == "__main__":
    main()

What am I missing?

teijeong commented 3 years ago

Hi @lovodkin93 , can you share a colab so we can reproduce easily?

Also please let us know which TF and TF-MOT versions you're using.

lovodkin93 commented 3 years ago

sure thing: https://colab.research.google.com/drive/1o9hbgv4Toc59DTkM3h5PaBY1G1duiIdE?usp=sharing Thanks!

Xhark commented 3 years ago

It's an issue when you use tf.nn.relu instead of tf.keras.layers.ReLU. (It converted to TFLambdaOp which has a trouble with current QAT API.)

Would you please use tf.keras.layers.ReLU if it's okay?

lovodkin93 commented 3 years ago

So I tried to replace every x = Activation(activations.relu)(x) line with x = Activation(ReLU)(x), and now I get the following error (which is the reason I worked with x = Activation(activations.relu)(x) in the first place):

tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: using a `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.

Do you happen to know why this might occur? Thanks!

guillem-ms commented 1 year ago

Hi @Xhark , Is it possible that this same error might be happening using tensorflow.keras.activations.sigmoid? If so, what replacement should I employ? Since, there is not a tf.keras.layers.sigmoid or such. Using a tf.keras.layers.Lambdaperhaps?

robertatdm commented 7 months ago

I had a similar issue with tf.split. I could overcome the error by wrapping tf.split in a keras layer:

@keras.saving.register_keras_serializable(package="MyLayers", name="SplitLayer")
class SplitLayer(keras.layers.Layer):
    def __init__(self, num_or_size_splits, axis, **kwargs):
        super(SplitLayer, self).__init__(**kwargs)
        self.num_or_size_splits = num_or_size_splits
        self.axis = axis

    def call(self, inputs):
        return tf.split(inputs, self.num_or_size_splits, axis=self.axis)

    def get_config(self):
        config = super(SplitLayer, self).get_config()
        config.update({
            'num_or_size_splits': self.num_or_size_splits,
            'axis': self.axis,
        })
        return config

tf version: 2.15.1 keras version: 2.15.0 tfmot version: 0.7.5

tensorflow / model-optimization

problem with quantizing the BN layer #841