tensorlayer / TensorLayer

Deep Learning and Reinforcement Learning Library for Scientists and Engineers
http://tensorlayerx.com

Why does the accuracy of a CNN with BatchNormLayer change slightly after restoring? #57

Closed wagamamaz closed 7 years ago

wagamamaz commented 7 years ago

Hi everyone, I found an interesting thing but I don't know the reason. When I restore a CNN network with BatchNormLayer from an npz file, the accuracy is slightly different. My code is attached below. Hope someone can help me; thanks in advance.
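In short, the save/restore round trip looks like this (a condensed sketch of the full script below; `inference`, `x`, and `sess` are defined there):

# two graphs sharing the same parameters: one for training, one for inference
network = inference(x, is_train=True, reuse=False)
network_test = inference(x, is_train=False, reuse=True)

# ... training loop ...

# save all parameters collected in network.all_params, then load them back
tl.files.save_npz(network.all_params, name='_model_test.npz', sess=sess)
load_params = tl.files.load_npz(name='_model_test.npz')
tl.files.assign_params(sess, load_params, network)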

import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import set_keep
import time

is_test_only = False # if True, restore and test without training

X_train, y_train, X_val, y_val, X_test, y_test = \
    tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))

X_train = np.asarray(X_train, dtype=np.float32)[0:10000]  # <-- small training set for fast debugging
y_train = np.asarray(y_train, dtype=np.int64)[0:10000]
X_val = np.asarray(X_val, dtype=np.float32)
y_val = np.asarray(y_val, dtype=np.int64)
X_test = np.asarray(X_test, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.int64)

print('X_train.shape', X_train.shape)
print('y_train.shape', y_train.shape)
print('X_val.shape', X_val.shape)
print('y_val.shape', y_val.shape)
print('X_test.shape', X_test.shape)
print('y_test.shape', y_test.shape)
print('X %s y %s' % (X_test.dtype, y_test.dtype))

sess = tf.InteractiveSession()

batch_size = 128
x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1])
y_ = tf.placeholder(tf.int64, shape=[batch_size,])

def inference(x, is_train, reuse=None):
    gamma_init = tf.random_normal_initializer(1., 0.02)
    with tf.variable_scope("CNN", reuse=reuse):
        tl.layers.set_name_reuse(reuse)
        network = tl.layers.InputLayer(x, name='input_layer')
        network = tl.layers.Conv2d(network, n_filter=32, filter_size=(5, 5), strides=(1, 1),
                act=None, b_init=None, padding='SAME', name='cnn_layer1')
        network = tl.layers.BatchNormLayer(network, act=tf.nn.relu,
                gamma_init=gamma_init, is_train=is_train, name='batch1')

        network = tl.layers.MaxPool2d(network, filter_size=(2, 2), strides=(2, 2),
                padding='SAME', name='pool_layer1')
        network = tl.layers.Conv2d(network, n_filter=64, filter_size=(5, 5), strides=(1, 1),
                act=None, b_init=None, padding='SAME', name='cnn_layer2')
        network = tl.layers.BatchNormLayer(network, act=tf.nn.relu,
                gamma_init=gamma_init, is_train=is_train, name='batch2')

        network = tl.layers.MaxPool2d(network, filter_size=(2, 2), strides=(2, 2),
                padding='SAME', name='pool_layer2')
        ## end of conv
        network = tl.layers.FlattenLayer(network, name='flatten_layer')
        if is_train:
            network = tl.layers.DropoutLayer(network, keep=0.5, is_fix=True, name='drop1')
        network = tl.layers.DenseLayer(network, n_units=256,
                                        act=tf.nn.relu, name='relu1')
        if is_train:
            network = tl.layers.DropoutLayer(network, keep=0.5, is_fix=True, name='drop2')
        network = tl.layers.DenseLayer(network, n_units=10,
                                        act=tf.identity, name='output_layer')
    return network

## train phase

network = inference(x, is_train=True, reuse=False)
y = network.outputs
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, y_))
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## test phase

network_test = inference(x, is_train=False, reuse=True)
y_t = network_test.outputs
cost_t = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y_t, y_))
correct_prediction = tf.equal(tf.argmax(y_t, 1), y_)
acc_t = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

## train

if is_test_only:
    n_epoch = 1
else:
    n_epoch = 1
learning_rate = 0.0001
print_freq = 1

train_params = network.all_params
train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
    epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)

tl.layers.initialize_global_variables(sess)

if is_test_only:
    load_params = tl.files.load_npz(name='_model_test.npz')
    tl.files.assign_params(sess, load_params, network)

network.print_params(True)
network.print_layers()

print(' learning_rate: %f' % learning_rate)
print(' batch_size: %d' % batch_size)

if not is_test_only:
    for epoch in range(n_epoch):
        start_time = time.time()
        for X_train_a, y_train_a in tl.iterate.minibatches(
                                X_train, y_train, batch_size, shuffle=True):
            sess.run(train_op, feed_dict={x: X_train_a, y_: y_train_a})

        if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
            print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
            train_loss, train_acc, n_batch = 0, 0, 0
            for X_train_a, y_train_a in tl.iterate.minibatches(
                                    X_train, y_train, batch_size, shuffle=True):
                err, ac = sess.run([cost_t, acc_t], feed_dict={x: X_train_a, y_: y_train_a})
                train_loss += err; train_acc += ac; n_batch += 1
            print("   train loss: %f" % (train_loss/ n_batch))
            print("   train acc: %f" % (train_acc/ n_batch))
            val_loss, val_acc, n_batch = 0, 0, 0
            for X_val_a, y_val_a in tl.iterate.minibatches(
                                        X_val, y_val, batch_size, shuffle=True):
                err, ac = sess.run([cost_t, acc_t], feed_dict={x: X_val_a, y_: y_val_a})
                val_loss += err; val_acc += ac; n_batch += 1
            print("   val loss: %f" % (val_loss/ n_batch))
            print("   val acc: %f" % (val_acc/ n_batch))

print('Evaluation')
test_loss, test_acc, n_batch = 0, 0, 0
for X_test_a, y_test_a in tl.iterate.minibatches(
                        X_test, y_test, batch_size, shuffle=False):
    err, ac = sess.run([cost_t, acc_t], feed_dict={x: X_test_a, y_: y_test_a})
    test_loss += err; test_acc += ac; n_batch += 1
print("   test loss: %f" % (test_loss/n_batch))
print("   test acc: %f" % (test_acc/n_batch))

network.print_params(True)

tl.files.save_npz(network.all_params, name='_model_test.npz', sess=sess)

zsdonghao commented 7 years ago

Please follow the latest implementations of BatchNormLayer and BatchNormLayer5 in https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/layers.py

--- previous answer --- Hi, I just made a commit for TF12. Please have a try and let me know if there are any other issues.


class BatchNormLayer5(Layer):   #
    """
    The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.

    Batch normalization on fully-connected or convolutional maps.

    Parameters
    -----------
    layer : a :class:`Layer` instance
        The `Layer` class feeding into this layer.
    decay : float
        A decay factor for ExponentialMovingAverage.
    epsilon : float
        A small float number to avoid dividing by 0.
    act : activation function.
    is_train : boolean
        Whether train or inference.
    beta_init : beta initializer
        The initializer for initializing beta
    gamma_init : gamma initializer
        The initializer for initializing gamma
    name : a string or None
        An optional name to attach to this layer.

    References
    ----------
    - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
    - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
    """
    def __init__(
        self,
        layer = None,
        decay = 0.999,
        epsilon = 0.00001,
        act = tf.identity,
        is_train = False,
        beta_init = tf.zeros_initializer,
        # gamma_init = tf.ones_initializer,
        gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
        name ='batchnorm_layer',
    ):
        Layer.__init__(self, name=name)
        self.inputs = layer.outputs
        print("  tensorlayer:Instantiate BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
                            (self.name, decay, epsilon, act.__name__, is_train))
        x_shape = self.inputs.get_shape()
        params_shape = x_shape[-1:]

        from tensorflow.python.training import moving_averages
        from tensorflow.python.ops import control_flow_ops

        with tf.variable_scope(name) as vs:
            axis = list(range(len(x_shape) - 1))

            ## 1. beta, gamma
            beta = tf.get_variable('beta', shape=params_shape,
                               initializer=beta_init,
                               trainable=is_train)#, restore=restore)

            gamma = tf.get_variable('gamma', shape=params_shape,
                                initializer=gamma_init, trainable=is_train,
                                )#restore=restore)

            ## 2. moving variables during training (not update by gradient!)
            moving_mean = tf.get_variable('moving_mean',
                                      params_shape,
                                      initializer=tf.zeros_initializer,
                                      trainable=False,)#   restore=restore)
            moving_variance = tf.get_variable('moving_variance',
                                          params_shape,
                                          initializer=tf.constant_initializer(1.),
                                          trainable=False,)#   restore=restore)

            batch_mean, batch_var = tf.nn.moments(self.inputs, axis)
            ## 3.
            # These ops will only be performed when training.
            def mean_var_with_update():
                try:    # TF12
                    update_moving_mean = moving_averages.assign_moving_average(
                                    moving_mean, batch_mean, decay, zero_debias=False)     # if zero_debias=True, has bias
                    update_moving_variance = moving_averages.assign_moving_average(
                                    moving_variance, batch_var, decay, zero_debias=False) # if zero_debias=True, has bias
                    # print("TF12 moving")
                except Exception as e:  # TF11
                    update_moving_mean = moving_averages.assign_moving_average(
                                    moving_mean, batch_mean, decay)
                    update_moving_variance = moving_averages.assign_moving_average(
                                    moving_variance, batch_var, decay)
                    # print("TF11 moving")

                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                    # return tf.identity(update_moving_mean), tf.identity(update_moving_variance)
                    return tf.identity(batch_mean), tf.identity(batch_var)

            if is_train:
                mean, var = mean_var_with_update()
            else:
                mean, var = (batch_mean, batch_var) # hao

            normed = tf.nn.batch_normalization(
              x=self.inputs,
              mean=mean,
              variance=var,
              offset=beta,
              scale=gamma,
              variance_epsilon=epsilon,
              name="tf_bn"
            )
            self.outputs = act( normed )
            variables = [beta, gamma]

        self.all_layers = list(layer.all_layers)
        self.all_params = list(layer.all_params)
        self.all_drop = dict(layer.all_drop)
        self.all_layers.extend( [self.outputs] )
        self.all_params.extend( variables )
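
For anyone trying this, here is a minimal usage sketch under the same graph-sharing pattern as the script above. The class name `BatchNormLayer5` is taken from the snippet here; substitute whichever batch-norm layer name your installed tensorlayer actually exports:

x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])

def conv_bn(x, is_train, reuse=None):
    with tf.variable_scope("CNN", reuse=reuse):
        tl.layers.set_name_reuse(reuse)
        net = tl.layers.InputLayer(x, name='input')
        net = tl.layers.Conv2d(net, n_filter=32, filter_size=(5, 5), strides=(1, 1),
                act=None, b_init=None, padding='SAME', name='cnn1')
        # is_train=True: normalize with batch statistics and update the moving
        # averages, roughly moving <- decay * moving + (1 - decay) * batch.
        # is_train=False (in the version quoted above): batch statistics are
        # still used, see `mean, var = (batch_mean, batch_var)`.
        net = BatchNormLayer5(net, decay=0.999, act=tf.nn.relu,
                is_train=is_train, name='bn1')
    return net

net_train = conv_bn(x, is_train=True, reuse=False)
net_test = conv_bn(x, is_train=False, reuse=True)

# Note: the quoted layer appends only [beta, gamma] to all_params (see `variables`
# above), so moving_mean / moving_variance are not part of net_train.all_params.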