Closed wagamamaz closed 7 years ago
Please follow the latest implementation of `BatchNormLayer`
and `BatchNormLayer5`.
--- previous answer --- Hi, I just made a commit for TF12; please give it a try and let me know if there are any other issues.
class BatchNormLayer5(Layer):
    """
    The :class:`BatchNormLayer5` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``.

    Batch normalization on fully-connected or convolutional maps.

    Parameters
    -----------
    layer : a :class:`Layer` instance
        The `Layer` class feeding into this layer.
    decay : float
        A decay factor for ExponentialMovingAverage.
    epsilon : float
        A small float number to avoid dividing by 0.
    act : activation function.
    is_train : boolean
        Whether train or inference.
    beta_init : beta initializer
        The initializer for initializing beta
    gamma_init : gamma initializer
        The initializer for initializing gamma
    name : a string or None
        An optional name to attach to this layer.

    Notes
    -----
    - When ``is_train`` is True the layer normalizes with the statistics of
      the current batch and, as a side effect, updates ``moving_mean`` and
      ``moving_variance``.
    - When ``is_train`` is False the layer normalizes with the stored moving
      statistics, so the output no longer depends on the batch composition.
    - ``moving_mean`` and ``moving_variance`` are appended to ``all_params``
      (after ``beta`` and ``gamma``) so that they are saved and restored with
      the rest of the model. Parameter files written with only
      ``[beta, gamma]`` per layer will not line up with this ordering.

    References
    ----------
    - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
    - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
    """
    def __init__(
        self,
        layer = None,
        decay = 0.999,
        epsilon = 0.00001,
        act = tf.identity,
        is_train = False,
        beta_init = tf.zeros_initializer,
        gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002),
        name ='batchnorm_layer',
    ):
        Layer.__init__(self, name=name)
        self.inputs = layer.outputs
        print(" tensorlayer:Instantiate BatchNormLayer %s: decay: %f, epsilon: %f, act: %s, is_train: %s" %
              (name, decay, epsilon, act.__name__, is_train))
        x_shape = self.inputs.get_shape()
        # One scale/offset/statistic per channel (the last dimension).
        params_shape = x_shape[-1:]

        from tensorflow.python.training import moving_averages

        with tf.variable_scope(name):
            # Reduce over every dimension except the channel dimension.
            axis = list(range(len(x_shape) - 1))

            ## 1. beta, gamma
            beta = tf.get_variable('beta', shape=params_shape,
                                   initializer=beta_init,
                                   trainable=is_train)
            gamma = tf.get_variable('gamma', shape=params_shape,
                                    initializer=gamma_init,
                                    trainable=is_train)

            ## 2. moving variables during training (not update by gradient!)
            moving_mean = tf.get_variable('moving_mean', params_shape,
                                          initializer=tf.zeros_initializer,
                                          trainable=False)
            moving_variance = tf.get_variable('moving_variance', params_shape,
                                              initializer=tf.constant_initializer(1.),
                                              trainable=False)

            batch_mean, batch_var = tf.nn.moments(self.inputs, axis)

            ## 3.
            # These ops will only be performed when training.
            def mean_var_with_update():
                try:    # TF12: disable zero-debiasing, which would bias the averages here
                    update_moving_mean = moving_averages.assign_moving_average(
                        moving_mean, batch_mean, decay, zero_debias=False)
                    update_moving_variance = moving_averages.assign_moving_average(
                        moving_variance, batch_var, decay, zero_debias=False)
                except TypeError:   # TF11: `zero_debias` keyword is not accepted
                    update_moving_mean = moving_averages.assign_moving_average(
                        moving_mean, batch_mean, decay)
                    update_moving_variance = moving_averages.assign_moving_average(
                        moving_variance, batch_var, decay)

                # Tie the moving-average updates to the returned tensors so the
                # updates run whenever the normalized output is evaluated.
                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                    return tf.identity(batch_mean), tf.identity(batch_var)

            if is_train:
                mean, var = mean_var_with_update()
            else:
                # Inference must use the accumulated population statistics,
                # not the statistics of whatever batch happens to be fed.
                mean, var = moving_mean, moving_variance

            normed = tf.nn.batch_normalization(
                self.inputs, mean, var, beta, gamma, epsilon)
            self.outputs = act( normed )

            # Include the moving statistics so save/restore round-trips them.
            variables = [beta, gamma, moving_mean, moving_variance]

        self.all_layers = list(layer.all_layers)
        self.all_params = list(layer.all_params)
        self.all_drop = dict(layer.all_drop)
        self.all_layers.extend( [self.outputs] )
        self.all_params.extend( variables )
Hi everyone, I found an interesting thing but I don't know the reason. When I restore a CNN network with `BatchNormLayer`
from an npz file, the accuracy is slightly different; my code is attached. Hope someone can help me, thanks in advance. Here is my result:
`is_test_only = False`: (note: I set `n_epoch=1`
and use only a small part of the training data for fast debugging.)
`is_test_only = True`:
@sczhengyabin I saw you set
`variables = tf.GraphKeys.GLOBAL_VARIABLES`
(line 1825), but I found it will get 8 parameters ... are you sure that is correct? I tried the following setting, but the accuracy is still slightly different ...
@boscotsang as I discussed with you in pull/42, the testing and training costs all drop normally, but I really don't understand why the accuracies are different after restoring, or which variables should be included in the `variables` list.
My code environment: TensorFlow 0.12 and TensorLayer 1.3.0
# Reproduction script from the issue report: train a small CNN with a batch
# normalization layer on MNIST, save its parameters to an npz file, and
# (when `is_test_only` is True) reload them and evaluate without training.
# NOTE(review): in this copy several statements are collapsed onto single
# lines and some text appears to be missing, so the script will not parse
# as-is; line breaks and the flagged fragments below must be restored first.
import numpy as np import tensorflow as tf import tensorlayer as tl from tensorlayer.layers import set_keep import time
# Mode switch for the whole script.
is_test_only = False # if True, restore and test without training
# Load MNIST with images shaped (-1, 28, 28, 1).
X_train, y_train, X_val, y_val, X_test, y_test = \ tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
# Cast images to float32 and labels to int64; only the first 10000 training
# samples are kept.
X_train = np.asarray(X_train, dtype=np.float32)[0:10000]#<-- small training set for fast debugging y_train = np.asarray(y_train, dtype=np.int64)[0:10000] X_val = np.asarray(X_val, dtype=np.float32) y_val = np.asarray(y_val, dtype=np.int64) X_test = np.asarray(X_test, dtype=np.float32) y_test = np.asarray(y_test, dtype=np.int64)
# Print array shapes and dtypes for a quick sanity check.
print('X_train.shape', X_train.shape) print('y_train.shape', y_train.shape) print('X_val.shape', X_val.shape) print('y_val.shape', y_val.shape) print('X_test.shape', X_test.shape) print('y_test.shape', y_test.shape) print('X %s y %s' % (X_test.dtype, y_test.dtype))
sess = tf.InteractiveSession()
# Fixed-batch-size placeholders for images and labels.
# NOTE(review): `batchsize` is never defined in the visible code (only
# `batch_size` is) - presumably a lost underscore; confirm.
# NOTE(review): the label placeholder is bound to `y`, which is rebound to
# the network output further down - presumably it originally had a distinct
# name (e.g. `y_`); confirm.
batch_size = 128 x = tf.placeholder(tf.float32, shape=[batchsize, 28, 28, 1]) y = tf.placeholder(tf.int64, shape=[batch_size,])
# Model builder: inside variable scope "CNN" it stacks InputLayer -> Conv2d
# (32 filters, 5x5, stride 1, no activation, b_init=None) -> BatchNormLayer
# (ReLU, custom gamma initializer). `reuse` is forwarded to both the
# variable scope and tl.layers.set_name_reuse.
# NOTE(review): no further layers and no `return` are visible although the
# callers below use the return value - the rest of the function appears to
# be missing from this copy.
def inference(x, is_train, reuse=None): gamma_init = tf.random_normal_initializer(1., 0.02) with tf.variable_scope("CNN", reuse=reuse): tl.layers.set_name_reuse(reuse) network = tl.layers.InputLayer(x, name='input_layer') network = tl.layers.Conv2d(network, n_filter=32, filter_size=(5, 5), strides=(1, 1), act=None, b_init=None, padding='SAME', name='cnn_layer1') network = tl.layers.BatchNormLayer(network, act=tf.nn.relu, gamma_init=gamma_init, is_train=is_train, name='batch1')
# NOTE(review): the next line reads like a section comment that lost its
# leading '#'.
train phase
# Training graph: is_train=True, fresh variables (reuse=False); defines the
# cost and accuracy tensors.
# NOTE(review): `y` is passed as both logits and labels, `correctprediction`
# is assigned while `correct_prediction` is read, and `..._withlogits` lacks
# the underscore used in the test-phase call below - presumably lost
# underscores; confirm.
network = inference(x, is_train=True, reuse=False) y = network.outputs cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_withlogits(y, y)) correctprediction = tf.equal(tf.argmax(y, 1), y) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): the next line reads like a section comment that lost its
# leading '#'.
test phase
# Evaluation graph: same builder with is_train=False and reuse=True, so it
# shares the variables created for the training graph.
# NOTE(review): `yt` is not defined in the visible code (the output is bound
# to `y_t`) - presumably a lost underscore; confirm.
network_test = inference(x, is_train=False, reuse=True) y_t = network_test.outputs cost_t = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(yt, y)) correct_prediction = tf.equal(tf.argmax(yt, 1), y) acc_t = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Hyperparameters; both branches set n_epoch to 1.
if is_test_only: n_epoch = 1 else: n_epoch = 1 learning_rate = 0.0001 print_freq = 1
# Adam optimizer restricted to the parameters listed in network.all_params.
train_params = network.all_params train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)
# Test-only mode: load the parameters saved in '_model_test.npz' and assign
# them to the training network.
# NOTE(review): no variable-initialization call is visible anywhere in this
# copy - confirm whether one was lost.
if is_test_only: load_params = tl.files.load_npz(name='_model_test.npz') tl.files.assign_params(sess, load_params, network)
# Print the network's parameters and layers.
network.print_params(True) network.print_layers()
print(' learning_rate: %f' % learning_rate) print(' batch_size: %d' % batch_size)
# Training loop over shuffled minibatches for n_epoch epochs.
# NOTE(review): the loop body is truncated - only `, feed_dict=...)` remains,
# presumably of a `sess.run(train_op, ...)` call; `X_traina` is also not
# defined (the loop variable is `X_train_a`). Confirm against the original.
if not is_test_only: for epoch in range(n_epoch): start_time = time.time() for X_train_a, y_train_a in tl.iterate.minibatches( X_train, y_train, batch_size, shuffle=True):, feed_dict={x: X_traina, y: y_train_a})
# Evaluation: accumulate cost_t / acc_t over unshuffled test minibatches and
# print the per-batch averages.
# NOTE(review): `err, ac =[cost_t, acc_t], feed_dict=...)` has an unmatched
# ')' - presumably a `sess.run(` was lost; `X_testa` is not defined (the loop
# variable is `X_test_a`). Confirm against the original.
print('Evaluation') test_loss, test_acc, n_batch = 0, 0, 0 for X_test_a, y_test_a in tl.iterate.minibatches( X_test, y_test, batch_size, shuffle=False): err, ac =[cost_t, acc_t], feed_dict={x: X_testa, y: y_test_a}) test_loss += err; test_acc += ac; n_batch += 1 print(" test loss: %f" % (test_loss/n_batch)) print(" test acc: %f" % (test_acc/n_batch))
# Save everything in network.all_params to '_model_test.npz'.
# NOTE(review): original indentation is lost, so it is unclear whether this
# runs only after training or also in test-only mode - confirm.
tl.files.save_npz(network.all_params, name='_model_test.npz', sess=sess)