hequn / keras-transfer-learning

Based on https://github.com/Arsey/keras-transfer-learning-for-oxford102, with additional work in this project, especially on the triplet and center losses.

I have some problems when I use center loss for image classification with Keras. #1

Open wangjue-wzq opened 5 years ago

wangjue-wzq commented 5 years ago

I have some problems when I use center loss for image classification with Keras.

1. In custom_vgg_model.fit(y = {'fc2': y_train, 'predictions': y_train}), the 'fc2': y_train entry raises:

ValueError: Error when checking target: expected fc2 to have shape (None, 4096) but got array with shape (6300, 45)

y_train holds the labels. If I instead call custom_vgg_model.fit(y = {'fc2': dummy1, 'predictions': y_train}), the model trains successfully; dummy1 has the same shape as the 'fc2' output (the feature), i.e. dummy1 = np.zeros((y_train.shape[0], 4096)). But this does not improve the accuracy of the model, so the code must be wrong.

2. ImageDataGenerator.flow(x = X_train, y = {'fc2': dummy1, 'predictions': y_train}, batch_size=batch_Sizes) also fails, so I cannot augment my data (see the sketch after the lossclass.py code below for one possible workaround).

import time
import numpy as np
from keras import optimizers
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Dense
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
import lossclass

image_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=image_input, include_top=True, weights='imagenet')
model.summary()
last_layer = model.get_layer('fc2').output
feature = last_layer
out = Dense(num_classes, activation='softmax', name='predictions')(last_layer)
custom_vgg_model = Model(inputs=image_input, outputs=[out, feature])
custom_vgg_model.summary()
for layer in custom_vgg_model.layers[:-3]:
    layer.trainable = False
custom_vgg_model.layers[3].trainable  # note: no assignment here, so this line has no effect
sgd = optimizers.SGD(lr=learn_Rate, decay=decay_Rate, momentum=0.9, nesterov=True)
center_loss = lossclass.get_center_loss(alpha=0.5, num_classes=45, feature_dim=4096)
custom_vgg_model.compile(loss={'predictions': 'categorical_crossentropy', 'fc2': center_loss},
                         loss_weights={'fc2': 1, 'predictions': 1},
                         optimizer=sgd,
                         metrics={'predictions': 'accuracy'})
t = time.time()
dummy1 = np.zeros((y_train.shape[0], 4096))
dummy2 = np.zeros((y_test.shape[0], 4096))
if not data_Augmentation:
    hist = custom_vgg_model.fit(x=X_train,
                                y={'fc2': y_train, 'predictions': y_train},  # raises the shape error above
                                batch_size=batch_Sizes, epochs=epoch_Times, verbose=1,
                                validation_data=(X_test, {'fc2': y_test, 'predictions': y_test}))
else:
    datagen = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        rescale=None,
        preprocessing_function=None,
        data_format=None)
    print('x_train.shape[0]:{:d}'.format(X_train.shape[0]))
    hist = custom_vgg_model.fit_generator(
        datagen.flow(x=X_train, y={'fc2': dummy1, 'predictions': y_train}, batch_size=batch_Sizes),  # flow does not accept a dict of targets (point 2 above)
        steps_per_epoch=X_train.shape[0] / batch_Sizes, epochs=epoch_Times, verbose=1,
        validation_data=(X_test, {'fc2': y_test, 'predictions': y_test}))
# lossclass.py
import functools
import tensorflow as tf
from keras import backend as K

def _center_loss_func(labels, features, alpha, num_classes, centers, feature_dim):
    assert feature_dim == features.get_shape()[1]
    # labels arrive one-hot encoded; recover the class index per sample
    labels = K.argmax(labels, axis=1)
    labels = tf.to_int32(labels)
    centers_batch = K.gather(centers, labels)
    # update the centers of the classes present in the batch
    diff = (1 - alpha) * (centers_batch - features)
    centers = tf.scatter_sub(centers, labels, diff)
    centers_batch = K.gather(centers, labels)
    # penalize the distance between each feature and its class center
    loss = K.mean(K.square(features - centers_batch))
    return loss


def get_center_loss(alpha, num_classes, feature_dim):
    """Center loss based on the paper "A Discriminative
       Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    # Each output layer uses one independent set of centers: scope/centers
    centers = K.zeros([num_classes, feature_dim], dtype='float32')

    @functools.wraps(_center_loss_func)
    def center_loss(y_true, y_pred):
        return _center_loss_func(y_true, y_pred, alpha, num_classes, centers, feature_dim)
    return center_loss
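For reference, here is one way to make point 2 work. This is a sketch only, not code from this repository: the wrapper name, the feature_dim default, and the label-padding trick are my assumptions. The idea is to let ImageDataGenerator.flow handle only the images and one-hot labels, and to wrap it in a small generator that builds the dict of targets per batch, zero-padding the labels to the fc2 width so that both Keras' target-shape check and K.argmax inside _center_loss_func behave as intended.

import numpy as np

def center_loss_flow(datagen, X, y_onehot, batch_size, feature_dim=4096):
    """Sketch of a wrapper around ImageDataGenerator.flow (names and the
    padding trick are assumptions, not part of the original code)."""
    flow = datagen.flow(X, y_onehot, batch_size=batch_size)
    while True:
        x_batch, y_batch = next(flow)
        # Zero-pad the (batch, 45) one-hot labels to (batch, 4096) so they
        # pass the shape check for the 'fc2' output; the argmax, and hence
        # the class index used by the center loss, is unchanged.
        fc2_target = np.zeros((x_batch.shape[0], feature_dim), dtype='float32')
        fc2_target[:, :y_batch.shape[1]] = y_batch
        yield x_batch, {'fc2': fc2_target, 'predictions': y_batch}

# Hypothetical usage in place of datagen.flow(...) above:
# hist = custom_vgg_model.fit_generator(
#     center_loss_flow(datagen, X_train, y_train, batch_Sizes),
#     steps_per_epoch=X_train.shape[0] // batch_Sizes,
#     epochs=epoch_Times, verbose=1)

With a target built this way, _center_loss_func needs no changes, because the argmax of the padded one-hot row is the same class index as before.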
hequn commented 5 years ago

@422301949 1. y_train seems to be an array of shape (6300, 45), while dummy1 has shape (6300, 4096), which exactly matches the expected (None, 4096), so it does not raise an error. 2. I think you had better debug the code in an IDE and inspect the variables at each stage to find the bug, especially in the source of datagen.flow.
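As an illustration of that debugging step (a sketch; the batch size is arbitrary), inspecting one batch shows that flow accepts a plain label array, not a dict, and yields (images, labels) tuples:

x_batch, y_batch = next(datagen.flow(X_train, y_train, batch_size=4))
print(x_batch.shape)  # e.g. (4, 224, 224, 3)
print(y_batch.shape)  # e.g. (4, 45), a single array rather than a dict of targets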

wangjue-wzq commented 5 years ago

@hequn Thank you! I want to compute the loss through a custom center loss. Calculating the center loss requires the output feature of the intermediate layer (fc2). I want the labels to be the y_true of the center_loss function and the feature to be its y_pred input.

Can you help me see what is wrong in the following code?

center_loss = lossclass.get_center_loss(alpha=0.5, num_classes=45, feature_dim=4096)
custom_vgg_model.compile()
custom_vgg_model.fit()

def center_loss(y_true, y_pred):
    return _center_loss_func(y_true, y_pred, alpha, num_classes, centers, feature_dim)
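For context (my reading of the multi-output Keras API, not something stated in the thread): the keys of the loss and y dicts are matched to the model's output names, and Keras checks each target array against the shape of the corresponding output before any loss function runs, which is why a (None, 45) array is rejected for 'fc2'. A minimal check, assumed to run after the model below is built:

print(custom_vgg_model.output_names)                           # ['predictions', 'fc2']
print(custom_vgg_model.get_layer('fc2').output_shape)          # (None, 4096)
print(custom_vgg_model.get_layer('predictions').output_shape)  # (None, 45)
# So y = {'fc2': <(N, 4096) array>, 'predictions': <(N, 45) array>} is required,
# even though the center loss only uses its y_true to look up class indices.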

image_input = Input(shape=(224, 224, 3))
model = VGG16(input_tensor=image_input, include_top=True, weights='imagenet')
model.summary()
last_layer = model.get_layer('fc2').output
feature = last_layer
out = Dense(num_classes, activation='softmax', name='predictions')(last_layer)
custom_vgg_model = Model(inputs=image_input, outputs=[out, feature])
custom_vgg_model.summary()
for layer in custom_vgg_model.layers[:-3]:
    layer.trainable = False
custom_vgg_model.layers[3].trainable
center_loss = lossclass.get_center_loss(alpha=0.5, num_classes=45, feature_dim=4096)
custom_vgg_model.compile(loss={'fc2': center_loss, 'predictions': 'categorical_crossentropy'},
                         loss_weights={'fc2': 1, 'predictions': 1},
                         optimizer=sgd,
                         metrics={'predictions': 'accuracy'})
hist = custom_vgg_model.fit(x=X_train,
                            y={'fc2': y_train, 'predictions': y_train},
                            batch_size=batch_Sizes, epochs=epoch_Times, verbose=1,
                            validation_data=(X_test, {'fc2': y_test, 'predictions': y_test}))

def _center_loss_func(labels,features, alpha, num_classes, centers, feature_dim):
    assert feature_dim == features.get_shape()[1]    
    labels = K.argmax(labels, axis=1)
    labels = tf.to_int32(labels)
    centers_batch = K.gather(centers, labels)
    diff = (1 - alpha) * (centers_batch - features)
    centers = tf.scatter_sub(centers, labels, diff)
    centers_batch = K.gather(centers, labels)
    loss = K.mean(K.square(features - centers_batch))
    return loss

def get_center_loss(alpha, num_classes, feature_dim):
    """Center loss based on the paper "A Discriminative 
       Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """    
    # Each output layer uses one independent set of centers: scope/centers
    centers = K.zeros([num_classes, feature_dim], dtype='float32')
    @functools.wraps(_center_loss_func)
    def center_loss(y_true, y_pred):
        return _center_loss_func(y_true, y_pred, alpha, num_classes, centers, feature_dim)
    return center_loss
hequn commented 5 years ago

@422301949 I am not sure y_train is in the right format for hist = custom_vgg_model.fit(x = X_train, y = {'fc2':y_train,'predictions':y_train}, ...). The labels argument of def _center_loss_func(labels, features, ...) has to be passed correctly.
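One workaround consistent with the loss above (a sketch, not the repository's code; the padding trick and the names pad_labels, fc2_target_train, fc2_target_test are my additions) is to zero-pad the one-hot labels to the fc2 width before passing them as the 'fc2' target. The target then satisfies Keras' shape check, and K.argmax inside _center_loss_func still recovers the correct class index.

import numpy as np

def pad_labels(y_onehot, feature_dim=4096):
    # Zero-pad (N, num_classes) one-hot labels to (N, feature_dim) so they
    # pass the target-shape check for the fc2 output while keeping the
    # argmax (the class index used by the center loss) unchanged.
    padded = np.zeros((y_onehot.shape[0], feature_dim), dtype='float32')
    padded[:, :y_onehot.shape[1]] = y_onehot
    return padded

fc2_target_train = pad_labels(y_train)
fc2_target_test = pad_labels(y_test)

hist = custom_vgg_model.fit(
    x=X_train,
    y={'fc2': fc2_target_train, 'predictions': y_train},
    batch_size=batch_Sizes, epochs=epoch_Times, verbose=1,
    validation_data=(X_test, {'fc2': fc2_target_test, 'predictions': y_test}))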