4uiiurz1 / keras-arcface

Keras implementation of ArcFace, CosFace, and SphereFace
MIT License
283 stars 65 forks source link

low test accuracy #7

Closed Liang-yc closed 5 years ago

Liang-yc commented 5 years ago

Hi, I used your ArcFace implementation to train and test on the Fashion-MNIST dataset. Following your guideline, the test accuracy is less than 0.1. Your test code does not seem to work very well.

Can you provide a copy of model test code which can achieve good test accuracy? Thanks.

4uiiurz1 commented 5 years ago

@Liang-yc Did you set num_features to 3? If so, you should increase num_features (ex. 64). Default num_features is 3 due to visualize.

Liang-yc commented 5 years ago

@Liang-yc Did you set num_features to 3? If so, you should increase num_features (ex. 64). Default num_features is 3 due to visualize.

Thanks for your reply. I have increased num_features from 3 to 10, but the test accuracy is still extremely low. (Sorry, I cannot reproduce the exact results at the moment because I changed my project several days ago.) In your experiments, does ArcFace really work?

4uiiurz1 commented 5 years ago

@Liang-yc Could you paste your code?

Liang-yc commented 5 years ago

@Liang-yc Could you paste your code?

Try this code:


from __future__ import print_function
from __future__ import division

import os
import sklearn.metrics as metrics
from keras.utils import np_utils
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
import keras
from keras.optimizers import SGD,Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, CSVLogger
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model,load_model
from extra_loss import ArcFace
from keras.layers import Input, Dense, Dropout, Lambda,Reshape,BatchNormalization
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from scipy import misc
from tqdm import tqdm
from keras import backend as K
import tensorflow as tf
from keras.applications.mobilenet import MobileNet

import numpy as np

def focal_loss(gamma=2., alpha=.25):
    """Build a Keras loss that averages a focal term with categorical cross-entropy.

    Args:
        gamma: Exponent applied to the modulating factor of both focal terms.
        alpha: Weight of the positive (``y_true == 1``) term; the negative
            (``y_true == 0``) term is weighted by ``1 - alpha``.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` suitable for
        ``model.compile(loss=...)``.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep predictions only where the target is 1; every other position
        # becomes 1, so (1 - p) ** gamma * log(p) is 0 there for gamma > 0.
        positive_probs = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # Keep predictions only where the target is 0; every other position
        # becomes 0, so p ** gamma * log(1 - p) is 0 there for gamma > 0.
        negative_probs = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        positive_term = K.mean(
            alpha * K.pow(1. - positive_probs, gamma) * K.log(positive_probs))
        negative_term = K.mean(
            (1 - alpha) * K.pow(negative_probs, gamma) * K.log(1. - negative_probs))
        focal_part = -positive_term - negative_term

        cross_entropy_part = keras.losses.categorical_crossentropy(y_true, y_pred)
        # Equal-weight blend of the two losses.
        return focal_part * 0.5 + cross_entropy_part * 0.5
    return focal_loss_fixed

def arcface_test(trainX,trainY,testX,testY):
    """Train a MobileNet + ArcFace model and print an embedding-based accuracy.

    The function builds a two-input model (image, one-hot label), trains it
    for 10 epochs with `fit_generator`, saves it to ./model/mobilenet.h5,
    reloads it, cuts it at the third-from-last layer and prints an accuracy
    computed from the argmax of the L2-normalized output of that layer.

    Args:
        trainX: Training images; reshaped to (-1, 28, 28) below.
        trainY: Training labels, passed to `to_categorical` with 10 classes.
        testX: Test images; reshaped to (-1, 28, 28) below. Also used as the
            validation set during training.
        testY: Test labels, passed to `to_categorical` with 10 classes.

    Returns:
        The trained two-input training model (not the truncated feature model).
    """
    batch_size = 100
    nb_classes = 10
    # NOTE(review): nb_epoch is only referenced by commented-out code below;
    # the live fit_generator call hard-codes epochs=10.
    nb_epoch = 300

    # Two inputs: the 56x56 single-channel image and the one-hot label that the
    # ArcFace layer receives alongside the features.
    input =Input(shape=(56,56,1))
    labels = Input((10,))
    # Repeat the single channel 3 times along axis 3 to match MobileNet's
    # 3-channel input_shape.
    input_image_ = Lambda(lambda x: K.repeat_elements(x, 3, 3))(input)
    print(input_image_.shape)
    model = MobileNet(input_shape=(56,56,3),input_tensor=input_image_, include_top=False, pooling='avg')

    # x = keras.layers.GlobalAveragePooling2D()(model.output)

    # Head on top of the pooled MobileNet output:
    # Dropout -> PReLU -> Dense(10) -> Dense(10, 'real_output') -> BatchNorm('fcbn1').
    x = keras.layers.Dropout(0.5)(model.output)
    x = keras.layers.PReLU( name='side_out')(x)
    x = keras.layers.Dense(10)(x)

    real = Dense(10, kernel_initializer='he_normal',
                kernel_regularizer=keras.regularizers.l2(1e-4),name='real_output')(x)
    x = BatchNormalization(name='fcbn1')(real)

    # ArcFace takes [features, labels]; its output, named 'output', is what the
    # loss is attached to in compile() below.
    x = ArcFace(n_classes=10,regularizer=keras.regularizers.l2(1e-4),name='output')([x,labels])
    # `model` is rebound here from the MobileNet backbone to the full two-input model.
    model =keras.models.Model([input,labels],x)

    print("Model created")

    model.summary()

    optimizer =SGD(1e-3)
    model.compile(loss={'output':'categorical_crossentropy'}, optimizer=optimizer, metrics=["accuracy"])
    # model.compile(loss={'output':'categorical_crossentropy','centerlosslayer':zero_loss},loss_weights=[0.8, 0.2], optimizer=optimizer, metrics=["accuracy"])

    # model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=["accuracy"])

    print("Finished compiling")
    print("Building model...")

    # Import data

    height, width=56,56

    # Resize every 28x28 image to 56x56, divide by 255 and add a trailing
    # channel axis.
    trainX = trainX.reshape((-1, 28, 28))
    trainX = np.array([misc.imresize(x, (height, width)).astype(float) for x in tqdm(iter(trainX))]) / 255.
    # trainX=np.uint8(trainX*255)
    trainX = trainX.reshape((-1, height, width,1))
    testX = testX.reshape((-1, 28, 28))

    testX = np.array([misc.imresize(x, (height, width)).astype(float) for x in tqdm(iter(testX))]) / 255.
    testX = testX.reshape((-1, height, width,1))

    # NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean/std; confirm
    # they are intended for this dataset. The generators below additionally
    # apply featurewise centering and std normalization.
    trainX = (trainX-0.1307)/0.3081
    testX = (testX-0.1307)/0.3081
    Y_train = np_utils.to_categorical(trainY, nb_classes)
    Y_test = np_utils.to_categorical(testY, nb_classes)

    # Training generator adds horizontal flips; the validation generator only
    # normalizes.
    generator = ImageDataGenerator(featurewise_center=True,
                                   featurewise_std_normalization=True,
                                   horizontal_flip=True,
                                    )
    validation_data = ImageDataGenerator(featurewise_center=True,
                                            featurewise_std_normalization=True)

    # generator = ImageDataGenerator()

    # Both generators are fitted on the training images only.
    for data in (generator, validation_data):
        data.fit(trainX, seed=0)

    # generator.fit(trainX, seed=0)

    out_dir = "model/"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Load model
    # Resume from an existing file if present; layers are matched by name.
    weights_file = "./model/mobilenet.h5"
    # model.save_weights(weights_file)
    if os.path.exists(weights_file):
        model.load_weights(weights_file,by_name=True)
        print("Model loaded.")

    lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                   cooldown=0, patience=20, min_lr=1e-8)
    # The checkpoint writes weights only to weights_file; the same path is
    # overwritten with the full model by model.save() after training.
    model_checkpoint = ModelCheckpoint(weights_file, monitor="val_acc", save_best_only=True,
                                       save_weights_only=True, mode='auto')
    callbacks = [lr_reducer, model_checkpoint]

    def gen_flow_for_two_inputs_v2(gen,X1, y, dummy,is_train=False):
        # Endless generator yielding ([images, labels], labels): the labels are
        # fed both as the second model input and as the target.
        # NOTE(review): `dummy` and `is_train` are unused, so validation batches
        # also pass the true labels into the ArcFace input.
        genX1 = gen.flow(X1, y, batch_size=batch_size, seed=666)
        # genX2 = gen.flow(X1, y1, batch_size=batch_size, seed=666)
        while True:
            X1i = genX1.__next__()
            # X2i = genX2.__next__()
            # yield X1i[0], X1i[1]
            # if is_train:
            yield [X1i[0], X1i[1]], X1i[1]
            # else:
            #     yield [X1i[0],dummy], X1i[1]

    dummy = np.zeros((batch_size, 10))
    # Train for 10 epochs, validating on the test images.
    model.fit_generator(gen_flow_for_two_inputs_v2(generator,trainX, Y_train,dummy),
                        steps_per_epoch=len(trainX) // batch_size,
                        epochs=10,
                        callbacks=callbacks,
                        validation_data=gen_flow_for_two_inputs_v2(validation_data,testX, Y_test,dummy),
                        validation_steps=testX.shape[0] // batch_size, verbose=1)

    # pred = model.evaluate([testX,Y_test])
    # preds = np.argmax(pred,axis=1)
    # yTrue = np.argmax(Y_test)
    # accuracy = metrics.accuracy_score(yTrue, preds) * 100
    # error = 100 - accuracy
    # print("Accuracy : ", accuracy)
    # print("Error : ", error)

    # model.fit_generator(generator.flow(trainX, Y_train),
    #                     steps_per_epoch=len(trainX) // batch_size,
    #                     epochs=nb_epoch,
    #                     callbacks=callbacks,
    #                     validation_data=validation_data.flow(testX, Y_test),
    #                     validation_steps=testX.shape[0] // batch_size, verbose=1)

    # yPreds = model.predict(testX)
    # yPred = np.argmax(yPreds, axis=1)
    # yTrue = testY
    #
    # accuracy = metrics.accuracy_score(yTrue, yPred) * 100
    # error = 100 - accuracy
    # print("Accuracy : ", accuracy)
    # print("Error : ", error)

    # Save the full model, reload it, and pull one batch of up to 10000
    # normalized test samples (presumably the whole test set — confirm size).
    model.save(weights_file)
    arcface_model = load_model(weights_file, custom_objects={'ArcFace': ArcFace})
    genX1 = validation_data.flow(testX, Y_test, batch_size=10000, seed=666)
    X1i = genX1.__next__()
    # x = arcface_model.layers[-3].output
    # x = keras.layers.Dense(10, activation='softmax')(x)
    # arcface_model = Model(inputs=arcface_model.input[0], outputs=arcface_model.layers[-3].output)
    # Truncate to image input -> output of the third-from-last layer
    # (presumably 'fcbn1', the features fed to ArcFace — verify with summary()).
    arcface_model = Model(inputs=arcface_model.input[0], outputs=arcface_model.layers[-3].output)
    # The truncated model is compiled here, but only predict() is called on it.
    optimizer = Adam(lr=3e-5)  # Using Adam instead of SGD to speed up training
    arcface_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"])

    arcface_features = arcface_model.predict(X1i[0], verbose=0)
    # Scale each feature row to unit L2 norm.
    arcface_features /= np.linalg.norm(arcface_features, axis=1, keepdims=True)
    yTrue = np.argmax(X1i[1], axis=1)
    # NOTE(review): this takes the argmax over the feature dimensions of the
    # truncated model, not over class scores; the ArcFace layer's weights are
    # not involved, so the index is presumably unrelated to the class label —
    # confirm before trusting the accuracy printed below.
    yPred = np.argmax(arcface_features, axis=1)
    accuracy = metrics.accuracy_score(yTrue, yPred) * 100
    error = 100 - accuracy
    print(yTrue, yPred, np.max(yPred))
    print("Accuracy : ", accuracy)
    print("Error : ", error)
    return model

if __name__ == '__main__':
    # Load the dataset from ./data/fashion (using the given source URL) with
    # image shape preserved and no validation split, then run the experiment
    # on its train and test splits.
    fashion_mnist = read_data_sets(
        './data/fashion',
        reshape=False,
        validation_size=0,
        source_url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/',
    )
    train_split, test_split = fashion_mnist.train, fashion_mnist.test
    arcface_test(train_split.images, train_split.labels,
                 test_split.images, test_split.labels)

    # os.system('shutdown -s -f -t 59')

The train epoch number is only 10. But the test accuracy is too low.

600/600 [==============================] - 27s 45ms/step - loss: 4.1397 - acc: 0.7342 - val_loss: 4.1501 - val_acc: 0.7358
2019-09-17 21:33:21.360094: E tensorflow/core/grappler/optimizers/dependency_optimizer.cc:697] Iteration = 0, topological sort failed with message: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.362870: E tensorflow/core/grappler/optimizers/dependency_optimizer.cc:697] Iteration = 1, topological sort failed with message: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.366107: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:502] layout failed: Invalid argument: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.392748: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:502] remapper failed: Invalid argument: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.395162: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:502] arithmetic_optimizer failed: Invalid argument: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.397450: E tensorflow/core/grappler/optimizers/dependency_optimizer.cc:697] Iteration = 0, topological sort failed with message: The graph couldn't be sorted in topological order.
2019-09-17 21:33:21.400164: E tensorflow/core/grappler/optimizers/dependency_optimizer.cc:697] Iteration = 1, topological sort failed with message: The graph couldn't be sorted in topological order.
[3 9 6 ... 0 9 5] [7 2 1 ... 9 2 5] 9
Accuracy :  10.059999999999999
Error :  89.94
4uiiurz1 commented 5 years ago

@Liang-yc I found the problem. In the following code, you are comparing the argmax of arcface_features with the label, but that comparison is meaningless: arcface_features is an embedding vector, not a vector of per-class scores.

yTrue = np.argmax(X1i[1], axis=1)
yPred = np.argmax(arcface_features, axis=1)
accuracy = metrics.accuracy_score(yTrue, yPred) * 100

You try this,

  1. Predict arcface_features of the master image each class.
    master_features = arcface_model.predict(master_images, verbose=0)
    master_features /= np.linalg.norm(master_features, axis=1, keepdims=True) # each feature corresponds to each class
  2. Predict arcface_features of the query images, and calculate dot product of master_features and query_features.
    query_features = arcface_model.predict(query_images, verbose=0)
    query_features /= np.linalg.norm(query_features, axis=1, keepdims=True) # each feature corresponds to each class
    cosine_similarity = query_features @ master_features
    yPred = np.argmax(cosine_similarity, axis=1)
    accuracy = metrics.accuracy_score(yTrue, yPred) * 100

    I haven't tried above code, so it might be wrong somewhere! I'm not familiar with re-identification tasks, so plz refer to the arcface paper or other re-identification papers.

4uiiurz1 commented 5 years ago

@Liang-yc Another simple solution (When the train classes and the test classes are the same):

arcface_model = load_model(weights_file, custom_objects={'ArcFace': ArcFace})
genX1 = validation_data.flow(testX, Y_test, batch_size=10000, seed=666)
X1i = genX1.__next__()
outputs = arcface_model.predict(X1i[0], verbose=0)
yPred = np.argmax(outputs, axis=1)
accuracy = metrics.accuracy_score(yTrue, yPred) * 100
Liang-yc commented 5 years ago

@Liang-yc Another simple solution (When the train classes and the test classes are the same):

arcface_model = load_model(weights_file, custom_objects={'ArcFace': ArcFace})
genX1 = validation_data.flow(testX, Y_test, batch_size=10000, seed=666)
X1i = genX1.__next__()
outputs = arcface_model.predict(X1i[0], verbose=0)
yPred = np.argmax(outputs, axis=1)
accuracy = metrics.accuracy_score(yTrue, yPred) * 100

It does not work. Using the train images in testing phase, the accuracy is not consistent with the accuracy in training phase.

Liang-yc commented 5 years ago
  1. master image

What does "master image" mean ? All the training images with the same label?

4uiiurz1 commented 5 years ago

@Liang-yc

Sry! I misunderstood! Plz try this code:

# Grab the `W` attribute of the ArcFace layer named 'output' (presumably its
# class-weight matrix) before `arcface_model` is rebound to the truncated model.
W = arcface_model.get_layer('output').W
# Truncate to image input -> output of the third-from-last layer
# (presumably the features fed into ArcFace — confirm with model.summary()).
arcface_model = Model(inputs=arcface_model.input[0], outputs=arcface_model.layers[-3].output)
arcface_features = arcface_model.predict(X1i[0], verbose=0)
# Scale each feature row to unit L2 norm.
arcface_features /= np.linalg.norm(arcface_features, axis=1, keepdims=True)
yTrue = np.argmax(X1i[1], axis=1)
# Score every sample against W and take the best-scoring column as the class.
yPred = np.argmax(K.eval(arcface_features @ W), axis=1)
accuracy = metrics.accuracy_score(yTrue, yPred) * 100
error = 100 - accuracy
print(yTrue, yPred, np.max(yPred))
print("Accuracy : ", accuracy)
print("Error : ", error)
Liang-yc commented 5 years ago

@Liang-yc

Sry! I misunderstood! Plz try this code:

W = arcface_model.get_layer('output').W
arcface_model = Model(inputs=arcface_model.input[0], outputs=arcface_model.layers[-3].output)
arcface_features = arcface_model.predict(X1i[0], verbose=0)
arcface_features /= np.linalg.norm(arcface_features, axis=1, keepdims=True)
yTrue = np.argmax(X1i[1], axis=1)
yPred = np.argmax(K.eval(arcface_features @ W), axis=1)
accuracy = metrics.accuracy_score(yTrue, yPred) * 100
error = 100 - accuracy
print(yTrue, yPred, np.max(yPred))
print("Accuracy : ", accuracy)
print("Error : ", error)

It works, thx.

almoghitelman commented 3 years ago

Hi, I tried to use the code above to get a prediction for a test image, but `K.eval(arcface_features @ W)` returns a matrix with negative values that do not sum to 1. The whole training process is built the same way as shown here. As I understand it, the predicted matrix values should be probabilities that sum to 1. Is that correct? Where is the problem in my code? The results and model are attached.

Thanks!

Model: "model_1"


| Layer (type) | Output Shape | Param # | Connected to |
|---|---|---|---|
| input_1 (InputLayer) | (None, 112, 112, 3) | 0 | |
| conv2d_1 (Conv2D) | (None, 112, 112, 16) | 448 | input_1[0][0] |
| batch_normalization_1 (BatchNor | (None, 112, 112, 16) | 64 | conv2d_1[0][0] |
| activation_1 (Activation) | (None, 112, 112, 16) | 0 | batch_normalization_1[0][0] |
| conv2d_2 (Conv2D) | (None, 112, 112, 16) | 2320 | activation_1[0][0] |
| batch_normalization_2 (BatchNor | (None, 112, 112, 16) | 64 | conv2d_2[0][0] |
| activation_2 (Activation) | (None, 112, 112, 16) | 0 | batch_normalization_2[0][0] |
| max_pooling2d_1 (MaxPooling2D) | (None, 56, 56, 16) | 0 | activation_2[0][0] |
| conv2d_3 (Conv2D) | (None, 56, 56, 32) | 4640 | max_pooling2d_1[0][0] |
| batch_normalization_3 (BatchNor | (None, 56, 56, 32) | 128 | conv2d_3[0][0] |
| activation_3 (Activation) | (None, 56, 56, 32) | 0 | batch_normalization_3[0][0] |
| conv2d_4 (Conv2D) | (None, 56, 56, 32) | 9248 | activation_3[0][0] |
| batch_normalization_4 (BatchNor | (None, 56, 56, 32) | 128 | conv2d_4[0][0] |
| activation_4 (Activation) | (None, 56, 56, 32) | 0 | batch_normalization_4[0][0] |
| max_pooling2d_2 (MaxPooling2D) | (None, 28, 28, 32) | 0 | activation_4[0][0] |
| conv2d_5 (Conv2D) | (None, 28, 28, 64) | 18496 | max_pooling2d_2[0][0] |
| batch_normalization_5 (BatchNor | (None, 28, 28, 64) | 256 | conv2d_5[0][0] |
| activation_5 (Activation) | (None, 28, 28, 64) | 0 | batch_normalization_5[0][0] |
| conv2d_6 (Conv2D) | (None, 28, 28, 64) | 36928 | activation_5[0][0] |
| batch_normalization_6 (BatchNor | (None, 28, 28, 64) | 256 | conv2d_6[0][0] |
| activation_6 (Activation) | (None, 28, 28, 64) | 0 | batch_normalization_6[0][0] |
| max_pooling2d_3 (MaxPooling2D) | (None, 14, 14, 64) | 0 | activation_6[0][0] |
| conv2d_7 (Conv2D) | (None, 14, 14, 128) | 73856 | max_pooling2d_3[0][0] |
| batch_normalization_7 (BatchNor | (None, 14, 14, 128) | 512 | conv2d_7[0][0] |
| activation_7 (Activation) | (None, 14, 14, 128) | 0 | batch_normalization_7[0][0] |
| conv2d_8 (Conv2D) | (None, 14, 14, 128) | 147584 | activation_7[0][0] |
| batch_normalization_8 (BatchNor | (None, 14, 14, 128) | 512 | conv2d_8[0][0] |
| activation_8 (Activation) | (None, 14, 14, 128) | 0 | batch_normalization_8[0][0] |
| max_pooling2d_4 (MaxPooling2D) | (None, 7, 7, 128) | 0 | activation_8[0][0] |
| conv2d_9 (Conv2D) | (None, 7, 7, 256) | 295168 | max_pooling2d_4[0][0] |
| batch_normalization_9 (BatchNor | (None, 7, 7, 256) | 1024 | conv2d_9[0][0] |
| activation_9 (Activation) | (None, 7, 7, 256) | 0 | batch_normalization_9[0][0] |
| conv2d_10 (Conv2D) | (None, 7, 7, 256) | 590080 | activation_9[0][0] |
| batch_normalization_10 (BatchNo | (None, 7, 7, 256) | 1024 | conv2d_10[0][0] |
| activation_10 (Activation) | (None, 7, 7, 256) | 0 | batch_normalization_10[0][0] |
| max_pooling2d_5 (MaxPooling2D) | (None, 3, 3, 256) | 0 | activation_10[0][0] |
| batch_normalization_11 (BatchNo | (None, 3, 3, 256) | 1024 | max_pooling2d_5[0][0] |
| dropout_1 (Dropout) | (None, 3, 3, 256) | 0 | batch_normalization_11[0][0] |
| flatten_1 (Flatten) | (None, 2304) | 0 | dropout_1[0][0] |
| dense_1 (Dense) | (None, 128) | 295040 | flatten_1[0][0] |
| batch_normalization_12 (BatchNo | (None, 128) | 512 | dense_1[0][0] |
| input_2 (InputLayer) | (None, 47) | 0 | |
| arc_face_1 (ArcFace) | (None, 47) | 6016 | batch_normalization_12[0][0], input_2[0][0] |

# Turn the single image into a batch of one and divide pixel values by 255.
x_new = np.expand_dims(x_new, axis=0)
x_new = np.array(x_new, dtype=np.float32) / 255.0

arcface_model = load_model(Params['model_path'], custom_objects={'ArcFace': ArcFace})
arcface_model.summary()
# Read the `W` attribute of the ArcFace layer (presumably its class-weight
# matrix) before `arcface_model` is rebound to the truncated model below.
W = arcface_model.get_layer('arc_face_1').W
#W = arcface_model.get_weights
# Truncate to image input -> output of the third-from-last layer
# (presumably batch_normalization_12, per the summary order — confirm).
arcface_model = Model(inputs=arcface_model.input[0], outputs=arcface_model.layers[-3].output)
arcface_features = arcface_model.predict(x_new, verbose=0)
# Scale each feature row to unit L2 norm.
arcface_features /= np.linalg.norm(arcface_features, axis=1, keepdims=True)
print(W)
print(arcface_features)
#yTrue = np.argmax(X1i[1], axis=1)
# NOTE(review): `calc` is a plain matrix product of the normalized features
# with W as stored (W is not normalized in this snippet) and no softmax is
# applied, so its entries are raw scores: they may be negative and need not
# sum to 1. Only their argmax is used as the predicted class.
calc = K.eval(arcface_features @ W)
yPred = np.argmax(calc, axis=1)
print(calc)

calc matrix: [[-1.3757099 -1.3127273 -1.0342306 -1.052017 -1.3646903 -0.99030894 -1.0056239 -0.9335255 -1.0159407 -0.9508162 -0.87866336 -1.1595434 -1.0569867 -1.0423982 -1.1982433 -1.1383088 -0.95849425 -1.0996643 -0.96682745 -0.96783435 -1.1292582 -1.1419181 -1.213215 -1.1428651 -1.8159819 -1.8428197 -1.4751792 -2.1246967 -2.9295607 -1.2175179 -1.458914 -5.2540975 -2.6965609 -0.25673357 -1.9802661 -1.4791859 -2.3778896 -2.2576988 -2.724395 -1.3704951 -4.3583426 -0.5972382 -0.281455 -0.49513057 -0.68900996 -3.0312972 -0.28473428]]