keras-team / keras

Deep Learning for humans
http://keras.io/
Apache License 2.0

training accuracy decreases as epoch increases #14707

Closed: aguswirawan closed this issue 3 years ago

aguswirawan commented 3 years ago

Dear everybody, I have a problem with my model: its accuracy decreases as the number of epochs increases.

epoch test_accuracy test_loss
1 1 10698.17
2 1 9863.695
3 1 9092.648
4 1 8380.9
5 1 7723.852
6 1 7117.128
7 1 6556.73
8 1 6038.933
9 1 5560.432
10 1 5118.174
11 1 4709.375
12 1 4331.493
13 1 3982.192
14 1 3659.34
15 1 3360.969
16 1 3085.27
17 1 2830.575
18 1 2595.343
19 0.99375 2378.155
20 0.99375 2177.692
21 0.990625 1992.738
22 0.989063 1822.165
23 0.9875 1664.927
24 0.9875 1520.054
25 0.980943 1386.645
26 0.980943 1263.864
27 0.976256 1150.931
28 0.977818 1047.124
29 0.979381 951.7716
30 0.980943 864.2477
31 0.979381 783.97
32 0.969699 710.3975
33 0.980943 643.0256
34 0.979381 581.3846
35 0.971262 525.0378
36 0.972824 473.5788
37 0.967217 426.6296
38 0.970648 383.8396
39 0.965961 344.8818
40 0.944728 309.4534
41 0.929103 277.2726
42 0.889428 248.0783
43 0.799168 221.6274
44 0.729863 197.6945
45 0.737982 176.0702
46 0.510032 156.5599
47 0.510032 138.9832
48 0.510032 123.1729
49 0.510032 108.9739
50 0.510032 96.24297

Is this an overfitting or an underfitting phenomenon?

namaannn commented 3 years ago

This is complete overfitting. You can alter the learning rate or try changing the hyperparameters.
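For example, a minimal sketch of both suggestions in tf.keras, assuming a compiled `model` and NumPy arrays `x_train`/`y_train` (hypothetical names): `ReduceLROnPlateau` lowers the learning rate when the validation loss stalls, and `EarlyStopping` halts training before the kind of accuracy collapse shown in the table above.

from tensorflow import keras

# a minimal sketch, assuming `model` is a compiled tf.keras model and
# `x_train`, `y_train` are NumPy arrays (hypothetical names)
callbacks = [
    # halve the learning rate when val_loss stops improving for 3 epochs
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3),
    # stop early and keep the best weights after 5 stagnant epochs
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
]
model.fit(x_train, y_train, validation_split=0.2, epochs=50, batch_size=128,
          callbacks=callbacks)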

namaannn commented 3 years ago

@aguswirawan

rmothukuru commented 3 years ago

@aguswirawan, In order to expedite the troubleshooting process, please provide a code snippet to reproduce the issue reported here. Thanks!

aguswirawan commented 3 years ago

Dear rmothukuru

this is my CNN code:

import sys
import os
import math
import time

import numpy as np
import pandas as pd
import scipy.io as sio
import sklearn
from sklearn.metrics import (precision_score, recall_score, roc_auc_score, f1_score,
                             confusion_matrix, roc_curve, auc)
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

def minus(item):
    return item - 1

input_height = 9
input_width = 9

input_channel_num = 4

conv_fuse = "plus"

conv_1_shape = '4*4*1*16'
pool_1_shape = 'None'

conv_2_shape = 'None'
conv_2_shape = '4*4*1*32'
pool_2_shape = 'None'

conv_3_shape = 'None'
conv_3_shape = '4*4*1*64'
pool_3_shape = 'None'

conv_4_shape = '1*1*128*4'
pool_4_shape = 'None'

time_step = 1
window_size = 1

# convolution and fully-connected parameters

fc_size = 1024

dropout_prob = 0.5
np.random.seed(3)

calibration = 'N'
norm_type = '2D'
regularization_method = 'dropout'
enable_penalty = True

args = sys.argv[:]
with_or_not = "with"            # with/without
input_file = "res01"            # s01, s02, ..., s32
input_file = str(sys.argv[1])

arousal_or_valence = "arousal"  # valence/arousal/dominance
arousal_or_valence = str(sys.argv[2])

inputs = [1, 2, 3, 4]  # 1 / 2 / 3 / 4 / 1 2 / 1 3 / ... combinations: theta, theta alpha, ..., theta alpha beta gamma bands
bands = list(map(minus, inputs))
print(bands)
input_channel_num = len(bands) * time_step

dataset_dir = "3D_dataset_default_div/"+with_or_not+"base/DE"

# load training set

print("loading ",dataset_dir+input_file,".mat") data_file = sio.loadmat(dataset_dir+input_file+".mat")

cnn_datasets = data_file["data"]
label_key = arousal_or_valence + "_labels"
labels = data_file[label_key]

# 2018-5-16 modified

label_index = [i for i in range(0,labels.shape[1],time_step)]

labels = labels[0, [label_index]]
labels = np.squeeze(np.transpose(labels))

print("loaded shape:",labels.shape)

labels_backup = labels

print("cnn_dataset shape before reshape:", np.shape(cnn_datasets))

cnn_datasets = cnn_datasets.transpose(0,2,3,1)

cnn_datasets = cnn_datasets[:,:,:,bands]

cnn_datasets = cnn_datasets.reshape(len(cnn_datasets)//time_step, window_size,input_height,input_width,input_channel_num)

cnn_datasets = cnn_datasets.reshape(len(cnn_datasets), window_size, 9,9,input_channel_num)

print("cnn_dataset shape after reshape:", np.shape(cnn_datasets))

one_hot_labels = np.array(list(pd.get_dummies(labels)))

print("one_hot_labels:",one_hot_labels.shape)

labels = np.asarray(pd.get_dummies(labels), dtype=np.int8)

# shuffle data

index = np.array(range(0, len(labels)))
np.random.shuffle(index)

cnn_datasets = cnn_datasets[index]
labels = labels[index]

print("**(" + time.asctime(time.localtime(time.time())) + ") Load and Split dataset End **\n") print("**(" + time.asctime(time.localtime(time.time())) + ") Define parameters and functions Begin: **\n")

# input parameters

n_input_ele = 32
n_time_step = window_size

n_labels = 2

# training parameters

lambda_loss_amount = 0.5
training_epochs = 50

batch_size = 128

# kernel parameters

kernel_height_1st = 4
kernel_width_1st = 4

kernel_height_2nd = 4
kernel_width_2nd = 4

kernel_height_3rd = 4
kernel_width_3rd = 4

kernel_height_4th = 1
kernel_width_4th = 1

kernel_stride = 1
conv_channel_num = 64

# pooling parameters

pooling_height = 2
pooling_width = 2
pooling_stride = 2

# algorithm parameters

learning_rate = 1e-4

def get_index():
    test_index = []
    for i in range(0, 40):
        temp_index = [j for j in range(i * 60, i * 60 + 30)]
        test_index = np.append(test_index, temp_index)

    fine_tune_index = np.setxor1d([i for i in range(0, 2400)], test_index)

    test_index = list(map(int, test_index))
    fine_tune_index = list(map(int, fine_tune_index))
    return test_index, fine_tune_index

def weight_variable(shape, name):
    initial = tf.random.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W, kernel_stride):
    # API: strides[0] = strides[3] = 1 is required
    return tf.nn.conv2d(input=x, filters=W, strides=[1, kernel_stride, kernel_stride, 1], padding='SAME')

def apply_conv1d(x, filter_width, in_channels, out_channels, kernel_stride, name):
    weight = weight_variable([filter_width, in_channels, out_channels], name)
    # each feature map shares the same weight and bias
    bias = bias_variable([out_channels], name)
    # NOTE: the original called an undefined `conv1d`; tf.nn.conv1d is used here (this helper is never called)
    return tf.nn.relu(tf.add(tf.nn.conv1d(x, weight, stride=kernel_stride, padding='SAME'), bias))

def apply_conv2d(x, filter_height, filter_width, in_channels, out_channels, kernel_stride, name):
    weight = weight_variable([filter_height, filter_width, in_channels, out_channels], name)
    # each feature map shares the same weight and bias
    bias = bias_variable([out_channels], name)
    print("weight shape:", np.shape(weight))
    print("x shape:", np.shape(x))
    # tf.layers.batch_normalization()  # bare call left disabled; batch norm is not actually applied here
    return tf.nn.relu(tf.add(conv2d(x, weight, kernel_stride), bias))

def apply_max_pooling(x, pooling_height, pooling_width, pooling_stride):
    # API: ksize[0] = ksize[3] = 1 and strides[0] = strides[3] = 1 are required
    return tf.nn.max_pool2d(input=x, ksize=[1, pooling_height, pooling_width, 1],
                            strides=[1, pooling_stride, pooling_stride, 1], padding='SAME')

def apply_fully_connect(x, x_size, fc_size, name):
    fc_weight = weight_variable([x_size, fc_size], name)
    fc_bias = bias_variable([fc_size], name)
    return tf.nn.relu(tf.add(tf.matmul(x, fc_weight), fc_bias))

def apply_readout(x, x_size, readout_size, name):
    readout_weight = weight_variable([x_size, readout_size], name)
    readout_bias = bias_variable([readout_size], name)
    return tf.add(tf.matmul(x, readout_weight), readout_bias)

print("\n**(" + time.asctime(time.localtime(time.time())) + ") Define parameters and functions End **")

print("\n**(" + time.asctime(time.localtime(time.time())) + ") Define NN structure Begin: **")

# input placeholders

cnn_in = tf.compat.v1.placeholder(tf.float32, shape=[None, input_height, input_width, input_channel_num], name='cnn_in')
Y = tf.compat.v1.placeholder(tf.float32, shape=[None, n_labels], name='Y')
keep_prob = tf.compat.v1.placeholder(tf.float32, name='keep_prob')
phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')

###########################################################################################

# add cnn parallel to network

###########################################################################################

# first CNN layer

conv_1 = apply_conv2d(cnn_in, kernel_height_1st, kernel_width_1st, input_channel_num, conv_channel_num, kernel_stride,'conv1')

pool_1 = apply_max_pooling(conv_1, pooling_height, pooling_width, pooling_stride)

print("\nconv_1 shape:", conv_1.shape)

print("\npool_1 shape:", pool_1.shape)

# second CNN layer

conv_2 = apply_conv2d(conv_1, kernel_height_2nd, kernel_width_2nd, conv_channel_num, conv_channel_num * 2, kernel_stride,'conv2')

pool_2 = apply_max_pooling(conv_2, pooling_height, pooling_width, pooling_stride)

print("\nconv_2 shape:", conv_2.shape)

print("\npool_2 shape:", pool_2.shape)

# third CNN layer

conv_3 = apply_conv2d(conv_2, kernel_height_3rd, kernel_width_3rd, conv_channel_num * 2, conv_channel_num * 4, kernel_stride, 'conv3')

conv_3 = apply_max_pooling(conv_3, pooling_height, pooling_width, pooling_stride)

print("\nconv_3 shape:", conv_3.shape)

print("\npool_3 shape:", conv_3.shape)

conv_3 = apply_conv2d(conv_3, 1, 1,conv_channel_num * 4, conv_channel_num,kernel_stride,'conv4')

conv_3 = apply_max_pooling(conv_3, pooling_height, pooling_width, pooling_stride)

print("\nconv_3 shape:", conv_3.shape)

# fully connected layer

shape = conv_3.get_shape().as_list()
conv_3_flat = tf.reshape(conv_3, [-1, shape[1] * shape[2] * shape[3]])
cnn_fc = apply_fully_connect(conv_3_flat, shape[1] * shape[2] * shape[3], fc_size, "fc")

print("shape after cnn_full", np.shape(conv_3_shape))

# dropout regularizer
# Dropout (to reduce overfitting; useful when training a very large neural network)
# We turn dropout on during training and off during testing

cnn_fc_drop = tf.nn.dropout(cnn_fc, rate=1 - (keep_prob))

# readout layer

y_ = apply_readout(cnn_fc_drop, fc_size, n_labels, 'readout')
y_pred = tf.argmax(input=tf.nn.softmax(y_), axis=1, name="y_pred")
y_posi = tf.nn.softmax(y_, name="y_posi")

# L2 regularization

l2 = lambda_loss_amount * sum(
    tf.nn.l2_loss(tf_var) for tf_var in tf.compat.v1.trainable_variables()
)

if enable_penalty:
    # cross-entropy cost function with L2 penalty
    cost = tf.reduce_mean(input_tensor=tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=tf.stop_gradient(Y)) + l2,
                          name='loss')
else:
    # cross-entropy cost function
    cost = tf.reduce_mean(input_tensor=tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=tf.stop_gradient(Y)),
                          name='loss')

optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate).minimize(cost)

# get correct predictions and accuracy

correct_prediction = tf.equal(tf.argmax(input=tf.nn.softmax(y_), axis=1), tf.argmax(input=Y, axis=1))
accuracy = tf.reduce_mean(input_tensor=tf.cast(correct_prediction, tf.float32), name='accuracy')

tf.compat.v1.summary.scalar('accuracy', accuracy)  # graph-mode summary, consistent with the compat.v1 usage above

print("\n**(" + time.asctime(time.localtime(time.time())) + ") Define NN structure End **")

print("\n**(" + time.asctime(time.localtime(time.time())) + ") Train and Test NN Begin: **")

# run

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True

fold = 10
for curr_fold in range(0, fold):
    print("folder: ", curr_fold)
    fold_size = cnn_datasets.shape[0] // fold
    indexes_list = [i for i in range(len(cnn_datasets))]
    indexes = np.array(indexes_list)
    split_list = [i for i in range(curr_fold * fold_size, (curr_fold + 1) * fold_size)]
    split = np.array(split_list)
    cnn_test_x = cnn_datasets[split]
    test_y = labels[split]

    split = np.array(list(set(indexes_list) ^ set(split_list)))
    cnn_train_x = cnn_datasets[split]
    train_y = labels[split]
    train_sample = train_y.shape[0]

    # shuffle data
    index = np.array(range(0, len(train_y)))
    np.random.shuffle(index)

    cnn_train_x = cnn_train_x[index]
    train_y = train_y[index]

    print("training examples:", train_sample)
    test_sample = test_y.shape[0]
    print("test examples    :", test_sample)
    # set train batch number per epoch
    batch_num_per_epoch = math.floor(cnn_train_x.shape[0] / batch_size) + 1
    # set test batch number per epoch
    accuracy_batch_size = batch_size
    train_accuracy_batch_num = batch_num_per_epoch
    test_accuracy_batch_num = math.floor(cnn_test_x.shape[0] / batch_size) + 1

    with tf.compat.v1.Session(config=config) as session:
        session.run(tf.compat.v1.global_variables_initializer())
        train_accuracy_save = np.zeros(shape=[0], dtype=float)
        test_accuracy_save = np.zeros(shape=[0], dtype=float)
        test_loss_save = np.zeros(shape=[0], dtype=float)
        train_loss_save = np.zeros(shape=[0], dtype=float)
        for epoch in range(training_epochs):
            print("learning rate: ", learning_rate)
            cost_history = np.zeros(shape=[0], dtype=float)
            for b in range(batch_num_per_epoch):
                start = b * batch_size
                if (b + 1) * batch_size > train_y.shape[0]:
                    offset = train_y.shape[0] % batch_size
                else:
                    offset = batch_size
                # offset = (b * batch_size) % (train_y.shape[0] - batch_size)
                # print("start->end:", start, "->", start + offset)
                cnn_batch = cnn_train_x[start:(start + offset), :, :, :, :]
                cnn_batch = cnn_batch.reshape(len(cnn_batch) * window_size, input_height, input_width, input_channel_num)
                # print("cnn_batch shape:", cnn_batch.shape)
                batch_y = train_y[start:(offset + start), :]
                _, c = session.run([optimizer, cost],
                                   feed_dict={cnn_in: cnn_batch, Y: batch_y, keep_prob: 1 - dropout_prob,
                                              phase_train: True})
                cost_history = np.append(cost_history, c)
            if (epoch % 1 == 0):
                train_accuracy = np.zeros(shape=[0], dtype=float)
                test_accuracy = np.zeros(shape=[0], dtype=float)
                test_loss = np.zeros(shape=[0], dtype=float)
                train_loss = np.zeros(shape=[0], dtype=float)

                for i in range(train_accuracy_batch_num):
                    start = i * batch_size
                    if (i + 1) * batch_size > train_y.shape[0]:
                        offset = train_y.shape[0] % batch_size
                    else:
                        offset = batch_size
                    # offset = (i * accuracy_batch_size) % (train_y.shape[0] - accuracy_batch_size)
                    train_cnn_batch = cnn_train_x[start:(start + offset), :, :, :, :]
                    train_cnn_batch = train_cnn_batch.reshape(len(train_cnn_batch) * window_size, input_height, input_width, input_channel_num)
                    train_batch_y = train_y[start:(start + offset), :]

                    train_a, train_c = session.run([accuracy, cost],
                                                   feed_dict={cnn_in: train_cnn_batch, Y: train_batch_y, keep_prob: 1.0, phase_train: False})

                    train_loss = np.append(train_loss, train_c)
                    train_accuracy = np.append(train_accuracy, train_a)
                print("(" + time.asctime(time.localtime(time.time())) + ") Epoch: ", epoch + 1, " Training Cost: ",
                      np.mean(train_loss), "Training Accuracy: ", np.mean(train_accuracy))
                train_accuracy_save = np.append(train_accuracy_save, np.mean(train_accuracy))
                train_loss_save = np.append(train_loss_save, np.mean(train_loss))

                if (np.mean(train_accuracy) < 0.70):
                    learning_rate = 1e-4
                elif (0.70 <= np.mean(train_accuracy) < 0.80):
                    learning_rate = 1e-5
                elif (0.80 <= np.mean(train_accuracy) < 0.85):
                    learning_rate = 1e-6
                elif (0.85 <= np.mean(train_accuracy) < 0.90):
                    learning_rate = 1e-7
                elif (0.90 <= np.mean(train_accuracy)):
                    learning_rate = 1e-8
                # if(np.mean(train_accuracy)<0.7):
                #     learning_rate=1e-4
                # elif(0.7<np.mean(train_accuracy)<0.85):
                #     learning_rate=5e-5
                # elif(0.85<np.mean(train_accuracy)):
                #     learning_rate=1e-6
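                # NOTE: reassigning the Python float `learning_rate` above has no
                # effect on training, because AdamOptimizer captured the initial
                # value when the graph was built. A hedged sketch of a working
                # schedule (hypothetical name `lr_ph`) would feed the rate in instead:
                #     lr_ph = tf.compat.v1.placeholder(tf.float32, name='lr')
                #     optimizer = tf.compat.v1.train.AdamOptimizer(lr_ph).minimize(cost)
                # and then pass `lr_ph: learning_rate` in every training feed_dict.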

                for j in range(test_accuracy_batch_num):
                    start = j * batch_size
                    if (j + 1) * batch_size > test_y.shape[0]:
                        offset = test_y.shape[0] % batch_size
                    else:
                        offset = batch_size
                    # offset = (j * accuracy_batch_size) % (test_y.shape[0] - accuracy_batch_size)
                    test_cnn_batch = cnn_test_x[start:(offset + start), :, :, :, :]
                    test_cnn_batch = test_cnn_batch.reshape(len(test_cnn_batch) * window_size, input_height, input_width, input_channel_num)
                    test_batch_y = test_y[start:(offset + start), :]

                    test_a, test_c = session.run([accuracy, cost],
                                                 feed_dict={cnn_in: test_cnn_batch, Y: test_batch_y, keep_prob: 1.0, phase_train: False})

                    test_accuracy = np.append(test_accuracy, test_a)
                    test_loss = np.append(test_loss, test_c)

                print("(" + time.asctime(time.localtime(time.time())) + ") Epoch: ", epoch + 1, " Test Cost: ",
                      np.mean(test_loss), "Test Accuracy: ", np.mean(test_accuracy), "\n")
                test_accuracy_save = np.append(test_accuracy_save, np.mean(test_accuracy))
                test_loss_save = np.append(test_loss_save, np.mean(test_loss))
            # reshuffle
            index = np.array(range(0, len(train_y)))
            np.random.shuffle(index)
            cnn_train_x = cnn_train_x[index]
            train_y = train_y[index]

        test_accuracy = np.zeros(shape=[0], dtype=float)
        test_loss = np.zeros(shape=[0], dtype=float)
        test_pred = np.zeros(shape=[0], dtype=float)
        test_true = np.zeros(shape=[0, 2], dtype=float)
        test_posi = np.zeros(shape=[0, 2], dtype=float)
        for k in range(test_accuracy_batch_num):
            start = k * batch_size
            if (k + 1) * batch_size > test_y.shape[0]:
                offset = test_y.shape[0] % batch_size
            else:
                offset = batch_size
            # offset = (k * accuracy_batch_size) % (test_y.shape[0] - accuracy_batch_size)
            test_cnn_batch = cnn_test_x[start:(offset + start), :, :, :, :]
            test_cnn_batch = test_cnn_batch.reshape(len(test_cnn_batch) * window_size, input_height, input_width, input_channel_num)
            test_batch_y = test_y[start:(offset + start), :]

            test_a, test_c, test_p, test_r = session.run([accuracy, cost, y_pred, y_posi],
                                                         feed_dict={cnn_in: test_cnn_batch, Y: test_batch_y, keep_prob: 1.0, phase_train: False})
            test_t = test_batch_y

            test_accuracy = np.append(test_accuracy, test_a)
            test_loss = np.append(test_loss, test_c)
            test_pred = np.append(test_pred, test_p)
            test_true = np.vstack([test_true, test_t])
            test_posi = np.vstack([test_posi, test_r])
        # test_true = tf.argmax(test_true, 1)
        test_pred_1_hot = np.asarray(pd.get_dummies(test_pred), dtype=np.int8)
        test_true_list = tf.argmax(input=test_true, axis=1).eval()

        print("(" + time.asctime(time.localtime(time.time())) + ") Final Test Cost: ", np.mean(test_loss),
              "Final Test Accuracy: ", np.mean(test_accuracy))
        # save result
        # os.system("mkdir -p ./result/cnn_rnn_parallel/tune_rnn_layer/" + output_dir)
        result = pd.DataFrame(
            {'epoch': range(1, epoch + 2), "train_accuracy": train_accuracy_save, "test_accuracy": test_accuracy_save,
             "train_loss": train_loss_save, "test_loss": test_loss_save})

        ins = pd.DataFrame({'conv_1': conv_1_shape, 'conv_2': conv_2_shape, 'conv_3': conv_3_shape,
                            'cnn_fc': fc_size, 'accuracy': np.mean(test_accuracy),
                            'keep_prob': 1 - dropout_prob, "epoch": epoch + 1, "norm": norm_type,
                            "learning_rate": learning_rate, "regularization": regularization_method,
                            "train_sample": train_sample, "test_sample": test_sample, "batch_size": batch_size}, index=[0])
        # summary = pd.DataFrame({'class': one_hot_labels, 'recall': test_recall, 'precision': test_precision,
        #                         'f1_score': test_f1})  # , 'roc_auc':test_auc})
        file_dir = ""
        for i in inputs:
            file_dir = file_dir + str(i)
        # file_dir = str(band)+str(band_1)+str(band_2)+str(band_3)
        result_dir = "result_default_div_x/" + with_or_not + "/" + file_dir + "/" + arousal_or_valence
        file_path = result_dir + "\\" + input_file + "_" + str(curr_fold) + ".xlsx"

        if os.path.isdir(result_dir) == False:
            os.makedirs(result_dir)
        print("saving ", file_path)
        writer = pd.ExcelWriter(file_path)
        ins.to_excel(writer, 'condition', index=False)
        result.to_excel(writer, 'result', index=False)
        writer.save()

        # save model parameters
        # model_dict = {}
        # for variable in tf.trainable_variables():
        #     print(variable.name, "-->", variable.get_shape())
        #     model_dict[variable.name] = session.run(variable)
        # sio.savemat(input_file + ".mat", model_dict)
        print("**********(" + time.asctime(time.localtime(time.time())) + ") Train and Test NN End **********\n")
rmothukuru commented 3 years ago

Hi there,

We are checking to see if you still need help with this issue, as you are using an older version of TensorFlow (1.x), which has officially reached end of life. We recommend that you upgrade to version 2.4 or later and let us know whether the issue still persists in newer versions.
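For reference, a rough tf.keras (TF 2.x) sketch of the network above could look like the following; the layer sizes mirror the script's constants, but treat it as an untested outline rather than a drop-in replacement.

import tensorflow as tf

# rough TF 2.x outline of the graph-style CNN above (untested sketch)
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(9, 9, 4)),                  # input_height x input_width x input_channel_num
    tf.keras.layers.Conv2D(64, 4, padding="same", activation="relu"),   # conv1
    tf.keras.layers.Conv2D(128, 4, padding="same", activation="relu"),  # conv2
    tf.keras.layers.Conv2D(256, 4, padding="same", activation="relu"),  # conv3
    tf.keras.layers.MaxPooling2D(2, 2, padding="same"),
    tf.keras.layers.Conv2D(64, 1, padding="same", activation="relu"),   # conv4 (1x1)
    tf.keras.layers.MaxPooling2D(2, 2, padding="same"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation="relu"),                     # fc_size
    tf.keras.layers.Dropout(0.5),                                       # dropout_prob
    tf.keras.layers.Dense(2, activation="softmax"),                     # n_labels
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss="categorical_crossentropy",
              metrics=["accuracy"])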

This issue will be closed automatically 7 days from now. If you still need help with this issue, please open a new issue against 2.x, and we will get you the right help.