tensorflow / tensorflow

An Open Source Machine Learning Framework for Everyone
https://tensorflow.org
Apache License 2.0

How to solve the error: tensorflow.python.framework.errors_impl.NotFoundError: Key conv_layer3/bias not found in checkpoint #14752

Closed: graydove closed this issue 6 years ago

graydove commented 6 years ago

# coding=utf-8
# tensorflow 1.4
# python 3.6

import os
import numpy as np
import tensorflow as tf
from PIL import Image

# load the dataset
def load_data(dataset_path):
    img = Image.open(dataset_path)

    # the image is a 20 x 20 grid of samples: 40 people, 10 photos each
    img_ndarray = np.asarray(img, dtype='float64') / 256        # normalized grayscale values
    #img_ndarray = np.asarray(img, dtype='float32') / 32
    # face data matrix: each face is a 57 * 47 pixel patch
    faces = np.empty((400, 57 * 47))        # face data matrix
    # flatten each face into a 1-D vector
    for row in range(20):
        for column in range(20):
            faces[20 * row + column] = np.ndarray.flatten(
                img_ndarray[row * 57: (row + 1) * 57, column * 47 : (column + 1) * 47]
                )

    label = np.zeros((400, 40))     # empty label matrix
    for i in range(40):
        label[i * 10: (i + 1) * 10, i] = 1      # one-hot class labels

    # split the data into training, validation and test sets
    train_data = np.empty((320, 57 * 47))
    train_label = np.zeros((320, 40))

    vaild_data = np.empty((40, 57 * 47))
    vaild_label = np.zeros((40, 40))

    test_data = np.empty((40, 57 * 47))
    test_label = np.zeros((40, 40))

    # fill each split: 8 training, 1 validation and 1 test image per person
    for i in range(40):
        train_data[i * 8: i * 8 + 8] = faces[i * 10: i * 10 + 8]
        train_label[i * 8: i * 8 + 8] = label[i * 10: i * 10 + 8]

        vaild_data[i] = faces[i * 10 + 8]
        vaild_label[i] = label[i * 10 + 8]

        test_data[i] = faces[i * 10 + 9]
        test_label[i] = label[i * 10 + 9]

    train_data = train_data.astype('float32')
    vaild_data = vaild_data.astype('float32')
    test_data = test_data.astype('float32')

    return [
        (train_data, train_label),
        (vaild_data, vaild_label),
        (test_data, test_label)
    ]

def convolutional_layer(data, kernel_size, bias_size, pooling_size):
    # data, convolution kernel, bias and pooling window
    kernel = tf.get_variable("conv", kernel_size, initializer=tf.random_normal_initializer())
    bias = tf.get_variable('bias', bias_size, initializer=tf.random_normal_initializer())
    # strides is the step size; 'SAME' padding lets the kernel stop at the image border
    conv = tf.nn.conv2d(data, kernel, strides=[1, 1, 1, 1], padding='SAME')
    linear_output = tf.nn.relu(tf.add(conv, bias))      # activation
    # pooling: Tensor("conv_layer2/MaxPool:0", shape=(40, 15, 12, 64), dtype=float32)
    pooling = tf.nn.max_pool(linear_output, ksize=pooling_size, strides=pooling_size, padding="SAME")
    return pooling

def linear_layer(data, weights_size, biases_size):
    weights = tf.get_variable("weigths", weights_size, initializer=tf.random_normal_initializer())    # weight matrix
    biases = tf.get_variable("biases", biases_size, initializer=tf.random_normal_initializer())
    return tf.add(tf.matmul(data, weights), biases)      # f(x) = Wx + b

def convolutional_neural_network(data):

    # the output layer has one neuron per class
    n_ouput_layer = 40

    kernel_shape1 = [5, 5, 1, 32]       # kernel shape of the first conv layer
    kernel_shape2 = [5, 5, 32, 64]
    kernel_shape3 = [5, 5, 64, 128]

    bias_shape1 = [32]      # bias shape of the first layer
    bias_shape2 = [64]      # bias shape of the second layer
    bias_shape3 = [128]

    full_conn_w_shape = [8 * 6 * 128, 1024]        # fully connected layer parameters
    full_conn_b_shape = [1024]

    out_b_shape = [n_ouput_layer]
    out_w_shape = [1024, n_ouput_layer]  # output layer weight matrix

    data = tf.reshape(data, [-1, 57, 47, 1])

    # after the first conv layer the tensor shape is [batch, 29, 24, 32]
    with tf.variable_scope("conv_layer1") as layer1:
        layer1_output = convolutional_layer(
            data = data,
            kernel_size = kernel_shape1,
            bias_size = bias_shape1,
            pooling_size = [1, 2, 2, 1]
        )
    # after the second conv layer the tensor shape is [batch, 15, 12, 64]
    with tf.variable_scope("conv_layer2") as layer2:
        layer2_output = convolutional_layer(
            data=layer1_output,
            kernel_size=kernel_shape2,
            bias_size=bias_shape2,
            pooling_size=[1, 2, 2, 1]
        )
    # after the third conv layer the tensor shape is [batch, 8, 6, 128]
    with tf.variable_scope("conv_layer3") as layer3:
        layer3_output = convolutional_layer(
            data=layer2_output,
            kernel_size=kernel_shape3,
            bias_size=bias_shape3,
            pooling_size=[1, 2, 2, 1]
        )
    with tf.variable_scope("full_connection") as full_layer4:
        # flatten the conv output into a 2-D tensor with one row per sample
        layer3_output_flatten = tf.contrib.layers.flatten(layer3_output)
        layer4_output = tf.nn.relu(
            linear_layer(
                data=layer3_output_flatten,
                weights_size=full_conn_w_shape,
                biases_size=full_conn_b_shape
            )
        )

    with tf.variable_scope("output") as output_layer5:
        output = linear_layer(
            data=layer4_output,
            weights_size=out_w_shape,
            biases_size=out_b_shape
        )
    print(data)
    return output

def train_facedata(dataset, model_dir,model_path):

    # train_set_x = data[0][0]
    # train_set_y = data[0][1]
    # valid_set_x = data[1][0]
    # valid_set_y = data[1][1]
    # test_set_x = data[2][0]
    # test_set_y = data[2][1]
    # X = tf.placeholder(tf.float32, shape=(None, None), name="x-input")  # input data
    # Y = tf.placeholder(tf.float32, shape=(None, None), name='y-input')  # input labels

    batch_size = 40

    # train_set_x, train_set_y = dataset[0]
    # valid_set_x, valid_set_y = dataset[1]
    # test_set_x, test_set_y = dataset[2]
    train_set_x = dataset[0][0]
    train_set_y = dataset[0][1]
    valid_set_x = dataset[1][0]
    valid_set_y = dataset[1][1]
    test_set_x = dataset[2][0]
    test_set_y = dataset[2][1]

    X = tf.placeholder(tf.float32, [batch_size, 57 * 47])
    Y = tf.placeholder(tf.float32, [batch_size, 40])

    predict = convolutional_neural_network(X)
    cost_func = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predict, labels=Y))
    optimizer = tf.train.AdamOptimizer(1e-2).minimize(cost_func)

    # saver used to keep the best model during training
    saver = tf.train.Saver()
    #model_dir = './model'
    #model_path = model_dir + '/best.ckpt'

    with tf.Session() as session:

        # if no saved model exists yet, train the parameters first
        if not os.path.exists(model_path + ".index"):

            session.run(tf.global_variables_initializer())

            best_loss = float('Inf')
            for epoch in range(20):

                epoch_loss = 0
                for i in range(int(np.shape(train_set_x)[0] / batch_size)):
                    x = train_set_x[i * batch_size: (i + 1) * batch_size]
                    y = train_set_y[i * batch_size: (i + 1) * batch_size]
                    _, cost = session.run([optimizer, cost_func], feed_dict={X: x, Y: y})
                    epoch_loss += cost

                print(epoch, ' : ', epoch_loss)
                if best_loss > epoch_loss:
                    best_loss = epoch_loss
                    if not os.path.exists(model_dir):
                        os.mkdir(model_dir)
                        print("create the directory: %s" % model_dir)
                    save_path = saver.save(session, model_path)
                    print("Model saved in file: %s" % save_path)

        # restore the parameters, then validate and test
        saver.restore(session, model_path)
        correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
        valid_accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('valid set accuracy: ', valid_accuracy.eval({X: valid_set_x, Y: valid_set_y}))

        test_pred = tf.argmax(predict, 1).eval({X: test_set_x})
        test_true = np.argmax(test_set_y, 1)
        test_correct = correct.eval({X: test_set_x, Y: test_set_y})
        incorrect_index = [i for i in range(np.shape(test_correct)[0]) if not test_correct[i]]
        for i in incorrect_index:
            print('picture person is %i, but mis-predicted as person %i'
                  % (test_true[i], test_pred[i]))

def main():
    dataset_path = "olivettifaces.gif"
    data = load_data(dataset_path)
    model_dir = './model'
    model_path = model_dir + '/best.ckpt'
    print(len(data))
    train_facedata(data, model_dir, model_path)

if __name__ == "__main__":
    main()

I am sure I have defined the kernel and the bias of conv_layer3, but the error says they cannot be found in the checkpoint.
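In case it helps narrow this down, here is a minimal sketch (assuming the checkpoint at ./model/best.ckpt was written by an earlier run of this script) to list which keys are actually stored in it:

```python
import tensorflow as tf

# Print every variable name and shape saved in the checkpoint so it can be
# compared with the variables the current graph tries to restore.
reader = tf.train.NewCheckpointReader('./model/best.ckpt')
for name, shape in sorted(reader.get_variable_to_shape_map().items()):
    print(name, shape)

# The key from the error message:
print('conv_layer3/bias present:', reader.has_tensor('conv_layer3/bias'))
```

If conv_layer3/bias does not appear in that listing, the checkpoint was most likely saved by an earlier version of the graph (before conv_layer3 was added). Since the script only retrains when model_path + ".index" is absent, deleting ./model (or pointing model_path at a new location) and retraining should make the restore succeed.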

tensorflowbutler commented 6 years ago

Thank you for your post. We noticed you have not filled out the following fields in the issue template. Could you update them if they are relevant in your case, or leave them as N/A? Thanks.
- Have I written custom code
- OS Platform and Distribution
- TensorFlow installed from
- TensorFlow version
- Bazel version
- CUDA/cuDNN version
- GPU model and memory
- Exact command to reproduce

tensorflowbutler commented 6 years ago

It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue? Please update the label and/or status accordingly.

tensorflowbutler commented 6 years ago

It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue? Please update the label and/or status accordingly.

tensorflowbutler commented 6 years ago

Nagging Awaiting Response: It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue?

tensorflowbutler commented 6 years ago

Nagging Awaiting Response: It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue?

tensorflowbutler commented 6 years ago

Nagging Awaiting Response: It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue?

tensorflowbutler commented 6 years ago

Nagging Awaiting Response: It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue?

tensorflowbutler commented 6 years ago

It has been 14 days with no activity and the awaiting response label was assigned. Is this still an issue?

angerson commented 6 years ago

Automatically closing due to lack of recent activity. Please update the issue when new information becomes available, and we will reopen the issue. Thanks!