jiangsutx / SRN-Deblur

Repository for Scale-recurrent Network for Deep Image Deblurring
http://www.xtao.website/projects/srndeblur/srndeblur_cvpr18.pdf
MIT License

How to train on grayscale images with one channel? #47

Closed xu-feiran closed 4 years ago

xu-feiran commented 4 years ago

I'm sorry for asking this stupid question. I have tried for many days but can't figure it out, and there is no one in my laboratory I can ask. I hope someone can help me, thank you!

model.py

from __future__ import print_function
import os
import time
import random
import datetime
import scipy.misc
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from datetime import datetime
from util.util import *
from util.BasicConvLSTMCell import *
import sys
reload(sys)
sys.setdefaultencoding('utf8')

class DEBLUR(object):
    def __init__(self, args):
        self.args = args
        self.n_levels = 3 
        self.scale = 0.5 
        self.chns = 1

        # if args.phase == 'train':
        self.crop_size = 128 
        self.data_list = open(args.datalist, 'rt').read().splitlines()
        self.data_list = list(map(lambda x: x.split(' '), self.data_list))
        random.shuffle(self.data_list)
        self.train_dir = os.path.join('./checkpoints', args.model)
        if not os.path.exists(self.train_dir):
            os.makedirs(self.train_dir)

        self.batch_size = args.batch_size
        self.epoch = args.epoch
        self.data_size = (len(self.data_list)) // self.batch_size
        self.max_steps = int(self.epoch * self.data_size)
        self.learning_rate = args.learning_rate

    def input_producer(self, batch_size=10):
        def read_data():
            img_a = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[0]])),
                                          channels=0)
            img_b = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[1]])),
                                          channels=0)
            img_a, img_b = preprocessing([img_a, img_b])
            return img_a, img_b

        def preprocessing(imgs):
            imgs = [tf.cast(img, tf.float32) / 255.0 for img in imgs]
            if self.args.model != 'color':
                imgs = [tf.image.rgb_to_grayscale(img) for img in imgs]
            img_crop = tf.unstack(tf.random_crop(tf.stack(imgs, axis=0), [2, self.crop_size, self.crop_size, self.chns]),
                                  axis=0)
            return img_crop

        with tf.variable_scope('input'):
            List_all = tf.convert_to_tensor(self.data_list, dtype=tf.string)
            gt_list = List_all[:, 0]
            in_list = List_all[:, 1]

            self.data_queue = tf.train.slice_input_producer([in_list, gt_list], capacity=20)
            image_in, image_gt = read_data()
            batch_in, batch_gt = tf.train.batch([image_in, image_gt], batch_size=batch_size, num_threads=8, capacity=20)

        return batch_in, batch_gt

    def generator(self, inputs, reuse=False, scope='g_net'):
        n, h, w, c = inputs.get_shape().as_list()

        if self.args.model == 'lstm':
            with tf.variable_scope('LSTM'):
                cell = BasicConvLSTMCell([h / 4, w / 4], [3, 3], 128)
                rnn_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)

        x_unwrap = []
        with tf.variable_scope(scope, reuse=reuse):
            with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                                activation_fn=tf.nn.relu, padding='SAME', normalizer_fn=None,
                                weights_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
                                biases_initializer=tf.constant_initializer(0.0)):

                inp_pred = inputs
                for i in xrange(self.n_levels):
                    scale = self.scale ** (self.n_levels - i - 1)
                    hi = int(round(h * scale))
                    wi = int(round(w * scale))
                    inp_blur = tf.image.resize_images(inputs, [hi, wi], method=0)
                    inp_pred = tf.stop_gradient(tf.image.resize_images(inp_pred, [hi, wi], method=0))
                    inp_all = tf.concat([inp_blur, inp_pred], axis=3, name='inp')
                    if self.args.model == 'lstm':
                        rnn_state = tf.image.resize_images(rnn_state, [hi // 4, wi // 4], method=0)

                    # encoder
                    conv1_1 = slim.conv2d(inp_all, 32, [5, 5], scope='enc1_1')
                    conv1_2 = ResnetBlock(conv1_1, 32, 5, scope='enc1_2')
                    conv1_3 = ResnetBlock(conv1_2, 32, 5, scope='enc1_3')
                    conv1_4 = ResnetBlock(conv1_3, 32, 5, scope='enc1_4')
                    conv2_1 = slim.conv2d(conv1_4, 64, [5, 5], stride=2, scope='enc2_1')
                    conv2_2 = ResnetBlock(conv2_1, 64, 5, scope='enc2_2')
                    conv2_3 = ResnetBlock(conv2_2, 64, 5, scope='enc2_3')
                    conv2_4 = ResnetBlock(conv2_3, 64, 5, scope='enc2_4')
                    conv3_1 = slim.conv2d(conv2_4, 128, [5, 5], stride=2, scope='enc3_1')
                    conv3_2 = ResnetBlock(conv3_1, 128, 5, scope='enc3_2')
                    conv3_3 = ResnetBlock(conv3_2, 128, 5, scope='enc3_3')
                    conv3_4 = ResnetBlock(conv3_3, 128, 5, scope='enc3_4')

                    if self.args.model == 'lstm':
                        deconv3_4, rnn_state = cell(conv3_4, rnn_state)
                    else:
                        deconv3_4 = conv3_4

                    # decoder
                    deconv3_3 = ResnetBlock(deconv3_4, 128, 5, scope='dec3_3')
                    deconv3_2 = ResnetBlock(deconv3_3, 128, 5, scope='dec3_2')
                    deconv3_1 = ResnetBlock(deconv3_2, 128, 5, scope='dec3_1')
                    deconv2_4 = slim.conv2d_transpose(deconv3_1, 64, [4, 4], stride=2, scope='dec2_4')
                    cat2 = deconv2_4 + conv2_4
                    deconv2_3 = ResnetBlock(cat2, 64, 5, scope='dec2_3')
                    deconv2_2 = ResnetBlock(deconv2_3, 64, 5, scope='dec2_2')
                    deconv2_1 = ResnetBlock(deconv2_2, 64, 5, scope='dec2_1')
                    deconv1_4 = slim.conv2d_transpose(deconv2_1, 32, [4, 4], stride=2, scope='dec1_4')
                    cat1 = deconv1_4 + conv1_4
                    deconv1_3 = ResnetBlock(cat1, 32, 5, scope='dec1_3')
                    deconv1_2 = ResnetBlock(deconv1_3, 32, 5, scope='dec1_2')
                    deconv1_1 = ResnetBlock(deconv1_2, 32, 5, scope='dec1_1')
                    inp_pred = slim.conv2d(deconv1_1, self.chns, [5, 5], activation_fn=None, scope='dec1_0')

                    if i >= 0:
                        x_unwrap.append(inp_pred)
                    if i == 0:
                        tf.get_variable_scope().reuse_variables()

            return x_unwrap

    def build_model(self):
        img_in, img_gt = self.input_producer(self.batch_size)

        tf.summary.image('img_in', im2uint8(img_in))
        tf.summary.image('img_gt', im2uint8(img_gt))
        print('img_in, img_gt', img_in.get_shape(), img_gt.get_shape())

        # generator
        x_unwrap = self.generator(img_in, reuse=False, scope='g_net')
        # calculate multi-scale loss
        self.loss_total = 0
        for i in xrange(self.n_levels):
            _, hi, wi, _ = x_unwrap[i].get_shape().as_list()
            gt_i = tf.image.resize_images(img_gt, [hi, wi], method=0)
            loss = tf.reduce_mean((gt_i - x_unwrap[i]) ** 2)
            self.loss_total += loss

            tf.summary.image('out_' + str(i), im2uint8(x_unwrap[i]))
            tf.summary.scalar('loss_' + str(i), loss)

        # losses
        tf.summary.scalar('loss_total', self.loss_total)

        # training vars
        all_vars = tf.trainable_variables()
        self.all_vars = all_vars
        self.g_vars = [var for var in all_vars if 'g_net' in var.name]
        self.lstm_vars = [var for var in all_vars if 'LSTM' in var.name]
        for var in all_vars:
            print(var.name)

    def train(self):
        def get_optimizer(loss, global_step=None, var_list=None, is_gradient_clip=False):
            train_op = tf.train.AdamOptimizer(self.lr)
            if is_gradient_clip:
                grads_and_vars = train_op.compute_gradients(loss, var_list=var_list)
                unchanged_gvs = [(grad, var) for grad, var in grads_and_vars if not 'LSTM' in var.name]
                rnn_grad = [grad for grad, var in grads_and_vars if 'LSTM' in var.name]
                rnn_var = [var for grad, var in grads_and_vars if 'LSTM' in var.name]
                capped_grad, _ = tf.clip_by_global_norm(rnn_grad, clip_norm=3)
                capped_gvs = list(zip(capped_grad, rnn_var))
                train_op = train_op.apply_gradients(grads_and_vars=capped_gvs + unchanged_gvs, global_step=global_step)
            else:
                train_op = train_op.minimize(loss, global_step, var_list)
            return train_op

        global_step = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False)
        self.global_step = global_step

        # build model
        self.build_model() # TODO 

        # learning rate decay
        self.lr = tf.train.polynomial_decay(self.learning_rate, global_step, self.max_steps, end_learning_rate=0.0,
                                            power=0.3)
        tf.summary.scalar('learning_rate', self.lr)

        # training operators
        train_gnet = get_optimizer(self.loss_total, global_step, self.all_vars)

        # session and thread
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.sess = sess
        sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=50, keep_checkpoint_every_n_hours=1)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # training summary
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph, flush_secs=30)

        for step in xrange(sess.run(global_step), self.max_steps + 1):

            start_time = time.time()

            # update G network
            _, loss_total_val = sess.run([train_gnet, self.loss_total])

            duration = time.time() - start_time
            # print loss_value
            assert not np.isnan(loss_total_val), 'Model diverged with loss = NaN'

            if step % 5 == 0:
                num_examples_per_step = self.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = (%.5f; %.5f, %.5f)(%.1f data/s; %.3f s/bch)')
                print(format_str % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), step, loss_total_val, 0.0,
                                    0.0, examples_per_sec, sec_per_batch))

            if step % 20 == 0:
                # summary_str = sess.run(summary_op, feed_dict={inputs:batch_input, gt:batch_gt})
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, global_step=step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or step == self.max_steps:
                checkpoint_path = os.path.join(self.train_dir, 'checkpoints')
                self.save(sess, checkpoint_path, step)

    def save(self, sess, checkpoint_dir, step):
        model_name = "deblur.model"
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        self.saver.save(sess, os.path.join(checkpoint_dir, model_name), global_step=step)

    def load(self, sess, checkpoint_dir, step=None):
        print(" [*] Reading checkpoints...")
        model_name = "deblur.model"
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)

        if step is not None:
            ckpt_name = model_name + '-' + str(step)
            self.saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))
            print(" [*] Reading intermediate checkpoints... Success")
            return str(step)
        elif ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            ckpt_iter = ckpt_name.split('-')[1]
            self.saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))
            print(" [*] Reading updated checkpoints... Success")
            return ckpt_iter
        else:
            print(" [*] Reading checkpoints... ERROR")
            return False

    def test(self, height, width, input_path, output_path):
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        imgsName = sorted(os.listdir(input_path))

        H, W = height, width
        inp_chns = 3 if self.args.model == 'color' else 1
        #self.batch_size = 1 if self.args.model == 'color' else 3
        self.batch_size = 1
        inputs = tf.placeholder(shape=[self.batch_size, H, W, inp_chns], dtype=tf.float32)
        outputs = self.generator(inputs, reuse=False)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))

        self.saver = tf.train.Saver()
        self.load(sess, self.train_dir, step=523000)

        for imgName in imgsName:
            blur = scipy.misc.imread(os.path.join(input_path, imgName))
            if self.args.model == 'gray': # gai
                blur = blur.reshape(blur.shape[0], blur.shape[1], 1)
            h, w, c = blur.shape
            # make sure the width is larger than the height
            rot = False
            if h > w:
                blur = np.transpose(blur, [1, 0, 2])
                rot = True
            h = int(blur.shape[0])
            w = int(blur.shape[1])
            resize = False
            if h > H or w > W:
                scale = min(1.0 * H / h, 1.0 * W / w)
                new_h = int(h * scale)
                new_w = int(w * scale)
                blur = scipy.misc.imresize(blur, [new_h, new_w], 'bicubic')
                resize = True
                blurPad = np.pad(blur, ((0, H - new_h), (0, W - new_w), (0, 0)), 'edge')
            else:
                blurPad = np.pad(blur, ((0, H - h), (0, W - w), (0, 0)), 'edge')
            blurPad = np.expand_dims(blurPad, 0)
            if self.args.model != 'color':
                blurPad = np.transpose(blurPad, (3, 1, 2, 0))

            start = time.time()
            deblur = sess.run(outputs, feed_dict={inputs: blurPad / 255.0})
            duration = time.time() - start
            print('Saving results: %s ... %4.3fs' % (os.path.join(output_path, imgName), duration))
            res = deblur[-1]
            if self.args.model != 'color':
                res = np.transpose(res, (3, 1, 2, 0))
            res = im2uint8(res[0, :, :, :])
            # crop the image into original size
            if resize:
                res = res[:new_h, :new_w, :]
                res = scipy.misc.imresize(res, [h, w], 'bicubic')
            else:
                res = res[:h, :w, :]

            if rot:
                res = np.transpose(res, [1, 0, 2])
                if self.args.model == 'gray': # gai
                    res = res.reshape(res.shape[0], res.shape[1])
            imgName = imgName.replace('blur', 'deblur')
            scipy.misc.imsave(os.path.join(output_path, imgName), res)

Error information

/home/faelan/anaconda3/envs/py27/bin/python /media/faelan/WestData/code/Deblur/SRN-Deblur/run_model.py
img_in, img_gt (1, 128, 128, 1) (1, 128, 128, 1)
g_net/enc1_1/weights:0
g_net/enc1_1/biases:0
g_net/enc1_2/conv1/weights:0
g_net/enc1_2/conv1/biases:0
g_net/enc1_2/conv2/weights:0
g_net/enc1_2/conv2/biases:0
g_net/enc1_3/conv1/weights:0
g_net/enc1_3/conv1/biases:0
g_net/enc1_3/conv2/weights:0
g_net/enc1_3/conv2/biases:0
g_net/enc1_4/conv1/weights:0
g_net/enc1_4/conv1/biases:0
g_net/enc1_4/conv2/weights:0
g_net/enc1_4/conv2/biases:0
g_net/enc2_1/weights:0
g_net/enc2_1/biases:0
g_net/enc2_2/conv1/weights:0
g_net/enc2_2/conv1/biases:0
g_net/enc2_2/conv2/weights:0
g_net/enc2_2/conv2/biases:0
g_net/enc2_3/conv1/weights:0
g_net/enc2_3/conv1/biases:0
g_net/enc2_3/conv2/weights:0
g_net/enc2_3/conv2/biases:0
g_net/enc2_4/conv1/weights:0
g_net/enc2_4/conv1/biases:0
g_net/enc2_4/conv2/weights:0
g_net/enc2_4/conv2/biases:0
g_net/enc3_1/weights:0
g_net/enc3_1/biases:0
g_net/enc3_2/conv1/weights:0
g_net/enc3_2/conv1/biases:0
g_net/enc3_2/conv2/weights:0
g_net/enc3_2/conv2/biases:0
g_net/enc3_3/conv1/weights:0
g_net/enc3_3/conv1/biases:0
g_net/enc3_3/conv2/weights:0
g_net/enc3_3/conv2/biases:0
g_net/enc3_4/conv1/weights:0
g_net/enc3_4/conv1/biases:0
g_net/enc3_4/conv2/weights:0
g_net/enc3_4/conv2/biases:0
g_net/convLSTM/LSTM_conv/weights:0
g_net/convLSTM/LSTM_conv/biases:0
g_net/dec3_3/conv1/weights:0
g_net/dec3_3/conv1/biases:0
g_net/dec3_3/conv2/weights:0
g_net/dec3_3/conv2/biases:0
g_net/dec3_2/conv1/weights:0
g_net/dec3_2/conv1/biases:0
g_net/dec3_2/conv2/weights:0
g_net/dec3_2/conv2/biases:0
g_net/dec3_1/conv1/weights:0
g_net/dec3_1/conv1/biases:0
g_net/dec3_1/conv2/weights:0
g_net/dec3_1/conv2/biases:0
g_net/dec2_4/weights:0
g_net/dec2_4/biases:0
g_net/dec2_3/conv1/weights:0
g_net/dec2_3/conv1/biases:0
g_net/dec2_3/conv2/weights:0
g_net/dec2_3/conv2/biases:0
g_net/dec2_2/conv1/weights:0
g_net/dec2_2/conv1/biases:0
g_net/dec2_2/conv2/weights:0
g_net/dec2_2/conv2/biases:0
g_net/dec2_1/conv1/weights:0
g_net/dec2_1/conv1/biases:0
g_net/dec2_1/conv2/weights:0
g_net/dec2_1/conv2/biases:0
g_net/dec1_4/weights:0
g_net/dec1_4/biases:0
g_net/dec1_3/conv1/weights:0
g_net/dec1_3/conv1/biases:0
g_net/dec1_3/conv2/weights:0
g_net/dec1_3/conv2/biases:0
g_net/dec1_2/conv1/weights:0
g_net/dec1_2/conv1/biases:0
g_net/dec1_2/conv2/weights:0
g_net/dec1_2/conv2/biases:0
g_net/dec1_1/conv1/weights:0
g_net/dec1_1/conv1/biases:0
g_net/dec1_1/conv2/weights:0
g_net/dec1_1/conv2/biases:0
g_net/dec1_0/weights:0
g_net/dec1_0/biases:0
2019-11-15 18:50:58.905733: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2019-11-15 18:50:58.985701: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:892] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2019-11-15 18:50:58.986010: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties: 
name: GeForce GTX 1060 6GB major: 6 minor: 1 memoryClockRate(GHz): 1.8095
pciBusID: 0000:0c:00.0
totalMemory: 5.93GiB freeMemory: 5.49GiB
2019-11-15 18:50:58.986029: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1060 6GB, pci bus id: 0000:0c:00.0, compute capability: 6.1)
2019-11-15 18:51:00.790961: W tensorflow/core/framework/op_kernel.cc:1192] Invalid argument: Number of channels must be 3 or 4, was 1
         [[Node: input/decode_image/cond_jpeg/cond_png/cond_gif/DecodeBmp = DecodeBmp[channels=0, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input/decode_image/cond_jpeg/cond_png/cond_gif/Substr/Switch, ^input/decode_image/cond_jpeg/cond_png/cond_gif/Assert_1/Assert, ^input/decode_image/cond_jpeg/cond_png/cond_gif/Assert_2/Assert)]]
(the same "Invalid argument: Number of channels must be 3 or 4, was 1" warning repeats many more times for input/decode_image and input/decode_image_1)
Traceback (most recent call last):
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/run_model.py", line 54, in <module>
    tf.app.run()
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/run_model.py", line 48, in main
    deblur.train()
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/models/model.py", line 214, in train
    _, loss_total_val = sess.run([train_gnet, self.loss_total])
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 889, in run
    run_metadata_ptr)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1120, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
    options, run_metadata)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: FIFOQueue '_1_input/batch/fifo_queue' is closed and has insufficient elements (requested 1, current size 0)
         [[Node: input/batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input/batch/fifo_queue, input/batch/n)]]

Caused by op u'input/batch', defined at:
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/run_model.py", line 54, in <module>
    tf.app.run()
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/run_model.py", line 48, in main
    deblur.train()
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/models/model.py", line 186, in train
    self.build_model() # TODO
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/models/model.py", line 137, in build_model
    img_in, img_gt = self.input_producer(self.batch_size)
  File "/media/faelan/WestData/code/Deblur/SRN-Deblur/models/model.py", line 64, in input_producer
    batch_in, batch_gt = tf.train.batch([image_in, image_gt], batch_size=batch_size, num_threads=8, capacity=20)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 927, in batch
    name=name)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/training/input.py", line 722, in _batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/data_flow_ops.py", line 464, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 2418, in _queue_dequeue_many_v2
    component_types=component_types, timeout_ms=timeout_ms, name=name)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/faelan/anaconda3/envs/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

OutOfRangeError (see above for traceback): FIFOQueue '_1_input/batch/fifo_queue' is closed and has insufficient elements (requested 1, current size 0)
         [[Node: input/batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input/batch/fifo_queue, input/batch/n)]]
lenalinyajing commented 4 years ago

The original code provides three model options. If the option is not 'color', the inputs are converted to grayscale before further processing. Are the images you feed in already single-channel grayscale?

jiangsutx commented 4 years ago

According to the last line of the error output, the TensorFlow data pipeline cannot read your image data.

  1. Maybe you provided the wrong path to the images, or your data fails the preprocessing step.

  2. Please check https://github.com/jiangsutx/SRN-Deblur/blob/master/models/model.py#L37-L63; the TensorFlow data pipeline is genuinely hard to debug.

  3. Please check https://github.com/jiangsutx/SRN-Deblur/blob/master/models/model.py#L39-L41. It seems you are using a newer version of TensorFlow, so make sure tf.image.decode_image is called with the correct parameters (see the sketch below).
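For what it's worth, here is a minimal sketch of point 3 (an illustration, not code from the repository, and it assumes the inputs are PNG files): the warnings show the DecodeBmp branch of tf.image.decode_image rejecting 1-channel data, whereas tf.image.decode_png accepts an explicit channels=1:

    def read_gray(path_tensor):
        # Decode a PNG directly as grayscale instead of relying on the
        # channels=0 auto-detection in tf.image.decode_image.
        raw = tf.read_file(path_tensor)
        img = tf.image.decode_png(raw, channels=1)  # shape: [H, W, 1]
        return tf.cast(img, tf.float32) / 255.0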

xu-feiran commented 4 years ago

Thank you for your reply! I'm sure the image paths are right, because the code runs well with RGB images. The code runs with tensorflow-gpu 1.4.0. I forked this repository and uploaded my dataset at SRN-Deblur. Would you be willing to help me debug the code? I would really appreciate it.

nwpuqyj commented 4 years ago

1.Option"model" should be "gray" 2.Training set should be grayscale images.

  1. "Channels"should be 3 not 0.
    def input_producer(self, batch_size=10):
        def read_data():
            img_a = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[0]])),
                                          channels=0)
            img_b = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[1]])),
                                          channels=0)

I have trained grayscale images successfully.
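For illustration, a hedged sketch of that change under the same pipeline (a rendering of the suggestion above, not tested code): decode with channels=3 so the decoder always yields three channels, and let the existing tf.image.rgb_to_grayscale call in preprocessing reduce the result to one channel when the model is not 'color':

        def read_data():
            img_a = tf.image.decode_image(
                tf.read_file(tf.string_join(['./training_set/', self.data_queue[0]])),
                channels=3)  # decode as 3-channel; preprocessing converts to gray
            img_b = tf.image.decode_image(
                tf.read_file(tf.string_join(['./training_set/', self.data_queue[1]])),
                channels=3)
            img_a, img_b = preprocessing([img_a, img_b])
            return img_a, img_b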

xu-feiran commented 4 years ago

1.Option"model" should be "gray" 2.Training set should be grayscale images.

  1. "Channels"should be 3 not 0.
def input_producer(self, batch_size=10):
        def read_data():
            img_a = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[0]])),
                                          channels=0)
            img_b = tf.image.decode_image(tf.read_file(tf.string_join(['./training_set/', self.data_queue[1]])),
                                          channels=0)

I have trained grayscale images successfully.

Thanks! I used grayscale images, set channels to 0, and set model = 'gray'. The error may be caused by my images' encoding; I will look into it. You have helped me a lot!
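One quick way to check the encoding (a minimal sketch using Pillow; the directory path is an assumption matching the pipeline above) is to scan the training set for files whose format or channel mode differs from the rest:

    import os
    from PIL import Image

    root = './training_set'  # hypothetical path
    for name in sorted(os.listdir(root)):
        try:
            with Image.open(os.path.join(root, name)) as im:
                # e.g. "0001_blur.png PNG L (1280, 720)"; mode 'L' is 1-channel
                print(name, im.format, im.mode, im.size)
        except IOError as err:
            print(name, 'unreadable:', err)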

xu-feiran commented 4 years ago

There is no error in the code; my dataset had some problems. The code runs well with another dataset. Thank you!