warmspringwinds / tf-image-segmentation

Image Segmentation framework based on Tensorflow and TF-Slim library
MIT License
549 stars 188 forks source link

logits and labels must be same size error and training issues #42

Open PascPeli opened 6 years ago

PascPeli commented 6 years ago

When running the following code, I get this error: "InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[1,65536] labels_size=[65536,1]" in cross_entropy_with_logits. I can work around this by doing

cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=valid_logits_batch_tensor,
                                                          labels=tf.transpose(valid_labels_batch_tensor))

but not

cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=tf.transpose(valid_logits_batch_tensor),
                                                          labels=valid_labels_batch_tensor)

And then, when training, the loss only increases continuously. I have changed the learning rate a couple of times to no avail. I'm using images with 2 classes (Background and Ground Truth).

import os

import numpy as np
import tensorflow as tf
import skimage.io as io
#import tensorflow.contrib.slim.nets

from matplotlib import pyplot as plt
from nets import vgg
from preprocessing import vgg_preprocessing

# Load the mean pixel values and the function
# that performs the subtraction from each pixel
from preprocessing.vgg_preprocessing import (_mean_image_subtraction,
                                            _R_MEAN, _G_MEAN, _B_MEAN)
from tf_image_segmentation.utils.tf_records import read_tfrecord_and_decode_into_image_annotation_pair_tensors
from tf_image_segmentation.models.fcn_32s import FCN_32s, extract_vgg_16_mapping_without_fc8

from tf_image_segmentation.utils.pascal_voc import pascal_segmentation_lut

from tf_image_segmentation.utils.training import get_valid_logits_and_labels

from tf_image_segmentation.utils.augmentation import (distort_randomly_image_color,
                                                      flip_randomly_left_right_image_with_annotation,
                                                      scale_randomly_image_with_annotation_with_fixed_size_output)

#os.environ["CUDA_VISIBLE_DEVICES"] = '0'

def my_detection_lut():
    """Build the class look-up table for this 2-class segmentation task.

    Index 0 is 'background' and index 1 is 'GT' (ground truth); the
    special index 255 marks ambiguous regions that are masked out of
    the loss.

    Returns
    -------
    classes_lut : dict
        Mapping from class index (int) to class name (str).
    """
    # Regular classes take consecutive indices starting at 0.
    classes_lut = {index: name for index, name in enumerate(['background', 'GT'])}

    # The ambiguous-region class is pinned to the conventional index 255.
    classes_lut[255] = 'ambigious'

    return classes_lut
# Paths for data, the pretrained VGG-16 checkpoint, logs and model snapshots.
root_folder = os.getcwd()
img_folder = os.path.join(root_folder, 'data\\images')
checkpoints_dir = os.path.join(root_folder, 'data')
vgg_checkpoint_path = os.path.join(checkpoints_dir, 'vgg_16.ckpt')
log_folder = os.path.join(root_folder, 'logs')
model_ckpt_path = os.path.join(root_folder, 'model_fcn32.ckpt')
slim = tf.contrib.slim

# Dataset size is the number of frame files on disk (no need to copy
# the listing into a list comprehension just to count it).
dataset_len = len(os.listdir(os.path.join(img_folder, 'processed\\frames')))
print(dataset_len)
# NOTE(review): only 25% of the data is counted as the training set --
# confirm this split is intended.
trainset_len = int(dataset_len * 0.25)
testset_len = dataset_len - trainset_len
image_train_size = [256, 256]
number_of_classes = 2  # background + ground truth
# The relative-filename assignment was dead code (immediately overwritten);
# keep only the full path.
tfrecord_filename = os.path.join(img_folder, 'polyp_test_pairs.tfrecords')
print(tfrecord_filename)
my_detection = my_detection_lut()
class_labels = list(my_detection.keys())

tf.reset_default_graph() 

# Input pipeline: read (image, annotation) pairs from the TFRecord file,
# cycling through the data for 10 epochs.
filename_queue = tf.train.string_input_producer([tfrecord_filename], num_epochs=10)
image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(filename_queue)

resized_image = tf.image.resize_images(images=image,
                                       size=image_train_size)

# BUGFIX: label maps must be resized with nearest-neighbour interpolation.
# The default (bilinear) method blends neighbouring pixels and produces
# fractional values that are not valid class labels, which breaks the
# masking in get_valid_logits_and_labels and can make training diverge.
resized_annotation = tf.image.resize_images(images=annotation,
                                            size=image_train_size,
                                            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

image_batch, annotation_batch = tf.train.shuffle_batch([resized_image, resized_annotation],
                                                       batch_size=1, capacity=20, num_threads=1,
                                                       min_after_dequeue=10)

# FCN-32s: per-pixel logits upsampled back to the input resolution.
upsampled_logits_batch, vgg_16_variables_mapping = FCN_32s(image_batch_tensor=image_batch,
                                                           number_of_classes=number_of_classes,
                                                           is_training=True)

# Drop pixels whose annotation is not in class_labels (e.g. the 255
# 'ambigious' class) before computing the loss.
valid_labels_batch_tensor, valid_logits_batch_tensor = get_valid_logits_and_labels(
    annotation_batch_tensor=annotation_batch,
    logits_batch_tensor=upsampled_logits_batch,
    class_labels=class_labels)

cross_entropies = tf.nn.softmax_cross_entropy_with_logits(logits=valid_logits_batch_tensor,
                                                          labels=valid_labels_batch_tensor)

# Average (rather than sum) over the valid pixels -- the number of
# elements differs each step because of the masked-out regions.
cross_entropy_sum = tf.reduce_mean(cross_entropies)

# Per-pixel predicted class and class probabilities (for visualization).
pred = tf.argmax(upsampled_logits_batch, dimension=3)

probabilities = tf.nn.softmax(upsampled_logits_batch)

# Build the training op in its own variable scope so Adam's slot
# variables are easy to identify (they are excluded from the model
# saver below, which saves model variables only).
with tf.variable_scope("adam_vars"):
    train_step = tf.train.AdamOptimizer(learning_rate=0.000001).minimize(cross_entropy_sum)

# Variable's initialization functions
# Restore all pretrained VGG-16 weights except fc8, whose shape depends
# on number_of_classes and is trained from scratch here.
vgg_16_without_fc8_variables_mapping = extract_vgg_16_mapping_without_fc8(vgg_16_variables_mapping)

init_fn = slim.assign_from_checkpoint_fn(model_path=vgg_checkpoint_path,
                                         var_list=vgg_16_without_fc8_variables_mapping)

# NOTE: must stay after the optimizer construction above so that the
# Adam variables created there are covered by this initializer.
global_vars_init_op = tf.global_variables_initializer()

tf.summary.scalar('cross_entropy_loss', cross_entropy_sum)

merged_summary_op = tf.summary.merge_all()

summary_string_writer = tf.summary.FileWriter(log_folder)

# Create the log folder if doesn't exist yet
if not os.path.exists(log_folder):
     os.makedirs(log_folder)

#The op for initializing the variables.
# Local variables include the epoch counter created by
# tf.train.string_input_producer(num_epochs=...).
local_vars_init_op = tf.local_variables_initializer()

combined_op = tf.group(local_vars_init_op, global_vars_init_op)

# We need this to save only model variables and omit optimization-related and other variables.
model_variables = slim.get_model_variables()
saver = tf.train.Saver(model_variables)

with tf.Session()  as sess:

    # Initialize local + global variables first, then overwrite the
    # VGG-16 layers with the pretrained checkpoint weights.
    sess.run(combined_op)
    init_fn(sess)
    summary_string_writer.add_graph(sess.graph)

    # Start the input-pipeline threads that feed the shuffle_batch queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # NOTE(review): the iteration count is derived from testset_len, not
    # trainset_len -- confirm this is intended.
    for i in range(testset_len*10):
        cross_entropy, summary_string, _ = sess.run([ cross_entropy_sum,
                                                      merged_summary_op,
                                                      train_step ])
        print(i,") Current loss: " + str(cross_entropy))
        summary_string_writer.add_summary(summary_string, i)           
        # Periodic checkpoint (note: also fires on the very first step, i == 0).
        if i % trainset_len == 0:
            save_path = saver.save(sess, model_ckpt_path)
            print("Model saved in file: %s" % save_path)
    # Shut the queue-runner threads down cleanly before the final save.
    coord.request_stop()
    coord.join(threads)            
    save_path = saver.save(sess, model_ckpt_path)
    print("Model saved in file: %s" % save_path)
summary_string_writer.close()