backstopmedia / tensorflowbook

I found converting images to TFRecords rather slow. Is this normal? #10

Open jeffacode opened 8 years ago

jeffacode commented 8 years ago

This is my computing environment: Ubuntu 16.04 + GTX 1080 + CUDA 8.0 RC + Python 2 + TensorFlow 0.10.0. I realize your code is written for Python 3.x, but that is not the problem. The real problem is that when I tested the write_records_file function in Chapter 5 - 05 CNN Implementation.ipynb, the process ran fine at first, but after a while it took almost 20 to 30 seconds to generate a single TFRecord file! I checked my graphics card and only 2% of its memory was in use. I then switched to CPU-only mode and ran the code with all 28 threads, but nothing improved; it was still very slow, and CPU usage stayed at 5% to 8%. Is this normal?

XDXX commented 8 years ago

@jeffacode

I think the original code in the book may define too many ops in the default graph, which could cause the program to slow down. You can try my version of write_records_file.

from __future__ import division  # so the ratio checks below also work under Python 2

import tensorflow as tf
import glob
import time
from itertools import groupby
from collections import defaultdict

# The directory name (n02...) encodes the breed of every image.
image_filenames = glob.glob("./imagenet-dogs/n02*/*.jpg")

training_dataset = defaultdict(list)
testing_dataset = defaultdict(list)

image_filename_with_breed = map(lambda filename: (
    filename.split("/")[2], filename), image_filenames)

# Put roughly every fifth image of each breed into the testing set.
for dog_breed, breed_images in groupby(image_filename_with_breed, lambda x: x[0]):
    for i, breed_image in enumerate(breed_images):
        if i % 5 == 0:
            testing_dataset[dog_breed].append(breed_image[1])
        else:
            training_dataset[dog_breed].append(breed_image[1])

    breed_training_count = len(training_dataset[dog_breed])
    breed_testing_count = len(testing_dataset[dog_breed])

    assert round(breed_testing_count / (breed_testing_count +
                                        breed_training_count), 2) > 0.18, "Not enough testing images."

def write_records_file(dataset, record_location):
    writer = None
    global counter
    global start_time

    current_index = 0
    for breed, images_filenames in dataset.items():
        for image_filename in images_filenames:
            # Use a fresh graph and session for every image so the default graph
            # never accumulates ops and the per-image cost stays constant.
            with tf.Graph().as_default():
                with tf.Session() as sess:
                    sess.run(tf.initialize_all_variables())
                    # Start a new TFRecord file every 100 images.
                    if current_index % 100 == 0:
                        if writer:
                            writer.close()

                        record_filename = "{record_location}-{current_index}.tfrecords".format(
                            record_location=record_location,
                            current_index=current_index)
                        writer = tf.python_io.TFRecordWriter(record_filename)
                    current_index += 1

                    image_file = tf.read_file(image_filename)
                    image = tf.image.decode_jpeg(image_file)

                    grayscale_image = tf.image.rgb_to_grayscale(image)
                    resized_image = tf.image.resize_images(
                        grayscale_image, [250, 251])
                    try:
                        image_bytes = sess.run(
                            tf.cast(resized_image, tf.uint8)).tobytes()
                    except:
                        # Skip images that fail to decode (e.g. corrupt or non-RGB JPEGs).
                        continue

                    image_label = breed.encode("utf-8")
                    example = tf.train.Example(features=tf.train.Features(feature={
                        'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_label])),
                        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes]))
                    }))
                    writer.write(example.SerializeToString())
                    counter += 1
                    # Print progress (fraction of the ~20580 images done) every 1030 images.
                    if counter % 1030 == 0:
                        print('{:.{prec}f}   {:.{prec}f}s'.format(
                            counter / 20580, float(time.time() - start_time), prec=2))
                        start_time = time.time()

    if writer:
        writer.close()

counter = 0
start_time = time.time()
write_records_file(training_dataset, "./output/training-images/training-image")
write_records_file(testing_dataset, "./output/testing-images/testing-image")

P.S. I'm not sure my code is completely right.
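
If this is still too slow, here is a further variant as an untested sketch: it mirrors the same TensorFlow calls as above, but defines the decode/resize ops only once against a filename placeholder and reuses a single session, so no ops at all are created inside the loop. The name write_records_file_single_graph is just my own placeholder, not from the book.

def write_records_file_single_graph(dataset, record_location):
    # Build the image-processing graph exactly once.
    graph = tf.Graph()
    with graph.as_default():
        filename_input = tf.placeholder(tf.string)  # fed with one file path per run
        image = tf.image.decode_jpeg(tf.read_file(filename_input))
        grayscale_image = tf.image.rgb_to_grayscale(image)
        resized_image = tf.image.resize_images(grayscale_image, [250, 251])
        image_uint8 = tf.cast(resized_image, tf.uint8)

    writer = None
    current_index = 0
    with tf.Session(graph=graph) as sess:
        for breed, images_filenames in dataset.items():
            for image_filename in images_filenames:
                # Start a new TFRecord file every 100 images.
                if current_index % 100 == 0:
                    if writer:
                        writer.close()
                    writer = tf.python_io.TFRecordWriter(
                        "{}-{}.tfrecords".format(record_location, current_index))
                current_index += 1

                try:
                    image_bytes = sess.run(
                        image_uint8, feed_dict={filename_input: image_filename}).tobytes()
                except Exception:
                    continue  # skip images that fail to decode

                example = tf.train.Example(features=tf.train.Features(feature={
                    'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[breed.encode("utf-8")])),
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes]))
                }))
                writer.write(example.SerializeToString())
    if writer:
        writer.close()

The placeholder keeps the graph size constant and avoids rebuilding a graph and session for every image, which is where most of the remaining per-image overhead should be.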

jeffacode commented 8 years ago

It really is much faster now! Thanks a lot! @XDXX

hnxyxiaomeng commented 7 years ago

Have you run the Stanford Dogs demo successfully? Could you share the complete code? Thanks!

XinliangZhu commented 6 years ago

@XDXX's version also works for my case. Thanks!