keras-team / keras-applications

Reference implementations of popular deep learning models.
2k stars 914 forks source link

Obtained baseline accuracy for ResNet50v2 is different than officially reported #207

Closed gcunhase closed 2 years ago

gcunhase commented 2 years ago


The baseline accuracy I obtained for ResNet50V2 differs from the officially reported value:

Model Top-1 (%)
Baseline (official) 75.96
Baseline (obtained) 66.85

ResNet50 (v1) works fine with the same pipeline, so I don't think the issue is in the data loading or the evaluation code. My current guess is that there is a mismatch between the loaded checkpoint and the input pre-processing function for ResNet V2 (`keras.applications.resnet_v2.preprocess_input`).

Any advice is appreciated.


Logs or source code for reproduction

  1. Download ImageNet tfrecord validation dataset.
  2. Run evaluation script:
    import os
    import tensorflow as tf

# Directory holding the ImageNet validation TFRecord shards.
DATA_DIR = "/media/Data/ImageNet/train-val-tfrecord"
# Side length (pixels) of the square image fed to the network.
_DEFAULT_IMAGE_SIZE = 224
# Number of colour channels requested from the image decoder.
_NUM_CHANNELS = 3
# NOTE(review): defined but not referenced anywhere in the visible script;
# presumably the short-side size for a resize-then-crop pipeline -- confirm.
_RESIZE_MIN = 256

def load_data(data_dir='./data/imagenet', batch_size=8, num_val_files=128):
    """Build the batched ImageNet validation dataset from TFRecord shards.

    Args:
        data_dir: Directory containing the validation shards. The full
            ImageNet2012 dataset has to be downloaded manually beforehand.
        batch_size: Number of records per batch. Incomplete trailing batches
            are dropped (``drop_remainder=True``).
        num_val_files: Number of validation shards, forwarded to
            ``get_filenames``.

    Returns:
        The batched dataset.

    Raises:
        AssertionError: If ``data_dir`` does not exist.
    """
    # 1. Load ImageNet2012 dataset - needs to manually download the full
    #    ImageNet2012 dataset first.
    assert os.path.exists(data_dir)

    # 2. Make the validation dataset.
    filenames = get_filenames(data_dir, num_val_files=num_val_files)
    # NOTE(review): the right-hand sides of the next two assignments were lost
    # when this script was pasted; they are reconstructed as the usual
    # "read TFRecords, then map the per-record preprocessing" pair -- confirm
    # against the original script.
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(preprocess_image_record_tf_v2)
    dataset = dataset.batch(batch_size, drop_remainder=True)

    return dataset

def preprocess_image_record_tf_v2(record):
    """Turn one serialized TFRecord example into an ``(image, label)`` pair.

    This function is meant to be passed as the ``map_func`` argument of a
    dataset ``map`` call, which only supplies a single argument (``record``).

    Args:
        record: One serialized example, as accepted by
            ``_deserialize_image_record``.

    Returns:
        A tuple ``(image, label)``: the decoded image cast to float32, resized
        to ``_DEFAULT_IMAGE_SIZE`` x ``_DEFAULT_IMAGE_SIZE`` and passed through
        the ResNet V2 ``preprocess_input``; and the label shifted down by one.
    """
    imgdata, label, _bbox, _text = _deserialize_image_record(record)

    # Subtract one so that labels are in [0, 1000)
    label -= 1

    # The original decoded the bytes twice (decode_jpeg, then decode_image) and
    # discarded the first result; only the effective decode is kept.
    # expand_animations=False makes the result always 3-D, which gives
    # tf.image.resize the static rank it needs when traced inside Dataset.map.
    image = tf.image.decode_image(
        imgdata, channels=_NUM_CHANNELS, expand_animations=False)

    # V2
    image = tf.cast(image, tf.float32)
    # NOTE(review): this resize squashes the image to a square without
    # preserving aspect ratio, and _RESIZE_MIN is unused -- confirm whether the
    # reference accuracy assumes a resize + central-crop pipeline instead.
    image = tf.image.resize(image, (_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE))
    image = tf.keras.applications.resnet_v2.preprocess_input(image)

    return image, label

def get_filenames(data_dir, num_val_files=128):
    """Return the validation shard paths for the dataset.

    Args:
        data_dir: Directory that contains the shards.
        num_val_files: Total number of shards; shard indices run from 0 to
            ``num_val_files - 1``.

    Returns:
        A list of ``num_val_files`` paths of the form
        ``<data_dir>/validation-XXXXX-of-YYYYY`` with zero-padded 5-digit
        numbers.
    """
    return [
        os.path.join(data_dir, f'validation-{i:05d}-of-{num_val_files:05d}')
        for i in range(num_val_files)
    ]

def _deserialize_image_record(record):
    """Parse one serialized example into its image bytes, label, bbox and text.

    Args:
        record: A single serialized example.

    Returns:
        A tuple ``(imgdata, label, bbox, text)``: the encoded image string, the
        class label cast to int32, the bounding boxes stacked in
        ``ymin, xmin, ymax, xmax`` order then expanded and transposed with
        permutation ``[0, 2, 1]``, and the class text.
    """
    # NOTE(review): the feature constructors and the parse call were stripped
    # when this script was pasted. They are reconstructed from the surviving
    # arguments (``[], tf.string, ''`` etc.) as tf.io.FixedLenFeature /
    # tf.io.VarLenFeature / tf.io.parse_single_example -- confirm against the
    # original script.
    feature_map = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32),
    }
    with tf.name_scope('deserialize_image_record'):
        obj = tf.io.parse_single_example(record, feature_map)
        imgdata = obj['image/encoded']
        label = tf.cast(obj['image/class/label'], tf.int32)
        # Sparse bbox coordinates are stacked coordinate-major, then reshaped
        # so that the coordinate axis comes last.
        bbox = tf.stack([obj['image/object/bbox/%s' % x].values
                         for x in ['ymin', 'xmin', 'ymax', 'xmax']])
        bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
        text = obj['image/class/text']

    return imgdata, label, bbox, text

def main():
    """Evaluate the pretrained ResNet50V2 on the validation set and print accuracy."""
    # Load tfrecord data
    val_batches = load_data(data_dir=DATA_DIR, batch_size=128)

    # Instantiate Baseline model
    # NOTE(review): the closing parenthesis of this call was missing in the
    # pasted script; any arguments that followed ``classes`` are unknown.
    model = tf.keras.applications.ResNet50V2(
        include_top=True, weights='imagenet', input_tensor=None,
        input_shape=None, pooling=None, classes=1000,
    )

    # compile() is called only so that evaluate() has a loss and a metric to
    # report; no training happens in this script.
    model.compile(
        optimizer="sgd",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    _, baseline_model_accuracy = model.evaluate(val_batches)
    print("Baseline val accuracy:", baseline_model_accuracy)

# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

gcunhase commented 2 years ago

Any updates on this?

gcunhase commented 2 years ago

Any updates on this?

gcunhase commented 2 years ago

Any updates on this?

qlzh727 commented 2 years ago

Let me close this bug and track the progress in