ResNet50v1 works fine, so I don't think the issue is with data-loading or with the eval code. My current guess is that there might be some disconnect between the loaded checkpoints and the input pre-processing function in ResNetv2 (keras.applications.resnet_v2.preprocess_input).
def preprocess_image_record_tf_v2(record):
    """
    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".
    """
    # Parse the serialized TFRecord example into its raw components.
    # NOTE(review): the snippet appears truncated here — the image decode,
    # resize, preprocess_input call, and the return statement are not shown.
    imgdata, label, bbox, text = _deserialize_image_record(record)
def get_filenames(data_dir, num_val_files=128):
    """Build the list of validation-shard file paths under *data_dir*."""
    shard_pattern = 'validation-{:05d}-of-{:05d}'
    filenames = []
    for shard_index in range(num_val_files):
        shard_name = shard_pattern.format(shard_index, num_val_files)
        filenames.append(os.path.join(data_dir, shard_name))
    return filenames
Summary
The baseline accuracy obtained for ResNet50v2 differs from the officially reported value:
ResNet50v1 works fine, so I don't think the issue is with data-loading or with the eval code. My current guess is that there might be some disconnect between the loaded checkpoints and the input pre-processing function in ResNetv2 (
keras.applications.resnet_v2.preprocess_input
). Any advice is appreciated.
Environment
Logs or source codes for reproduction
tfrecord
Validation dataset. Constants used: DATA_DIR = "/media/Data/ImageNet/train-val-tfrecord", _DEFAULT_IMAGE_SIZE = 224, _NUM_CHANNELS = 3, _RESIZE_MIN = 256.
def load_data(data_dir='./data/imagenet', batch_size=8, num_val_files=128):
1. Load the ImageNet2012 training dataset — the full ImageNet2012 dataset must be downloaded manually first.
def preprocess_image_record_tf_v2(record):
    """
    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".
    """
    # Parse the serialized TFRecord example into its raw components.
    # NOTE(review): the pasted snippet ends here — the image decode,
    # resize, preprocess_input call, and return statement are not shown.
    imgdata, label, bbox, text = _deserialize_image_record(record)
Subtract one so that labels are in [0, 1000)
def get_filenames(data_dir, num_val_files=128):
    """Return the validation TFRecord shard paths inside *data_dir*."""
    indices = range(num_val_files)
    names = ('validation-{:05d}-of-{:05d}'.format(i, num_val_files) for i in indices)
    return [os.path.join(data_dir, name) for name in names]
def _deserialize_image_record(record):
    """Parse one serialized ImageNet TFRecord example.

    Extracts the raw encoded image bytes, the int32 class label, the
    bounding boxes (stacked in ymin/xmin/ymax/xmax order and transposed
    to shape [1, num_boxes, 4]), and the human-readable class text.
    """
    # Feature spec: scalar fields get defaults ('' / -1) so missing keys
    # do not raise; bbox coordinates are variable-length sparse features.
    feature_map = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32)
    }
    with tf.name_scope('deserialize_image_record'):
        obj = tf.io.parse_single_example(record, feature_map)
        imgdata = obj['image/encoded']
        # Label stays as stored (1-based in standard ImageNet TFRecords);
        # any offset adjustment happens in the caller, not here.
        label = tf.cast(obj['image/class/label'], tf.int32)
        # Stack the four sparse coordinate vectors into [4, num_boxes] ...
        bbox = tf.stack([obj['image/object/bbox/%s' % x].values
                         for x in ['ymin', 'xmin', 'ymax', 'xmax']])
        # ... then rearrange to [1, num_boxes, 4].
        bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
        text = obj['image/class/text']
        # NOTE(review): snippet appears truncated — the expected
        # `return imgdata, label, bbox, text` is not shown.
def main():
Load tfrecord data
# Script entry point. The pasted text had the double underscores stripped
# by markdown italics rendering ("name"/"main"); restore the dunders so
# the guard actually matches the interpreter's module name.
if __name__ == '__main__':
    main()