ResNet50v1 works fine, so I don't think the issue is with data-loading or with the eval code. My current guess is that there might be some disconnect between the loaded checkpoints and the input pre-processing function in ResNetv2 (keras.applications.resnet_v2.preprocess_input).
def preprocess_image_record_tf_v2(record):
    """
    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".
    """
    # Parse the serialized TFRecord example into its raw components.
    # NOTE(review): the snippet appears truncated here — the image decode,
    # resize, preprocess_input call, and the return statement are not shown.
    imgdata, label, bbox, text = _deserialize_image_record(record)
def get_filenames(data_dir, num_val_files=128):
    """Build the list of validation-shard file paths under *data_dir*."""
    shard_pattern = 'validation-{:05d}-of-{:05d}'
    filenames = []
    for shard_index in range(num_val_files):
        shard_name = shard_pattern.format(shard_index, num_val_files)
        filenames.append(os.path.join(data_dir, shard_name))
    return filenames
Summary
The baseline accuracy obtained for ResNet50v2 differs from the officially reported value:
ResNet50v1 works fine, so I don't think the issue is with data-loading or with the eval code. My current guess is that there might be some disconnect between the loaded checkpoints and the input pre-processing function in ResNetv2 (
keras.applications.resnet_v2.preprocess_input
). Any advice is appreciated.
Environment
Logs or source codes for reproduction
tfrecord
Validation dataset. Constants used: DATA_DIR = "/media/Data/ImageNet/train-val-tfrecord", _DEFAULT_IMAGE_SIZE = 224, _NUM_CHANNELS = 3, _RESIZE_MIN = 256.
def load_data(data_dir='./data/imagenet', batch_size=8, num_val_files=128):
1. Load the ImageNet2012 training dataset — the full ImageNet2012 dataset must be downloaded manually first.
def preprocess_image_record_tf_v2(record):
    """
    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".
    """
    # Parse the serialized TFRecord example into its raw components.
    # NOTE(review): the pasted snippet ends here — the image decode,
    # resize, preprocess_input call, and return statement are not shown.
    imgdata, label, bbox, text = _deserialize_image_record(record)
Subtract one so that labels are in [0, 1000)
def get_filenames(data_dir, num_val_files=128):
    """Return the validation TFRecord shard paths inside *data_dir*."""
    indices = range(num_val_files)
    names = ('validation-{:05d}-of-{:05d}'.format(i, num_val_files) for i in indices)
    return [os.path.join(data_dir, name) for name in names]
def _deserialize_image_record(record):
    """Parse one serialized ImageNet TFRecord example.

    Extracts the raw encoded image bytes, the int32 class label, the
    bounding boxes (stacked in ymin/xmin/ymax/xmax order and transposed
    to shape [1, num_boxes, 4]), and the human-readable class text.
    """
    # Feature spec: scalar fields get defaults ('' / -1) so missing keys
    # do not raise; bbox coordinates are variable-length sparse features.
    feature_map = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32)
    }
    with tf.name_scope('deserialize_image_record'):
        obj = tf.io.parse_single_example(record, feature_map)
        imgdata = obj['image/encoded']
        # Label stays as stored (1-based in standard ImageNet TFRecords);
        # any offset adjustment happens in the caller, not here.
        label = tf.cast(obj['image/class/label'], tf.int32)
        # Stack the four sparse coordinate vectors into [4, num_boxes] ...
        bbox = tf.stack([obj['image/object/bbox/%s' % x].values
                         for x in ['ymin', 'xmin', 'ymax', 'xmax']])
        # ... then rearrange to [1, num_boxes, 4].
        bbox = tf.transpose(tf.expand_dims(bbox, 0), [0, 2, 1])
        text = obj['image/class/text']
        # NOTE(review): snippet appears truncated — the expected
        # `return imgdata, label, bbox, text` is not shown.
def main():
Load tfrecord data
# Script entry point. The pasted text had the double underscores stripped
# by markdown italics rendering ("name"/"main"); restore the dunders so
# the guard actually matches the interpreter's module name.
if __name__ == '__main__':
    main()