The inference code is copied from https://github.com/tensorflow/models/blob/master/research/im2txt/im2txt/inference_utils/caption_generator.py#L141. I think it is possible to write a batch version, but I don't have time to do it.
I have a multi-GPU version. I may push it to the repository in a few days. I can send it to you if you want it now.
Thanks! You can find my email on my GitHub homepage : )
"""Evaluates the performance of all the checkpoints on validation set."""
import cPickle as pkl
import glob
import json
import multiprocessing
import os
import sys
import numpy as np
from absl import app
from absl import flags
from tqdm import tqdm
from config import COCO_PATH
flags.DEFINE_integer('threads', 1, 'num of threads')
from caption_infer import Infer
sys.path.insert(0, COCO_PATH)
from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap
FLAGS = flags.FLAGS
def initializer(args):
"""Decides which GPU is assigned to a worker.
If your GPU memory is large enough, you may put several workers in one GPU.
"""
devices = os.getenv('CUDA_VISIBLE_DEVICES')
if devices is None:
devices = []
else:
devices = devices.split(',')
if len(devices) == 0:
os.environ['CUDA_VISIBLE_DEVICES'] = ''
else:
current = multiprocessing.current_process()
id = (current._identity[0] - 1) % len(devices)
os.environ['CUDA_VISIBLE_DEVICES'] = devices[id]
global infer
infer = Infer(job_dir='%s/model.ckpt-%s' % (FLAGS.job_dir, args))
def run(args):
name, feat = args
sentences = infer.infer(feat[np.newaxis])
return name, sentences
def image_generator():
with open('data/image_val.pkl', 'r') as f:
dic = pkl.load(f)
for k, v in dic.items():
yield k, v
def main(_):
results = glob.glob(FLAGS.job_dir + '/model.ckpt-*')
results = [os.path.splitext(i)[0] for i in results]
results = set(results)
gs_list = [i.split('-')[-1] for i in results]
gs_list = sorted(gs_list, key=lambda x: int(x))
with open(COCO_PATH + '/annotations/captions_conceptual.json') as g:
caption_data = json.load(g)
name_to_id = [(x['file_name'], x['id']) for x in caption_data['images']]
name_to_id = dict(name_to_id)
meta = []
for i in gs_list:
out = FLAGS.job_dir + '/val_%s.json' % i
if not os.path.exists(out):
ret = []
pool = multiprocessing.Pool(FLAGS.threads, initializer, (i,))
for name, sentences in tqdm(pool.imap(run, image_generator()),
total=14748):
cur = {}
cur['image_id'] = name_to_id[name]
cur['caption'] = sentences[0][0]
ret.append(cur)
pool.close()
pool.join()
with open(out, 'w') as g:
json.dump(ret, g)
coco = COCO(COCO_PATH + '/annotations/captions_conceptual.json')
cocoRes = coco.loadRes(out)
# create cocoEval object by taking coco and cocoRes
cocoEval = COCOEvalCap(coco, cocoRes)
# evaluate on a subset of images by setting
# cocoEval.params['image_id'] = cocoRes.getImgIds()
# please remove this line when evaluating the full validation set
cocoEval.params['image_id'] = cocoRes.getImgIds()
# evaluate results
cocoEval.evaluate()
meta.append((i, cocoEval.eval['CIDEr'], cocoEval.eval['METEOR'],
cocoEval.eval['Bleu_4'], cocoEval.eval['Bleu_3'],
cocoEval.eval['Bleu_2']))
ret = meta
ret = sorted(ret, key=lambda x: x[1])
with open(FLAGS.job_dir + '/cider.json', 'w') as f:
json.dump(ret, f)
ret = sorted(ret, key=lambda x: x[2])
with open(FLAGS.job_dir + '/meteor.json', 'w') as f:
json.dump(ret, f)
ret = sorted(ret, key=lambda x: x[3])
with open(FLAGS.job_dir + '/b4.json', 'w') as f:
json.dump(ret, f)
ret = sorted(ret, key=lambda x: x[4])
with open(FLAGS.job_dir + '/b3.json', 'w') as f:
json.dump(ret, f)
ret = sorted(ret, key=lambda x: x[5])
with open(FLAGS.job_dir + '/b2.json', 'w') as f:
json.dump(ret, f)
ret = sorted(ret, key=lambda x: x[3] + x[4])
with open(FLAGS.job_dir + '/b34.json', 'w') as f:
json.dump(ret, f)
if __name__ == '__main__':
app.run(main)
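For reference, one way the script might be launched so that the workers spread over two GPUs, since initializer() assigns worker 1 to the first visible device and worker 2 to the second. The --threads flag is defined in the script above; the script name eval_all.py, the --job_dir flag (presumably defined elsewhere in the repository, e.g. in caption_infer.py), and the checkpoint directory are assumptions:

CUDA_VISIBLE_DEVICES=0,1 python eval_all.py --job_dir saving --threads 2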
image_generator returns the image name and the image.
Hi, it seems that 'data/image_val.pkl' is missing. Could you please upload it?
COCO_PATH + '/annotations/captions_conceptual.json' is also missing; do you mean COCO_PATH + '/annotations/captions_val2014.json'?
And could you explain why the total is 14748 in the following code? If this value represents the number of validation images, shouldn't it be 5000?
for name, sentences in tqdm(pool.imap(run, image_generator()),
                            total=14748):
Thanks!
You don't need the image_val.pkl file; you should rewrite the image_generator() function.
You can just change 14748 to 5000. I was using this code for a different experiment.
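A minimal sketch of how image_generator() could be rewritten to walk the COCO val2014 images directly instead of reading data/image_val.pkl. The images/val2014 directory layout, the use of PIL for loading, and the assumption that Infer accepts a raw RGB array are all mine; adjust the loading and preprocessing to whatever caption_infer.Infer actually expects.

import json
import os

import numpy as np
from PIL import Image  # assumed available; any image loader works

from config import COCO_PATH


def image_generator():
  """Yield (file_name, image) pairs for every image in the val split."""
  ann_file = os.path.join(COCO_PATH, 'annotations', 'captions_val2014.json')
  with open(ann_file) as f:
    caption_data = json.load(f)
  for info in caption_data['images']:
    # Assumed path layout: COCO_PATH/images/val2014/<file_name>.
    path = os.path.join(COCO_PATH, 'images', 'val2014', info['file_name'])
    image = np.array(Image.open(path).convert('RGB'))
    yield info['file_name'], image

With this version, total=14748 in the tqdm call should also be changed to the number of images in the annotation file (5000 for the split discussed above).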
As mentioned in https://github.com/fengyang0317/unsupervised_captioning/issues/4, testing/evaluation is slow. One reason is that it does not support multiple GPUs for a single model, but I think the crucial reason may be that it iterates over images one by one instead of processing them in batches. I notice that you use different data loaders for training and testing: TFRecords are used during training, while a placeholder is used during testing. I wonder why testing/evaluation does not use the same data loader and a similar pipeline as training, so that it could also process data in batches. The parameter
batch_size
is defined in caption_infer.py,
but it seems that any size larger than one causes errors. https://github.com/fengyang0317/unsupervised_captioning/blob/ae17dc7edf556689eb943c8e51581a229ad41742/caption_infer.py#L29 Could you please kindly provide a batch version? Thanks!
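A minimal sketch of what a batched evaluation loop might look like. It assumes a hypothetical Infer.infer_batch() that accepts a [batch, feature_dim] array and returns one list of (caption, score) candidates per image; the repository's caption_infer.Infer only exposes a per-image infer(feat[np.newaxis]) call, so this only illustrates the intended interface, not the existing API.

import numpy as np


def batched(image_generator, batch_size=32):
  """Group (name, feature) pairs from image_generator into batches."""
  names, feats = [], []
  for name, feat in image_generator():
    names.append(name)
    feats.append(feat)
    if len(feats) == batch_size:
      yield names, np.stack(feats, axis=0)
      names, feats = [], []
  if feats:  # flush the last partial batch
    yield names, np.stack(feats, axis=0)


def run_batched(infer, image_generator, batch_size=32):
  """Decode a whole batch per session run instead of one image at a time."""
  results = []
  for names, batch in batched(image_generator, batch_size):
    # infer_batch is a hypothetical batched variant of infer.infer();
    # it is assumed to return one candidate list per image in the batch.
    all_sentences = infer.infer_batch(batch)
    for name, sentences in zip(names, all_sentences):
      results.append((name, sentences))
  return results

The grouping helper is independent of the model; the real work would be making the placeholder and beam search in caption_infer.py accept a batch dimension larger than one.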