Hi, I found a way to circumvent using tsv files by modifying `scene_graph_benchmark/tools/demo/demo_image.py`, and now I only need a `jpg` image dataset, a VinVL yaml configuration file, and a model weight file. The predictions are saved in a dictionary and stored in `pth` format. I ran it on Google Colab and it generates predictions at a rate of about 2s/image. I hope this helps. #51
Hi, I found a way to circumvent using tsv files by modifying scene_graph_benchmark/tools/demo/demo_image.py, and now I only need jpg image dataset, VinVl yaml configuration file and model weight file. The predictions are saved in dictionary and are stored in pth format. I ran it on Google Colab and it generates predictions at a rate about 2s/image. I hope this helps.
# pretrained models at https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/vinvl_vg_x152c4.pth
# the associated labelmap at https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/vinvl_model_zoo/VG-SGG-dicts-vgoi6-clipped.json
import argparse
import json
import os
import os.path as op

import cv2
import torch
from PIL import Image

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data.datasets.utils.load_files import (
    config_dataset_file,
    load_labelmap_file,
)
from maskrcnn_benchmark.data.transforms import build_transforms
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.utils.miscellaneous import mkdir
from scene_graph_benchmark.AttrRCNN import AttrRCNN
from scene_graph_benchmark.config import sg_cfg
from scene_graph_benchmark.scene_parser import SceneParser
def cv2Img_to_Image(input_img):
    """Convert an OpenCV image (BGR ndarray) into a PIL ``Image`` in RGB.

    A copy is made first so the caller's array is never mutated by the
    color conversion.
    """
    rgb_array = cv2.cvtColor(input_img.copy(), cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
def detect_objects_on_single_image(model, transforms, cv2_img):
    """Run the detector on a single BGR image and return its predictions.

    The raw ``cv2_img`` is kept around for its height/width, which are
    used to rescale the predicted boxes from the transformed resolution
    back to the original image resolution.
    """
    pil_img = cv2Img_to_Image(cv2_img)
    tensor_img, _ = transforms(pil_img, target=None)
    tensor_img = tensor_img.to(model.device)

    # Inference only — no gradients needed; move the result off-device.
    with torch.no_grad():
        prediction = model(tensor_img)[0].to('cpu')

    # Rescale boxes to the original (width, height) of cv2_img.
    orig_height, orig_width = cv2_img.shape[0], cv2_img.shape[1]
    return prediction.resize((orig_width, orig_height))
# --- Configuration -------------------------------------------------------
# Allow the scene-graph keys to be merged into the base maskrcnn config,
# then lock the schema again before reading the VinVL yaml.
cfg.set_new_allowed(True)
cfg.merge_from_other_cfg(sg_cfg)
cfg.set_new_allowed(False)

# VinVL model configuration file.
cfg.merge_from_file('/scene_graph_benchmark/sgg_configs/vgattr/vinvl_x152c4.yaml')

# Ordered (key, value) override pairs:
#   MODEL.WEIGHT — full path of the VinVL weight .pth file.
argument_list = [
    'MODEL.WEIGHT', 'vinvl_vg_x152c4.pth',
    'MODEL.ROI_HEADS.NMS_FILTER', 1,
    'MODEL.ROI_HEADS.SCORE_THRESH', 0.2,
    'TEST.IGNORE_BOX_REGRESSION', False,
    'MODEL.ATTRIBUTE_ON', True,
]
cfg.merge_from_list(argument_list)
cfg.freeze()

output_dir = cfg.OUTPUT_DIR

# --- Model setup ---------------------------------------------------------
model = AttrRCNN(cfg)
model.to(cfg.MODEL.DEVICE)
model.eval()

checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
checkpointer.load(cfg.MODEL.WEIGHT)
transforms = build_transforms(cfg, is_train=False)

# --- Inference loop ------------------------------------------------------
input_img_directory = 'insert your images directory path here'
# Output path must end in .pth (saved with torch.save).
output_prediction_file = 'insert your output pth file path here'

dets = {}
for img_name in os.listdir(input_img_directory):
    # splitext is robust to dots inside the filename, unlike split('.').
    stem, ext = op.splitext(img_name)

    # Convert PNG inputs to JPG alongside the original.
    if ext.lower() == '.png':
        im = Image.open(op.join(input_img_directory, img_name))
        rgb_im = im.convert('RGB')
        new_name = stem + '.jpg'
        rgb_im.save(op.join(input_img_directory, new_name))
        print(new_name)

    img_file_path = op.join(input_img_directory, stem + '.jpg')
    print(img_file_path)

    cv2_img = cv2.imread(img_file_path)
    if cv2_img is None:
        # Unreadable or missing file — skip instead of crashing inference.
        continue

    det = detect_objects_on_single_image(model, transforms, cv2_img)
    # det fields: labels, scores, box_features, scores_all, boxes_all,
    # attr_labels, attr_scores. box_features are the inputs used by Oscar.
    # Bug fix: the original referenced an undefined name `det1[0]`;
    # `det` is already a single prediction, so index it directly.
    det_dict = {key: det.get_field(key) for key in det.fields()}
    dets[stem] = det_dict

torch.save(dets, output_prediction_file)
Hi, I found a way to circumvent using tsv files by modifying `scene_graph_benchmark/tools/demo/demo_image.py`, and now I only need a `jpg` image dataset, a VinVL yaml configuration file, and a model weight file. The predictions are saved in a dictionary and stored in `pth` format. I ran it on Google Colab and it generates predictions at a rate of about 2s/image. I hope this helps.

_Originally posted by @SPQRXVIII001 in https://github.com/microsoft/scene_graph_benchmark/issues/7#issuecomment-904324682_