I want to transfer this model to my task. However, when I ran forward inference with the provided pre-trained weights (fiber_refcoco.pth), the model's output for a simple sample was invalid.
Specifically, I wrote a simple forward-inference script based on test_grounding_net.py (shown below). The model completes the forward pass, but for a simple sample it outputs 0 boxes ([BoxList(num_boxes=0, image_width=1280, image_height=768, mode=xyxy)]).
Are there any steps in the forward process that require special attention? Thank you very much!
```python
from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip

import argparse
import datetime
import os

import torch
import torch.distributed as dist
from PIL import Image
from transformers import AutoTokenizer

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data.transforms import transforms as T
from maskrcnn_benchmark.data.transforms.build import build_transforms
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.comm import synchronize, get_rank
from maskrcnn_benchmark.utils.logger import setup_logger
from maskrcnn_benchmark.utils.miscellaneous import mkdir


def init_distributed_mode(args):
    """Initialize distributed training, if appropriate."""
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    elif "SLURM_PROCID" in os.environ:
        args.rank = int(os.environ["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    # args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print("| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True)
    dist.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        timeout=datetime.timedelta(0, 7200),
    )
    dist.barrier()
    setup_for_distributed(args.rank == 0)


def setup_for_distributed(is_master):
    """Disable printing when not in the master process."""
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print


def model_infer_test():
    # Parse arguments; the defaults are used here.
    parser = argparse.ArgumentParser(description="PyTorch Detection to Grounding Inference")
    parser.add_argument(
        "--config-file",
        default="configs/refcoco.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weight",
        default="model_weight/fiber_refcoco.pth",
        metavar="FILE",
        help="path to model weights",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts", help="Modify config options using the command line", default=None, nargs=argparse.REMAINDER
    )
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training")
    parser.add_argument("--task_config", default=None)
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        init_distributed_mode(args)
        print("Passed distributed init")

    cfg.local_rank = args.local_rank
    cfg.num_gpus = num_gpus
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Build the model and load the checkpoint.
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    model_without_ddp = model
    checkpoint = torch.load(args.weight, map_location="cpu")
    missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint["model"], strict=False)
    unexpected_keys = [k for k in unexpected_keys if not (k.endswith("total_params") or k.endswith("total_ops"))]
    if len(missing_keys) > 0:
        print("Missing Keys: {}".format(missing_keys))
    if len(unexpected_keys) > 0:
        print("Unexpected Keys: {}".format(unexpected_keys))
    model.eval()

    # Load and preprocess a single sample.
    data_dir = "resource/ZhuoQiu_video_02-Done/imgs/00002.jpg"
    txt = "the ball"
    transform = T.Compose(
        [
            # T.Resize(800, 1200),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    img = Image.open(data_dir).convert("RGB")
    img = img.resize((1280, 768))  # original size is (1280, 720); resized to (1280, 768) so both sides are divisible by 32
    imgs = [transform(img)]
    imgs = torch.stack(imgs, dim=0).to(cfg.MODEL.DEVICE)

    # tokenizer = AutoTokenizer.from_pretrained("roberta-base")
    # tokenized = tokenizer(txt, return_tensors="pt", max_length=256, truncation=True)

    # Run inference with the raw caption string, as in test_grounding_net.py.
    output = model(images=imgs, captions=[txt])
    print(output)


if __name__ == "__main__":
    model_infer_test()
    print("done!")
```
Here is the test image:
And here is the model's output:
```
[BoxList(num_boxes=0, image_width=1280, image_height=768, mode=xyxy)]
```
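One guess I had while debugging, in case it helps narrow things down: maybe every predicted box falls below the test-time score threshold. Would it be reasonable to lower it like this before cfg.freeze()? (This assumes the config exposes a GLIP-style MODEL.ATSS.INFERENCE_TH key; I haven't verified the exact key name for FIBER.)

```python
# Debugging sketch: relax the test-time score threshold before freezing the
# config, to check whether the model produces any boxes below the default
# cutoff. MODEL.ATSS.INFERENCE_TH is an assumed, GLIP-style key name.
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.merge_from_list(["MODEL.ATSS.INFERENCE_TH", 0.0])
cfg.freeze()
```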