zjhuang22 / maskscoring_rcnn

Codes for paper "Mask Scoring R-CNN".
MIT License
1.9k stars 378 forks source link

roi_maskiou_feature_extractors.py: "x = torch.cat((x, mask_pool), 1)" fails with "invalid argument 0: Sizes of tensors must match except in dimension 1." #97

Closed jpainam closed 4 years ago

jpainam commented 4 years ago

Hi, I trained the model on my custom dataset, and training worked well. During testing, however, I get the error below. I even tried testing on the same dataset I trained on, and I still get the same error.

loading annotations into memory...
Done (t=0.10s)
creating index...
index created!
2020-08-18 16:38:42,344 maskrcnn_benchmark.inference INFO: Start evaluation on 1042 images
0it [00:00, ?it/s]THCudaCheck FAIL file=/pytorch/aten/src/THC/THCGeneral.cpp line=405 error=11 : invalid argument
Traceback (most recent call last):
  File "tools/test_net.py", line 94, in <module>
    main()
  File "tools/test_net.py", line 88, in main
    maskiou_on=cfg.MODEL.MASKIOU_ON
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/engine/inference.py", line 379, in inference
    predictions = compute_on_dataset(model, data_loader, device)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/engine/inference.py", line 31, in compute_on_dataset
    output = model(images)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py", line 51, in forward
    x, result, detector_losses = self.roi_heads(features, proposals, targets)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py", line 43, in forward
    loss_maskiou, detections = self.maskiou(roi_feature, detections, selected_mask, labels, maskiou_targets)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/modeling/roi_heads/maskiou_head/maskiou_head.py", line 41, in forward
    x = self.feature_extractor(features, selected_mask)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 489, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/eldad/anaconda3/lib/python3.7/site-packages/maskrcnn_benchmark-0.1-py3.7-linux-x86_64.egg/maskrcnn_benchmark/modeling/roi_heads/maskiou_head/roi_maskiou_feature_extractors.py", line 39, in forward
    x = torch.cat((x, mask_pool), 1)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 5 and 20 in dimension 0 at /pytorch/aten/src/THC/generic/THCTensorMath.cu:83

This is the tools/test_net.py file I used for testing:

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Set up custom environment before nearly anything else is imported
# NOTE: this should be the first import (do not reorder)
from maskrcnn_benchmark.utils.env import setup_environment  # noqa F401 isort:skip

import argparse
import os

import torch
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data import make_data_loader
from maskrcnn_benchmark.engine.inference import inference
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.utils.collect_env import collect_env_info
from maskrcnn_benchmark.utils.comm import synchronize, get_rank
from maskrcnn_benchmark.utils.logger import setup_logger
from maskrcnn_benchmark.utils.miscellaneous import mkdir

def main():
    """Evaluate a detection model on every dataset in ``cfg.DATASETS.TEST``.

    Steps, in order:

    1. Parse ``--config-file``, ``--local_rank`` and any trailing config
       overrides (``opts``) from the command line.
    2. If the ``WORLD_SIZE`` environment variable is greater than 1, select
       the CUDA device given by ``--local_rank`` and initialise the NCCL
       process group.
    3. Merge the config file and the overrides into ``cfg``, then freeze it.
    4. Log the GPU count, the config and the collected environment info.
    5. Build the model, move it to ``cfg.MODEL.DEVICE`` and load the weights
       named by ``cfg.MODEL.WEIGHT``.
    6. Run ``inference`` once per test data loader, calling ``synchronize()``
       after each run.
    """
    arg_parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference"
    )
    arg_parser.add_argument(
        "--config-file",
        default="/home/eldad/maskscoring_rcnn/configs/e2e_ms_rcnn_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    arg_parser.add_argument("--local_rank", type=int, default=0)
    # Everything after the known options is collected verbatim into "opts".
    arg_parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    cli = arg_parser.parse_args()

    # WORLD_SIZE is only read here; a missing variable means a single process.
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    is_distributed = world_size > 1
    if is_distributed:
        torch.cuda.set_device(cli.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    # Command-line overrides are applied after the file so they take effect
    # last; the config is frozen once both have been merged.
    cfg.merge_from_file(cli.config_file)
    cfg.merge_from_list(cli.opts)
    cfg.freeze()

    log = setup_logger("maskrcnn_benchmark", "", get_rank())
    log.info("Using {} GPUs".format(world_size))
    log.info(cfg)
    log.info("Collecting env info (might take some time)")
    log.info("\n" + collect_env_info())

    detector = build_detection_model(cfg)
    detector.to(cfg.MODEL.DEVICE)

    weight_loader = DetectronCheckpointer(cfg, detector, save_dir=cfg.OUTPUT_DIR)
    weight_loader.load(cfg.MODEL.WEIGHT)

    # Box evaluation always runs; mask evaluation is added when MASK_ON is set.
    eval_types = ("bbox", "segm") if cfg.MODEL.MASK_ON else ("bbox",)

    # One result folder per test dataset, or None for each dataset when no
    # output directory is configured.
    test_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        result_dirs = []
        for test_name in test_names:
            result_dir = os.path.join(cfg.OUTPUT_DIR, "inference", test_name)
            mkdir(result_dir)
            result_dirs.append(result_dir)
    else:
        result_dirs = [None] * len(test_names)

    val_loaders = make_data_loader(
        cfg, is_train=False, is_distributed=is_distributed
    )
    for result_dir, val_loader in zip(result_dirs, val_loaders):
        inference(
            detector,
            val_loader,
            iou_types=eval_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=result_dir,
            maskiou_on=cfg.MODEL.MASKIOU_ON,
        )
        synchronize()

# Run the evaluation only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()

Thanks for pointing me to the right solution. I found that in maskrcnn_benchmark/modeling/roi_heads/maskiou_head/roi_maskiou_feature_extractors.py, just before the line x = torch.cat((x, mask_pool), 1), the two tensors have the following shapes:

x # 20x256x14x14
mask_pool # 5x1x14x14

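As you can see, size(0) does not match (20 for x versus 5 for mask_pool), which is exactly the "Got 5 and 20 in dimension 0" mismatch that torch.cat reports in the traceback above.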