facebookresearch / detectron2

Detectron2 is a platform for object detection, segmentation and other visual recognition tasks.
https://detectron2.readthedocs.io/en/latest/
Apache License 2.0

AssertionError: RPN requires gt_instances in training! #3042

Closed: zhengjie6 closed this issue 3 years ago

zhengjie6 commented 3 years ago

Instructions To Reproduce the Issue:

  1. Full runnable code or full changes you made:

Changes made: in configs/Cityscapes/mask_rcnn_R_50_FPN.yaml, the DATASETS entries were changed to TRAIN: ("cityscapes_fine_sem_seg_train",) and TEST: ("cityscapes_fine_sem_seg_val",), as shown below.
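For reference, a sketch of the edit in config form (only the DATASETS section of configs/Cityscapes/mask_rcnn_R_50_FPN.yaml was touched; all other keys are unchanged):

```yaml
DATASETS:
  TRAIN: ("cityscapes_fine_sem_seg_train",)
  TEST: ("cityscapes_fine_sem_seg_val",)
```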

```python
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates.
"""
A main training script.

This script reads a given config file and runs the training or evaluation.
It is an entry point that is made to train standard models in detectron2.

In order to let one script support training of many models,
this script contains logic that is specific to these built-in models and therefore
may not be suitable for your own project.
For example, your research project perhaps only needs a single "evaluator".

Therefore, we recommend you use detectron2 as a library and take
this file as an example of how to use the library.
You may want to write your own script with your datasets and other customizations.
"""

import logging
import os
from collections import OrderedDict
import torch

import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import (
    CityscapesInstanceEvaluator,
    CityscapesSemSegEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    LVISEvaluator,
    PascalVOCDetectionEvaluator,
    SemSegEvaluator,
    verify_results,
)
from detectron2.modeling import GeneralizedRCNNWithTTA

class Trainer(DefaultTrainer):
    """
    We use the "DefaultTrainer", which contains pre-defined default logic for
    a standard training workflow. It may not work for you, especially if you
    are working on a new research project. In that case you can write your
    own training loop, using "tools/plain_train_net.py" as an example.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Create evaluator(s) for a given dataset.
        This uses the special metadata "evaluator_type" associated with each builtin dataset.
        For your own dataset, you can simply create an evaluator manually in your
        script and do not have to worry about the hacky if-else logic here.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
            evaluator_list.append(
                SemSegEvaluator(
                    dataset_name,
                    distributed=True,
                    output_dir=output_folder,
                )
            )
        if evaluator_type in ["coco", "coco_panoptic_seg"]:
            evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        if evaluator_type == "coco_panoptic_seg":
            evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
        if evaluator_type == "cityscapes_instance":
            assert (
                torch.cuda.device_count() >= comm.get_rank()
            ), "CityscapesEvaluator currently do not work with multiple machines."
            return CityscapesInstanceEvaluator(dataset_name)
        if evaluator_type == "cityscapes_sem_seg":
            assert (
                torch.cuda.device_count() >= comm.get_rank()
            ), "CityscapesEvaluator currently do not work with multiple machines."
            return CityscapesSemSegEvaluator(dataset_name)
        elif evaluator_type == "pascal_voc":
            return PascalVOCDetectionEvaluator(dataset_name)
        elif evaluator_type == "lvis":
            return LVISEvaluator(dataset_name, output_dir=output_folder)
        if len(evaluator_list) == 0:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, evaluator_type
                )
            )
        elif len(evaluator_list) == 1:
            return evaluator_list[0]
        return DatasetEvaluators(evaluator_list)

    @classmethod
    def test_with_TTA(cls, cfg, model):
        logger = logging.getLogger("detectron2.trainer")
        # At the end of training, run an evaluation with TTA.
        # This only supports some R-CNN models.
        logger.info("Running inference with test-time augmentation ...")
        model = GeneralizedRCNNWithTTA(cfg, model)
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res

def setup(args):
    """
    Create configs and perform basic setups.
    """
    cfg = get_cfg()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    return cfg

def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        res = Trainer.test(cfg, model)
        if cfg.TEST.AUG.ENABLED:
            res.update(Trainer.test_with_TTA(cfg, model))
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    """
    If you'd like to do anything fancier than the standard training logic,
    consider writing your own training loop (see plain_train_net.py) or
    subclassing the trainer.
    """
    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    if cfg.TEST.AUG.ENABLED:
        trainer.register_hooks(
            [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))]
        )
    return trainer.train()

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
```
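As an aside, main() in this script also supports evaluation-only runs via the --eval-only flag from default_argument_parser; a hypothetical invocation (the weights path is illustrative) would look like:

```sh
./tools/train_net.py --config-file configs/Cityscapes/mask_rcnn_R_50_FPN.yaml \
  --eval-only MODEL.WEIGHTS /path/to/model_final.pth
```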
  2. What exact command you run:

```sh
./tools/train_net.py --config-file /home/zhengjie/Desktop/FYP/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
```

  3. Full logs or other relevant observations:
    
    (detectron2) zhengjie@zhengjie-desktop:~/Desktop/FYP/detectron2$ ./tools/train_net.py   --config-file /home/zhengjie/Desktop/FYP/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml   --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025
    Command Line Args: Namespace(config_file='/home/zhengjie/Desktop/FYP/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml', dist_url='tcp://127.0.0.1:50152', eval_only=False, machine_rank=0, num_gpus=1, num_machines=1, opts=['SOLVER.IMS_PER_BATCH', '2', 'SOLVER.BASE_LR', '0.0025'], resume=False)
    [05/15 00:12:37 detectron2]: Rank of current process: 0. World size: 1

[05/15 00:12:38 detectron2]: Environment info:


```
sys.platform           linux
Python                 3.7.10 (default, Feb 26 2021, 18:47:35) [GCC 7.3.0]
numpy                  1.20.1
detectron2             0.4 @/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2
Compiler               GCC 7.3
CUDA compiler          CUDA 11.0
detectron2 arch flags  /home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/_C.cpython-37m-x86_64-linux-gnu.so
DETECTRON2_ENV_MODULE
PyTorch                1.7.0 @/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch
PyTorch debug build    True
GPU available          True
GPU 0                  GeForce RTX 2070 (arch=7.5)
CUDA_HOME              None - invalid!
Pillow                 8.2.0
torchvision            0.8.0 @/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torchvision
torchvision arch flags /home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torchvision/_C.so
fvcore                 0.1.5.post20210514
cv2                    Not found
```


PyTorch built with:

[05/15 00:12:38 detectron2]: Command line arguments: Namespace(config_file='/home/zhengjie/Desktop/FYP/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml', dist_url='tcp://127.0.0.1:50152', eval_only=False, machine_rank=0, num_gpus=1, num_machines=1, opts=['SOLVER.IMS_PER_BATCH', '2', 'SOLVER.BASE_LR', '0.0025'], resume=False)
[05/15 00:12:38 detectron2]: Contents of args.config_file=/home/zhengjie/Desktop/FYP/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml:

```yaml
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  # For better, more stable performance initialize from COCO
  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
  MASK_ON: True
  ROI_HEADS:
    NUM_CLASSES: 8
    # This is similar to the setting used in Mask R-CNN paper, Appendix A
    # But there are some differences, e.g., we did not initialize the output
    # layer using the corresponding classes from COCO
INPUT:
  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 2048
  MAX_SIZE_TEST: 2048
DATASETS:
  TRAIN: ("cityscapes_fine_sem_seg_train",)
  TEST: ("cityscapes_fine_sem_seg_val",)
SOLVER:
  BASE_LR: 0.01
  STEPS: (18000,)
  MAX_ITER: 24000
  IMS_PER_BATCH: 8
TEST:
  EVAL_PERIOD: 8000
```

```
[05/15 00:12:38 detectron2]: Running with full config:
CUDNN_BENCHMARK: False
DATALOADER: ASPECT_RATIO_GROUPING: True FILTER_EMPTY_ANNOTATIONS: True NUM_WORKERS: 4 REPEAT_THRESHOLD: 0.0 SAMPLER_TRAIN: TrainingSampler
DATASETS: PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000 PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000 PROPOSAL_FILES_TEST: () PROPOSAL_FILES_TRAIN: () TEST: ('cityscapes_fine_sem_seg_val',) TRAIN: ('cityscapes_fine_sem_seg_train',)
GLOBAL: HACK: 1.0
INPUT: CROP: ENABLED: False SIZE: [0.9, 0.9] TYPE: relative_range FORMAT: BGR MASK_FORMAT: polygon MAX_SIZE_TEST: 2048 MAX_SIZE_TRAIN: 2048 MIN_SIZE_TEST: 1024 MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) MIN_SIZE_TRAIN_SAMPLING: choice RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR: ANGLES: [[-90, 0, 90]] ASPECT_RATIOS: [[0.5, 1.0, 2.0]] NAME: DefaultAnchorGenerator OFFSET: 0.0 SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE: FREEZE_AT: 2 NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN: FUSE_TYPE: sum IN_FEATURES: ['res2', 'res3', 'res4', 'res5'] NORM: OUT_CHANNELS: 256
  KEYPOINT_ON: False
  LOAD_PROPOSALS: False
  MASK_ON: True
  META_ARCHITECTURE: GeneralizedRCNN
  PANOPTIC_FPN: COMBINE: ENABLED: True INSTANCES_CONFIDENCE_THRESH: 0.5 OVERLAP_THRESH: 0.5 STUFF_AREA_LIMIT: 4096 INSTANCE_LOSS_WEIGHT: 1.0
  PIXEL_MEAN: [103.53, 116.28, 123.675]
  PIXEL_STD: [1.0, 1.0, 1.0]
  PROPOSAL_GENERATOR: MIN_SIZE: 0 NAME: RPN
  RESNETS: DEFORM_MODULATED: False DEFORM_NUM_GROUPS: 1 DEFORM_ON_PER_STAGE: [False, False, False, False] DEPTH: 50 NORM: FrozenBN NUM_GROUPS: 1 OUT_FEATURES: ['res2', 'res3', 'res4', 'res5'] RES2_OUT_CHANNELS: 256 RES5_DILATION: 1 STEM_OUT_CHANNELS: 64 STRIDE_IN_1X1: True WIDTH_PER_GROUP: 64
  RETINANET: BBOX_REG_LOSS_TYPE: smooth_l1 BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) FOCAL_LOSS_ALPHA: 0.25 FOCAL_LOSS_GAMMA: 2.0 IN_FEATURES: ['p3', 'p4', 'p5', 'p6', 'p7'] IOU_LABELS: [0, -1, 1] IOU_THRESHOLDS: [0.4, 0.5] NMS_THRESH_TEST: 0.5 NORM: NUM_CLASSES: 80 NUM_CONVS: 4 PRIOR_PROB: 0.01 SCORE_THRESH_TEST: 0.05 SMOOTH_L1_LOSS_BETA: 0.1 TOPK_CANDIDATES_TEST: 1000
  ROI_BOX_CASCADE_HEAD: BBOX_REG_WEIGHTS: ((10.0, 10.0, 5.0, 5.0), (20.0, 20.0, 10.0, 10.0), (30.0, 30.0, 15.0, 15.0)) IOUS: (0.5, 0.6, 0.7)
  ROI_BOX_HEAD: BBOX_REG_LOSS_TYPE: smooth_l1 BBOX_REG_LOSS_WEIGHT: 1.0 BBOX_REG_WEIGHTS: (10.0, 10.0, 5.0, 5.0) CLS_AGNOSTIC_BBOX_REG: False CONV_DIM: 256 FC_DIM: 1024 NAME: FastRCNNConvFCHead NORM: NUM_CONV: 0 NUM_FC: 2 POOLER_RESOLUTION: 7 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2 SMOOTH_L1_BETA: 0.0 TRAIN_ON_PRED_BOXES: False
  ROI_HEADS: BATCH_SIZE_PER_IMAGE: 512 IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] IOU_LABELS: [0, 1] IOU_THRESHOLDS: [0.5] NAME: StandardROIHeads NMS_THRESH_TEST: 0.5 NUM_CLASSES: 8 POSITIVE_FRACTION: 0.25 PROPOSAL_APPEND_GT: True SCORE_THRESH_TEST: 0.05
  ROI_KEYPOINT_HEAD: CONV_DIMS: (512, 512, 512, 512, 512, 512, 512, 512) LOSS_WEIGHT: 1.0 MIN_KEYPOINTS_PER_IMAGE: 1 NAME: KRCNNConvDeconvUpsampleHead NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: True NUM_KEYPOINTS: 17 POOLER_RESOLUTION: 14 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2
  ROI_MASK_HEAD: CLS_AGNOSTIC_MASK: False CONV_DIM: 256 NAME: MaskRCNNConvUpsampleHead NORM: NUM_CONV: 4 POOLER_RESOLUTION: 14 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2
  RPN: BATCH_SIZE_PER_IMAGE: 256 BBOX_REG_LOSS_TYPE: smooth_l1 BBOX_REG_LOSS_WEIGHT: 1.0 BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) BOUNDARY_THRESH: -1 HEAD_NAME: StandardRPNHead IN_FEATURES: ['p2', 'p3', 'p4', 'p5', 'p6'] IOU_LABELS: [0, -1, 1] IOU_THRESHOLDS: [0.3, 0.7] LOSS_WEIGHT: 1.0 NMS_THRESH: 0.7 POSITIVE_FRACTION: 0.5 POST_NMS_TOPK_TEST: 1000 POST_NMS_TOPK_TRAIN: 1000 PRE_NMS_TOPK_TEST: 1000 PRE_NMS_TOPK_TRAIN: 2000 SMOOTH_L1_BETA: 0.0
  SEM_SEG_HEAD: COMMON_STRIDE: 4 CONVS_DIM: 128 IGNORE_VALUE: 255 IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] LOSS_WEIGHT: 1.0 NAME: SemSegFPNHead NORM: GN NUM_CLASSES: 54
  WEIGHTS: detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl
OUTPUT_DIR: ./output
SEED: -1
SOLVER: AMP: ENABLED: False BASE_LR: 0.0025 BIAS_LR_FACTOR: 1.0 CHECKPOINT_PERIOD: 5000 CLIP_GRADIENTS: CLIP_TYPE: value CLIP_VALUE: 1.0 ENABLED: False NORM_TYPE: 2.0 GAMMA: 0.1 IMS_PER_BATCH: 2 LR_SCHEDULER_NAME: WarmupMultiStepLR MAX_ITER: 24000 MOMENTUM: 0.9 NESTEROV: False REFERENCE_WORLD_SIZE: 0 STEPS: (18000,) WARMUP_FACTOR: 0.001 WARMUP_ITERS: 1000 WARMUP_METHOD: linear WEIGHT_DECAY: 0.0001 WEIGHT_DECAY_BIAS: 0.0001 WEIGHT_DECAY_NORM: 0.0
TEST: AUG: ENABLED: False FLIP: True MAX_SIZE: 4000 MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) DETECTIONS_PER_IMAGE: 100 EVAL_PERIOD: 8000 EXPECTED_RESULTS: [] KEYPOINT_OKS_SIGMAS: [] PRECISE_BN: ENABLED: False NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0
[05/15 00:12:38 detectron2]: Full config saved to ./output/config.yaml
[05/15 00:12:38 d2.utils.env]: Using a generated random seed 38271830
```

```
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) )
      (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) )
      (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) )
      (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) )
      (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) )
    )
  )
  (proposal_generator): RPN(
    (rpn_head): StandardRPNHead( (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) )
    (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() )
  )
  (roi_heads): StandardROIHeads(
    (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) )
    (box_head): FastRCNNConvFCHead( (flatten): Flatten(start_dim=1, end_dim=-1) (fc1): Linear(in_features=12544, out_features=1024, bias=True) (fc_relu1): ReLU() (fc2): Linear(in_features=1024, out_features=1024, bias=True) (fc_relu2): ReLU() )
    (box_predictor): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=9, bias=True) (bbox_pred): Linear(in_features=1024, out_features=32, bias=True) )
    (mask_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) )
    (mask_head): MaskRCNNConvUpsampleHead( (mask_fcn1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2)) (deconv_relu): ReLU() (predictor): Conv2d(256, 8, kernel_size=(1, 1), stride=(1, 1)) )
  )
)
[05/15 00:12:40 d2.data.datasets.cityscapes]: 18 cities found in 'datasets/cityscapes/leftImg8bit/train/'.
[05/15 00:12:42 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024), max_size=2048, sample_style='choice'), RandomFlip()]
[05/15 00:12:42 d2.data.build]: Using training sampler TrainingSampler
[05/15 00:12:42 d2.data.common]: Serializing 2975 elements to byte tensors and concatenating them all ...
[05/15 00:12:42 d2.data.common]: Serialized dataset takes 0.71 MiB
[05/15 00:12:42 fvcore.common.checkpoint]: [Checkpointer] Loading from detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...
[05/15 00:12:42 fvcore.common.checkpoint]: Reading a file from 'Detectron2 Model Zoo'
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (9, 1024) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (9,) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (32, 1024) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (32,) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (8, 256, 1, 1) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Skip loading parameter 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (8,) in the model! You might want to double check if this is expected.
WARNING [05/15 00:12:42 fvcore.common.checkpoint]: Some model parameters or buffers are not found in the checkpoint: roi_heads.box_predictor.bbox_pred.{bias, weight} roi_heads.box_predictor.cls_score.{bias, weight} roi_heads.mask_head.predictor.{bias, weight}
[05/15 00:12:42 d2.engine.train_loop]: Starting training from iteration 0
ERROR [05/15 00:12:43 d2.engine.train_loop]: Exception during training:
Traceback (most recent call last):
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/train_loop.py", line 138, in train
    self.run_step()
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/defaults.py", line 441, in run_step
    self._trainer.run_step()
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/train_loop.py", line 232, in run_step
    loss_dict = self.model(data)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 160, in forward
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 429, in forward
    assert gt_instances is not None, "RPN requires gt_instances in training!"
AssertionError: RPN requires gt_instances in training!
[05/15 00:12:43 d2.engine.hooks]: Total training time: 0:00:00 (0:00:00 on hooks)
[05/15 00:12:43 d2.utils.events]: iter: 0 lr: N/A max_mem: 3179M
Traceback (most recent call last):
  File "./tools/train_net.py", line 167, in <module>
    args=(args,),
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/launch.py", line 62, in launch
    main_func(*args)
  File "./tools/train_net.py", line 155, in main
    return trainer.train()
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/defaults.py", line 431, in train
    super().train(self.start_iter, self.max_iter)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/train_loop.py", line 138, in train
    self.run_step()
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/defaults.py", line 441, in run_step
    self._trainer.run_step()
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/engine/train_loop.py", line 232, in run_step
    loss_dict = self.model(data)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 160, in forward
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/detectron2/modeling/proposal_generator/rpn.py", line 429, in forward
    assert gt_instances is not None, "RPN requires gt_instances in training!"
AssertionError: RPN requires gt_instances in training!
```


## Expected behavior:

Training should start.

## Environment:

Paste the output of the following command:

2021-05-15 00:13:09 URL:https://raw.githubusercontent.com/facebookresearch/detectron2/master/detectron2/utils/collect_env.py [7340/7340] -> "collect_env.py" [1]


```
sys.platform           linux
Python                 3.7.10 (default, Feb 26 2021, 18:47:35) [GCC 7.3.0]
numpy                  1.20.1
detectron2             0.4 @/home/zhengjie/Desktop/FYP/detectron2/detectron2
detectron2._C          not built correctly: No module named 'detectron2._C'
Compiler ($CXX)        c++ (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
DETECTRON2_ENV_MODULE
PyTorch                1.7.0 @/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torch
PyTorch debug build    True
GPU available          True
GPU 0                  GeForce RTX 2070 (arch=7.5)
CUDA_HOME              None - invalid!
Pillow                 8.2.0
torchvision            0.8.0 @/home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torchvision
torchvision arch flags /home/zhengjie/anaconda3/envs/detectron2/lib/python3.7/site-packages/torchvision/_C.so
fvcore                 0.1.5.post20210514
iopath                 0.1.8
cv2                    Not found
```


PyTorch built with:

ppwwyyxx commented 3 years ago

cityscapes_fine_sem_seg_train is a semantic segmentation dataset, but Mask R-CNN is an instance segmentation model: it needs per-object gt_instances annotations during training, which the sem_seg splits do not provide. Use the cityscapes_fine_instance_seg_{train,val} splits from the original config instead.
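A minimal sketch of the fix is to restore the instance-segmentation splits that the stock Cityscapes config ships with:

```yaml
DATASETS:
  TRAIN: ("cityscapes_fine_instance_seg_train",)
  TEST: ("cityscapes_fine_instance_seg_val",)
```

To double-check which flavor a builtin dataset is, you can inspect its registered metadata (assuming a standard detectron2 install with the builtin datasets registered):

```python
from detectron2.data import MetadataCatalog

# The *_instance_seg splits carry per-object annotations, which the RPN consumes
# as gt_instances during training; the *_sem_seg splits only carry per-pixel labels.
for name in ["cityscapes_fine_instance_seg_train", "cityscapes_fine_sem_seg_train"]:
    print(name, "->", MetadataCatalog.get(name).evaluator_type)
# expected: "cityscapes_instance" and "cityscapes_sem_seg", respectively
```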