Nearly zero loss, but all AP values are -1

Hello there,

Thanks for all the brilliant work! I read many issues and documents, which helped me a lot, but I didn't find any issue similar to mine.

Let me first describe the issue briefly; I will paste the code, commands and logs afterwards. I was trying to apply this work to my own dataset, which includes 100k images, and I registered it with

register_coco_instances("VG_100K_TEST", {},
                            "./VG_100K_data/test.json",
                            "./VG_100K_data/images")

My plan was to train for 300k iterations, with 2 images per batch, starting from

catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k

At first, the training looked like it was working well. The total loss started at about 16 but decreased sharply to 0.3 after a few hundred iterations. For the remaining 290k iterations the loss kept fluctuating between 0.1 and 0.2 (I think this means that it didn't work, since it hadn't even completed an epoch).

After all the iterations, I tried to evaluate the model, and indeed it didn't work. The code

output = model(input)

returns almost nothing except the image width and height.

I tried to visualize the output, and nothing was drawn on the image.

Instructions To Reproduce the Issue:

what changes you made (git diff) or what code you wrote


diff --git a/detectron2/config/defaults.py b/detectron2/config/defaults.py
index e901ea8..e8f7b7f 100644
--- a/detectron2/config/defaults.py
+++ b/detectron2/config/defaults.py
@@ -261,7 +261,7 @@ _C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
# detections that will slow down inference post processing steps (like NMS)
# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down
# inference.
-_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05
+_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.0001
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
@@ -525,7 +525,7 @@ _C.TEST = CN()
# For end-to-end tests to verify the expected accuracy.
# Each item is [task, metric, value, tolerance]
# e.g.: [['bbox', 'AP', 38.5, 0.2]]
-_C.TEST.EXPECTED_RESULTS = []
+_C.TEST.EXPECTED_RESULTS = [['bbox', 'AP', 38.5, 0.2]]
# The period (in terms of steps) to evaluate the model during training.
# Set to 0 to disable.
_C.TEST.EVAL_PERIOD = 0
diff --git a/detectron2/engine/defaults.py b/detectron2/engine/defaults.py
index f733cf7..fe5df01 100644
--- a/detectron2/engine/defaults.py
+++ b/detectron2/engine/defaults.py
@@ -186,8 +186,10 @@ class DefaultPredictor:
     image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

     inputs = {"image": image, "height": height, "width": width}
-        predictions = self.model([inputs])[0]
-        return predictions
+        predictions = self.model([inputs])
+        # print(predictions)
+        # predictions=predictions[0]
+        return predictions, inputs

class DefaultTrainer(SimpleTrainer):
diff --git a/detectron2/engine/hooks.py b/detectron2/engine/hooks.py
index 9697fb2..10fbdf9 100644
--- a/detectron2/engine/hooks.py
+++ b/detectron2/engine/hooks.py
@@ -319,28 +319,28 @@ class EvalHook(HookBase):
 def after_step(self):
     next_iter = self.trainer.iter + 1
     is_final = next_iter == self.trainer.max_iter
-        if is_final or (self._period > 0 and next_iter % self._period == 0):
-            results = self._func()
-
-            if results:
-                assert isinstance(
-                    results, dict
-                ), "Eval function must return a dict. Got {} instead.".format(results)
-
-                flattened_results = flatten_results_dict(results)
-                for k, v in flattened_results.items():
-                    try:
-                        v = float(v)
-                    except Exception:
-                        raise ValueError(
-                            "[EvalHook] eval_function should return a nested dict of float. "
-                            "Got '{}: {}' instead.".format(k, v)
-                        )
-                self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)
+        # if is_final or (self._period > 0 and next_iter % self._period == 0):
+        #     results = self._func()
+        #
+        #     if results:
+        #         assert isinstance(
+        #             results, dict
+        #         ), "Eval function must return a dict. Got {} instead.".format(results)
+        #
+        #         flattened_results = flatten_results_dict(results)
+        #         for k, v in flattened_results.items():
+        #             try:
+        #                 v = float(v)
+        #             except Exception:
+        #                 raise ValueError(
+        #                     "[EvalHook] eval_function should return a nested dict of float. "
+        #                     "Got '{}: {}' instead.".format(k, v)
+        #                 )
+        #         self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)

         # Evaluation may take different time among workers.
         # A barrier make them start the next iteration together.
-            comm.synchronize()
+            # comm.synchronize()

 def after_train(self):
     # func is likely a closure that holds reference to the trainer
diff --git a/detectron2/evaluation/coco_evaluation.py b/detectron2/evaluation/coco_evaluation.py
index c7dc264..1f2a522 100644
--- a/detectron2/evaluation/coco_evaluation.py
+++ b/detectron2/evaluation/coco_evaluation.py
@@ -96,7 +96,6 @@ class COCOEvaluator(DatasetEvaluator):
     """
     for input, output in zip(inputs, outputs):
         prediction = {"image_id": input["image_id"]}
-
         # TODO this is ugly
         if "instances" in output:
             instances = output["instances"].to(self._cpu_device)
@@ -174,7 +173,6 @@ class COCOEvaluator(DatasetEvaluator):
             if len(self._coco_results) > 0
             else None  # cocoapi does not handle empty results very well
         )
-
         res = self._derive_coco_results(
             coco_eval, task, class_names=self._metadata.get("thing_classes")
         )
@@ -491,5 +489,6 @@ def _evaluate_predictions_on_coco(coco_gt, coco_results, iou_type, kpt_oks_sigma
 coco_eval.evaluate()
 coco_eval.accumulate()
 coco_eval.summarize()
-
+    # print(coco_eval)
+    # print(len(coco_eval))
 return coco_eval
diff --git a/detectron2/evaluation/evaluator.py b/detectron2/evaluation/evaluator.py
index 25ea798..4a643ba 100644
--- a/detectron2/evaluation/evaluator.py
+++ b/detectron2/evaluation/evaluator.py
@@ -7,6 +7,7 @@ from contextlib import contextmanager
import torch

from detectron2.utils.comm import is_main_process
+# from tools.train_net import predictor

class DatasetEvaluator:
@@ -100,6 +101,7 @@ def inference_on_dataset(model, data_loader, evaluator):
 Returns:
     The return value of `evaluator.evaluate()`
 """
+
 num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1
 logger = logging.getLogger(__name__)
 logger.info("Start inference on {} images".format(len(data_loader)))
@@ -111,6 +113,7 @@ def inference_on_dataset(model, data_loader, evaluator):
 num_warmup = min(5, logging_interval - 1, total - 1)
 start_time = time.time()
 total_compute_time = 0
+
 with inference_context(model), torch.no_grad():
     for idx, inputs in enumerate(data_loader):
         if idx == num_warmup:
diff --git a/tools/train_net.py b/tools/train_net.py
index 11e7b40..7996e7e 100755
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -19,11 +19,15 @@ import logging
import os
from collections import OrderedDict
import torch
+import cv2

import detectron2.utils.comm as comm
+from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
+from detectron2.engine.defaults import DefaultPredictor
+from detectron2.data.datasets import  register_coco_instances
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import (
 CityscapesEvaluator,
@@ -72,14 +76,14 @@ class Trainer(DefaultTrainer):
         evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder))
     if evaluator_type == "coco_panoptic_seg":
         evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
-        elif evaluator_type == "cityscapes":
+        if evaluator_type == "cityscapes":
         assert (
             torch.cuda.device_count() >= comm.get_rank()
         ), "CityscapesEvaluator currently do not work with multiple machines."
         return CityscapesEvaluator(dataset_name)
-        elif evaluator_type == "pascal_voc":
+        if evaluator_type == "pascal_voc":
         return PascalVOCDetectionEvaluator(dataset_name)
-        elif evaluator_type == "lvis":
+        if evaluator_type == "lvis":
         return LVISEvaluator(dataset_name, cfg, True, output_folder)
     if len(evaluator_list) == 0:
         raise NotImplementedError(
@@ -87,7 +91,7 @@ class Trainer(DefaultTrainer):
                 dataset_name, evaluator_type
             )
         )
-        elif len(evaluator_list) == 1:
+        if len(evaluator_list) == 1:
         return evaluator_list[0]
     return DatasetEvaluators(evaluator_list)

@@ -114,8 +118,28 @@ def setup(args): Create configs and perform basic setups. """ cfg = get_cfg()

cfg.merge_from_file(args.config_file)
cfg.merge_from_file(args.config_file)
cfg.merge_from_file(
"./configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml"
)
'''
Modified by jian, in order to train on the specific dataset
2019.12.16 Edit, try to test
'''
cfg.DATASETS.TRAIN = ("VG_100K",)
cfg.DATASETS.TEST = ("small_vg",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
cfg.MODEL.WEIGHTS = "./it-30w_lr-0.0025_bat-2/model_0099999.pth"
cfg.OUTPUT_DIR = './it-30w_lr-0.0025_bat-2_t'
cfg.SOLVER.IMS_PER_BATCH = 1
cfg.SOLVER.BASE_LR = 0.0025
cfg.SOLVER.MAX_ITER = (300000)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = (1)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 150 cfg.merge_from_list(args.opts)
predictor = DefaultPredictor(cfg)
cfg.TEST.AUG.ENABLED = True

cfg.freeze() default_setup(cfg, args) return cfg @@ -123,7 +147,33 @@ def setup(args):

def main(args): cfg = setup(args)
print("Register VG_100K_TEST dataset")
register_coco_instances("VG_100K_TEST", {},
"./VG_100K_data/test.json",
"./VG_100K_data/images")
print("Register VG_100K dataset")
register_coco_instances("VG_100K", {},
"./VG_100K_data/trainval.json",
"./VG_100K_data/images")
print("Register small vg dataset")
register_coco_instances("small_vg", {},
"./VG_100K_data/small_vg.json",
"./VG_100K_data/images")
args.eval_only = True
vg_metadata = MetadataCatalog.get("VG_100K")
predictor = DefaultPredictor(cfg)
data_f = "./VG_100K_data/VG_100K/1.jpg"
im = cv2.imread(data_f)
res = predictor(im)
v = Visualizer(im[:, :, ::-1],
metadata=vg_metadata,
scale=0.8,
instance_mode=ColorMode.IMAGE_BW # remove the colors of unsegmented pixels
)
v = v.draw_instance_predictions(res["instances"].to("cpu"))
img = v.get_image()[:, :, ::-1]
cv2.imwrite("vg_test.jpg", img)
exit()

if args.eval_only: model = Trainer.build_model(cfg) DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( @@ -141,7 +191,7 @@ def main(args): consider writing your own training loop or subclassing the trainer. """ trainer = Trainer(cfg)
trainer.resume_or_load(resume=args.resume)
trainer.resume_or_load(resume=True) if cfg.TEST.AUG.ENABLED: trainer.register_hooks( [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] @@ -150,8 +200,10 @@ def main(args):

if name == "main":
os.environ["CUDA_VISIBLE_DEVICES"] = '1' args = default_argument_parser().parse_args() print("Command Line Args:", args)

print("Done") launch( main, args.num_gpus,


2. what exact command you run:
I apologize: I modified some default args and cfg values directly in the code, which makes it harder for you to analyze the problem from the commands alone.

Training by

python tools/train_net.py --num-gpus 1

Evaluate by

python tools/train_net.py --eval-only

What you observed (including the full logs): First part:


[12/20 09:20:30] detectron2 INFO: Rank of current process: 0. World size: 1
[12/20 09:20:30] detectron2 INFO: Environment info:
------------------------  -------------------------------------------------------------------
sys.platform              linux
Python                    3.6.3 |Anaconda, Inc.| (default, Oct 13 2017, 12:02:49) [GCC 7.2.0]
Numpy                     1.17.4
Detectron2 Compiler       GCC 5.4
Detectron2 CUDA Compiler  9.0
DETECTRON2_ENV_MODULE     <not set>
PyTorch                   1.3.1
PyTorch Debug Build       False
torchvision               0.4.2
CUDA available            True
GPU 0                     Tesla P100-PCIE-16GB
CUDA_HOME                 /usr/local/cuda
NVCC                      Cuda compilation tools, release 9.0, V9.0.176
Pillow                    6.2.1
cv2                       4.1.2
------------------------  -------------------------------------------------------------------
PyTorch built with:
- GCC 7.3
- Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v0.20.5 (Git Hash 0125f28c61c1f822fd48570b4c1066f96fcb9b2e)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- NNPACK is enabled
- CUDA Runtime 10.1
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
- CuDNN 7.6.3
- Magma 2.5.1
- Build settings: BLAS=MKL, BUILD_NAMEDTENSOR=OFF, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -fopenmp -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Wno-stringop-overflow, DISABLE_NUMA=1, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=True, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_STATIC_DISPATCH=OFF,

[12/20 09:20:30] detectron2 INFO: Command line arguments: Namespace(config_file='', dist_url='tcp://127.0.0.1:50168', eval_only=False, machine_rank=0, num_gpus=1, num_machines=1, opts=[], resume=False) [12/20 09:20:30] detectron2 INFO: Running with full config: CUDNN_BENCHMARK: False DATALOADER: ASPECT_RATIO_GROUPING: True FILTER_EMPTY_ANNOTATIONS: True NUM_WORKERS: 2 REPEAT_THRESHOLD: 0.0 SAMPLER_TRAIN: TrainingSampler DATASETS: PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000 PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000 PROPOSAL_FILES_TEST: () PROPOSAL_FILES_TRAIN: () TEST: ('VG_TEST',) TRAIN: ('VG_100K',) GLOBAL: HACK: 1.0 INPUT: CROP: ENABLED: True SIZE: [0.9, 0.9] TYPE: relative_range FORMAT: BGR MASK_FORMAT: polygon MAX_SIZE_TEST: 1333 MAX_SIZE_TRAIN: 1440 MIN_SIZE_TEST: 800 MIN_SIZE_TRAIN: (640, 864) MIN_SIZE_TRAIN_SAMPLING: range MODEL: ANCHOR_GENERATOR: ANGLES: [[-90, 0, 90]] ASPECT_RATIOS: [[0.5, 1.0, 2.0]] NAME: DefaultAnchorGenerator OFFSET: 0.0 SIZES: [[32], [64], [128], [256], [512]] BACKBONE: FREEZE_AT: 2 NAME: build_resnet_fpn_backbone DEVICE: cuda FPN: FUSE_TYPE: sum IN_FEATURES: ['res2', 'res3', 'res4', 'res5'] NORM: OUT_CHANNELS: 256 KEYPOINT_ON: False LOAD_PROPOSALS: False MASK_ON: True META_ARCHITECTURE: GeneralizedRCNN PANOPTIC_FPN: COMBINE: ENABLED: True INSTANCES_CONFIDENCE_THRESH: 0.5 OVERLAP_THRESH: 0.5 STUFF_AREA_LIMIT: 4096 INSTANCE_LOSS_WEIGHT: 1.0 PIXEL_MEAN: [103.53, 116.28, 123.675] PIXEL_STD: [1.0, 1.0, 1.0] PROPOSAL_GENERATOR: MIN_SIZE: 0 NAME: RPN RESNETS: DEFORM_MODULATED: False DEFORM_NUM_GROUPS: 1 DEFORM_ON_PER_STAGE: [False, True, True, True] DEPTH: 152 NORM: FrozenBN NUM_GROUPS: 32 OUT_FEATURES: ['res2', 'res3', 'res4', 'res5'] RES2_OUT_CHANNELS: 256 RES5_DILATION: 1 STEM_OUT_CHANNELS: 64 STRIDE_IN_1X1: False WIDTH_PER_GROUP: 8 RETINANET: BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) FOCAL_LOSS_ALPHA: 0.25 FOCAL_LOSS_GAMMA: 2.0 IN_FEATURES: ['p3', 'p4', 'p5', 'p6', 'p7'] IOU_LABELS: [0, -1, 1] IOU_THRESHOLDS: [0.4, 0.5] NMS_THRESH_TEST: 0.5 
NUM_CLASSES: 80 NUM_CONVS: 4 PRIOR_PROB: 0.01 SCORE_THRESH_TEST: 0.05 SMOOTH_L1_LOSS_BETA: 0.1 TOPK_CANDIDATES_TEST: 1000 ROI_BOX_CASCADE_HEAD: BBOX_REG_WEIGHTS: ((10.0, 10.0, 5.0, 5.0), (20.0, 20.0, 10.0, 10.0), (30.0, 30.0, 15.0, 15.0)) IOUS: (0.5, 0.6, 0.7) ROI_BOX_HEAD: BBOX_REG_WEIGHTS: (10.0, 10.0, 5.0, 5.0) CLS_AGNOSTIC_BBOX_REG: True CONV_DIM: 256 FC_DIM: 1024 NAME: FastRCNNConvFCHead NORM: GN NUM_CONV: 4 NUM_FC: 1 POOLER_RESOLUTION: 7 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2 SMOOTH_L1_BETA: 0.0 ROI_HEADS: BATCH_SIZE_PER_IMAGE: 1 IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] IOU_LABELS: [0, 1] IOU_THRESHOLDS: [0.5] NAME: CascadeROIHeads NMS_THRESH_TEST: 0.5 NUM_CLASSES: 150 POSITIVE_FRACTION: 0.25 PROPOSAL_APPEND_GT: True SCORE_THRESH_TEST: 0.0001 ROI_KEYPOINT_HEAD: CONV_DIMS: (512, 512, 512, 512, 512, 512, 512, 512) LOSS_WEIGHT: 1.0 MIN_KEYPOINTS_PER_IMAGE: 1 NAME: KRCNNConvDeconvUpsampleHead NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: True NUM_KEYPOINTS: 17 POOLER_RESOLUTION: 14 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2 ROI_MASK_HEAD: CLS_AGNOSTIC_MASK: False CONV_DIM: 256 NAME: MaskRCNNConvUpsampleHead NORM: GN NUM_CONV: 8 POOLER_RESOLUTION: 14 POOLER_SAMPLING_RATIO: 0 POOLER_TYPE: ROIAlignV2 RPN: BATCH_SIZE_PER_IMAGE: 256 BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) BOUNDARY_THRESH: -1 HEAD_NAME: StandardRPNHead IN_FEATURES: ['p2', 'p3', 'p4', 'p5', 'p6'] IOU_LABELS: [0, -1, 1] IOU_THRESHOLDS: [0.3, 0.7] LOSS_WEIGHT: 1.0 NMS_THRESH: 0.7 POSITIVE_FRACTION: 0.5 POST_NMS_TOPK_TEST: 1000 POST_NMS_TOPK_TRAIN: 2000 PRE_NMS_TOPK_TEST: 1000 PRE_NMS_TOPK_TRAIN: 2000 SMOOTH_L1_BETA: 0.0 SEM_SEG_HEAD: COMMON_STRIDE: 4 CONVS_DIM: 128 IGNORE_VALUE: 255 IN_FEATURES: ['p2', 'p3', 'p4', 'p5'] LOSS_WEIGHT: 1.0 NAME: SemSegFPNHead NORM: GN NUM_CLASSES: 54 WEIGHTS: ./it-30w_lr-0.0025_bat-2/model_0099999.pth OUTPUT_DIR: ./it-30w_lr-0.0025_bat-2_t SEED: -1 SOLVER: BASE_LR: 0.0025 BIAS_LR_FACTOR: 1.0 CHECKPOINT_PERIOD: 5000 GAMMA: 0.1 IMS_PER_BATCH: 1 LR_SCHEDULER_NAME: 
WarmupMultiStepLR MAX_ITER: 300000 MOMENTUM: 0.9 STEPS: (35000, 45000) WARMUP_FACTOR: 0.001 WARMUP_ITERS: 1000 WARMUP_METHOD: linear WEIGHT_DECAY: 0.0001 WEIGHT_DECAY_BIAS: 0.0001 WEIGHT_DECAY_NORM: 0.0 TEST: AUG: ENABLED: False FLIP: True MAX_SIZE: 4000 MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) DETECTIONS_PER_IMAGE: 100 EVAL_PERIOD: 2500 EXPECTED_RESULTS: [['bbox', 'AP', 38.5, 0.2]] KEYPOINT_OKS_SIGMAS: [] PRECISE_BN: ENABLED: False NUM_ITER: 200 VERSION: 2 VIS_PERIOD: 0 [12/20 09:20:30] detectron2 INFO: Full config saved to /media/e/hujiang/detectron2/it-30w_lr-0.0025_bat-2_t/config.yaml [12/20 09:20:30] d2.utils.env INFO: Using a generated random seed 30631325 [12/20 09:20:41] d2.engine.defaults INFO: Model: GeneralizedRCNN( (backbone): FPN( (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelMaxPool() (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False 
(norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): DeformBottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): 
Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (4): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, 
deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (5): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (6): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (7): DeformBottleneckBlock( (conv1): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2_offset): Conv2d(512, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, 
eps=1e-05) ) ) ) (res4): Sequential( (0): DeformBottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): 
DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (6): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): 
Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (7): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (8): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (9): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (10): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (11): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (12): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), 
stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (13): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (14): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (15): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (16): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 
1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (17): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (18): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (19): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): 
DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (20): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (21): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (22): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): 
FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (23): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (24): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (25): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, 
eps=1e-05) ) ) (26): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (27): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (28): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (29): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) 
) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (30): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (31): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (32): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 
1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (33): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (34): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (35): DeformBottleneckBlock( (conv1): Conv2d( 1024, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv2_offset): Conv2d(1024, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=1024, out_channels=1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv3): Conv2d( 1024, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): DeformBottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv2_offset): Conv2d(2048, 18, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (conv2): DeformConv( in_channels=2048, out_channels=2048, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv3): Conv2d( 2048, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): DeformBottleneckBlock( (conv1): Conv2d( 2048, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv2_offset): Conv2d(2048, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=2048, out_channels=2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv3): Conv2d( 2048, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (2): DeformBottleneckBlock( (conv1): Conv2d( 2048, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv2_offset): Conv2d(2048, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (conv2): DeformConv( in_channels=2048, out_channels=2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), dilation=(1, 1), groups=32, deformable_groups=1, bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv3): Conv2d( 2048, 2048, kernel_size=(1, 1), 
stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (proposal_generator): RPN( (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) (rpn_head): StandardRPNHead( (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) ) ) (roi_heads): CascadeROIHeads( (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (box_head): ModuleList( (0): FastRCNNConvFCHead( (conv1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (fc1): Linear(in_features=12544, out_features=1024, bias=True) ) (1): FastRCNNConvFCHead( (conv1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, 
affine=True) ) (conv4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (fc1): Linear(in_features=12544, out_features=1024, bias=True) ) (2): FastRCNNConvFCHead( (conv1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (conv4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (fc1): Linear(in_features=12544, out_features=1024, bias=True) ) ) (box_predictor): ModuleList( (0): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=151, bias=True) (bbox_pred): Linear(in_features=1024, out_features=4, bias=True) ) (1): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=151, bias=True) (bbox_pred): Linear(in_features=1024, out_features=4, bias=True) ) (2): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=151, bias=True) (bbox_pred): Linear(in_features=1024, out_features=4, bias=True) ) ) (mask_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (mask_head): MaskRCNNConvUpsampleHead( (mask_fcn1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) 
(mask_fcn2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn5): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn6): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn7): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (mask_fcn8): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): GroupNorm(32, 256, eps=1e-05, affine=True) ) (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2)) (predictor): Conv2d(256, 150, kernel_size=(1, 1), stride=(1, 1)) ) ) ) [12/20 09:20:51] d2.data.datasets.coco INFO: Loading ./VG_100K_data/trainval.json takes 9.71 seconds. [12/20 09:20:51] d2.data.datasets.coco INFO: Loaded 75651 images in COCO format from ./VG_100K_data/trainval.json [12/20 09:20:57] d2.data.build INFO: Removed 2148 images with no usable annotations. 73503 images left. [12/20 09:21:01] d2.data.build INFO: Distribution of training instances among all 150 categories:	category	#instances	category	#instances	category
airplane	2992	animal	2622	arm	6689
bag	6939	banana	3775	basket	2483
beach	3732	bear	4541	bed	3935
bench	6199	bike	4764	bird	6885
board	3843	boat	8113	book	3542
boot	1328	bottle	5982	bowl	4871
box	4625	boy	8333	branch	3259
building	33863	bus	7215	cabinet	3115
cap	4750	car	18616	cat	5470
chair	11333	child	3349	clock	7871
coat	3614	counter	4114	cow	6153
cup	4263	curtain	2081	desk	3202
dog	6506	door	11485	drawer	1816
ear	11491	elephant	7002	engine	2416
eye	10043	face	7011	fence	11441
finger	2246	flag	3897	flower	6907
food	5669	fork	2453	fruit	2257
giraffe	6724	girl	7410	glass	6841
glove	3326	guy	2292	hair	15787
hand	15918	handle	8493	hat	10177
head	17371	helmet	7587	hill	3148
horse	6829	house	4808	jacket	9503
jean	129	kid	1567	kite	5597
lady	2718	lamp	4259	laptop	3262
leaf	6758	leg	13061	letter	5320
light	18266	logo	5457	man	60659
men	1984	motorcycle	5105	mountain	3185
mouth	4368	neck	3780	nose	8607
number	4918	orange	2154	pant	265
paper	3755	paw	2002	people	14903
person	42023	phone	3078	pillow	4771
pizza	4456	plane	6475	plant	4268
plate	11387	player	4616	pole	19368
post	5863	pot	2570	racket	2587
railing	2469	rock	6080	roof	5745
room	1952	screen	2135	seat	3753
sheep	6020	shelf	4768	shirt	31505
shoe	8234	short	346	sidewalk	9006
sign	27058	sink	3964	skateboard	4826
ski	1674	skier	2654	sneaker	1222
snow	9585	sock	2154	stand	1968
street	10624	surfboard	4776	table	19387
tail	8299	tie	2888	tile	5568
tire	5797	toilet	3296	towel	2732
tower	3209	track	2318	train	9541
tree	33621	truck	5357	trunk	4493
umbrella	7004	vase	3429	vegetable	1122
vehicle	3464	wave	3778	wheel	9304
window	47474	windshield	2671	wing	3458
wire	2015	woman	27248	zebra	6361

total	1107208

[12/20 09:21:01] d2.data.dataset_mapper INFO: CropGen used in training: RandomCrop(crop_type='relative_range', crop_size=[0.9, 0.9]) [12/20 09:21:01] d2.data.detection_utils INFO: TransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 864), max_size=1440, sample_style='range'), RandomFlip()] [12/20 09:21:01] d2.data.build INFO: Using training sampler TrainingSampler [12/20 09:21:02] fvcore.common.checkpoint INFO: Loading checkpoint from ./it-30w_lr-0.0025_bat-2/model_0099999.pth [12/20 09:21:06] fvcore.common.checkpoint INFO: Loading optimizer from ./it-30w_lr-0.0025_bat-2/model_0099999.pth [12/20 09:21:07] fvcore.common.checkpoint INFO: Loading scheduler from ./it-30w_lr-0.0025_bat-2/model_0099999.pth [12/20 09:21:07] d2.engine.train_loop INFO: Starting training from iteration 100000 [12/20 09:21:53] d2.utils.events INFO: eta: 5 days, 11:57:20 iter: 100019 total_loss: 0.242 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.091 loss_rpn_loc: 0.146 time: 2.2641 data_time: 0.0020 lr: 0.000025 max_mem: 5414M [12/20 09:22:37] d2.utils.events INFO: eta: 4 days, 23:53:27 iter: 100039 total_loss: 0.246 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.099 loss_rpn_loc: 0.153 time: 2.2323 data_time: 0.0017 lr: 0.000025 max_mem: 5993M ... ... 
[12/25 15:35:56] d2.utils.events INFO: eta: 0:02:20 iter: 299939 total_loss: 0.256 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.094 loss_rpn_loc: 0.156 time: 2.2686 data_time: 0.0019 lr: 0.000025 max_mem: 6436M [12/25 15:36:41] d2.utils.events INFO: eta: 0:01:34 iter: 299959 total_loss: 0.228 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.076 loss_rpn_loc: 0.128 time: 2.2686 data_time: 0.0021 lr: 0.000025 max_mem: 6436M [12/25 15:37:26] d2.utils.events INFO: eta: 0:00:48 iter: 299979 total_loss: 0.207 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.081 loss_rpn_loc: 0.062 time: 2.2686 data_time: 0.0023 lr: 0.000025 max_mem: 6436M [12/25 15:38:11] fvcore.common.checkpoint INFO: Saving checkpoint to ./it-30w_lr-0.0025_bat-2_t/model_0299999.pth [12/25 15:38:14] fvcore.common.checkpoint INFO: Saving checkpoint to ./it-30w_lr-0.0025_bat-2_t/model_final.pth [12/25 15:38:17] d2.utils.events INFO: eta: 0:00:02 iter: 299999 total_loss: 0.226 loss_cls_stage0: 0.000 loss_box_reg_stage0: 0.000 loss_cls_stage1: 0.000 loss_box_reg_stage1: 0.000 loss_cls_stage2: 0.000 loss_box_reg_stage2: 0.000 loss_mask: 0.000 loss_rpn_cls: 0.082 loss_rpn_loc: 0.123 time: 2.2686 data_time: 0.0020 lr: 0.000025 max_mem: 6436M [12/25 15:38:17] d2.engine.hooks INFO: Overall training speed: 199997 iterations in 5 days, 6:01:56 (2.2686 s / it) [12/25 15:38:17] d2.engine.hooks INFO: Total training time: 5 days, 6:17:04 (0:15:07 on hooks)


The evaluation logs (I ran the evaluation on a tiny dataset of 50 images for debugging):

... ... [01/02 15:41:58 d2.evaluation.evaluator]: Start inference on 51 images [01/02 15:42:27 d2.evaluation.evaluator]: Inference done 50/51. 0.5634 s / img. ETA=0:00:00 [01/02 15:42:27 d2.evaluation.evaluator]: Total inference time: 0:00:26 (0.565217 s / img per device, on 1 devices) [01/02 15:42:27 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:25 (0.561752 s / img per device, on 1 devices) [01/02 15:42:27 d2.evaluation.coco_evaluation]: Preparing results for COCO format ... [01/02 15:42:27 d2.evaluation.coco_evaluation]: Saving results to ./it-30w_lr-0.0025_bat-2_t/inference/coco_instances_results.json [01/02 15:42:27 d2.evaluation.coco_evaluation]: Evaluating predictions ... WARNING [01/02 15:42:27 d2.evaluation.coco_evaluation]: No predictions from the model! Set scores to -1 WARNING [01/02 15:42:27 d2.evaluation.coco_evaluation]: No predictions from the model! Set scores to -1 [01/02 15:42:27 d2.engine.defaults]: Evaluation results for small_vg in csv format: [01/02 15:42:27 d2.evaluation.testing]: copypaste: Task: bbox [01/02 15:42:27 d2.evaluation.testing]: copypaste: AP,AP50,AP75,APs,APm,APl [01/02 15:42:27 d2.evaluation.testing]: copypaste: -1.0000,-1.0000,-1.0000,-1.0000,-1.0000,-1.0000 [01/02 15:42:27 d2.evaluation.testing]: copypaste: Task: segm [01/02 15:42:27 d2.evaluation.testing]: copypaste: AP,AP50,AP75,APs,APm,APl [01/02 15:42:27 d2.evaluation.testing]: copypaste: -1.0000,-1.0000,-1.0000,-1.0000,-1.0000,-1.0000 ERROR [01/02 15:42:27 d2.evaluation.testing]: Result verification failed! ERROR [01/02 15:42:27 d2.evaluation.testing]: Expected Results: [['bbox', 'AP', 38.5, 0.2]] ERROR [01/02 15:42:27 d2.evaluation.testing]: Actual Results: OrderedDict([('bbox', {'AP': -1, 'AP50': -1, 'AP75': -1, 'APl': -1, 'APm': -1, 'APs': -1}), ('segm', {'AP': -1, 'AP50': -1, 'AP75': -1, 'APl': -1, 'APm': -1, 'APs': -1})])

4. please also simplify the steps as much as possible so they do not require additional resources to
     run, such as a private dataset.

## Expected behavior:

I understand that you will **not** give me any suggestions about training.
But I think the problem I ran into is not about "how to train" but about "how to use detectron2 correctly".

I know that my experiment won't achieve the same mAP as yours, of course.
But at least the APs shouldn't be 0. Please help me.

## Environment:

sys.platform linux Python 3.6.3	Anaconda, Inc.	(default, Oct 13 2017, 12:02:49) [GCC 7.2.0] Numpy 1.17.4 Detectron2 Compiler GCC 5.4 Detectron2 CUDA Compiler 9.0 DETECTRON2_ENV_MODULE PyTorch 1.3.1 PyTorch Debug Build False torchvision 0.4.2 CUDA available True GPU 0,1,2,3,4,5,6,7 Tesla P100-PCIE-16GB CUDA_HOME /usr/local/cuda NVCC Cuda compilation tools, release 9.0, V9.0.176 Pillow 6.2.1 cv2 4.1.2

PyTorch built with:

GCC 7.3
Intel(R) Math Kernel Library Version 2019.0.4 Product Build 20190411 for Intel(R) 64 architecture applications
Intel(R) MKL-DNN v0.20.5 (Git Hash 0125f28c61c1f822fd48570b4c1066f96fcb9b2e)
OpenMP 201511 (a.k.a. OpenMP 4.5)
NNPACK is enabled
CUDA Runtime 10.1
NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_37,code=compute_37
CuDNN 7.6.3
Magma 2.5.1
Build settings: BLAS=MKL, BUILD_NAMEDTENSOR=OFF, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -fopenmp -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Wno-stringop-overflow, DISABLE_NUMA=1, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=True, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_STATIC_DISPATCH=OFF,

Thanks for reading.

facebookresearch / detectron2

nearly 0 Loss but AP are all -1 #607

Instructions To Reproduce the Issue:

cfg.merge_from_file(args.config_file)

cfg.MODEL.WEIGHTS = "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"

predictor = DefaultPredictor(cfg)

cfg.TEST.AUG.ENABLED = True

args.eval_only = True

vg_metadata = MetadataCatalog.get("VG_100K")

predictor = DefaultPredictor(cfg)

data_f = "./VG_100K_data/VG_100K/1.jpg"

im = cv2.imread(data_f)

res = predictor(im)

v = Visualizer(im[:, :, ::-1],

metadata=vg_metadata,

scale=0.8,

instance_mode=ColorMode.IMAGE_BW # remove the colors of unsegmented pixels

)

v = v.draw_instance_predictions(res["instances"].to("cpu"))

img = v.get_image()[:, :, ::-1]

cv2.imwrite("vg_test.jpg", img)

exit()