Closed deepaksinghcv closed 4 years ago
Any suggestions on what should be done to fix this issue?
I tried executing the same with v2.0.0. I'm still facing the same issue.
I found the mistake.
I have converted cityscapes to coco.
But in the config file I have specified dataset_type='CityscapesDataset'
, when I changed it to dataset_type='CocoDataset'
it performed the validation.
It worked on v2.0.0
Note: Cityscapes has 500 val images, but the evaluation shows 496/493 images. I'm not closing the issue because the lengths are still unequal.
Hi, sorry for the late reply, could you show me your config?
I wanted to train on cityscapes, so i converted cityscapes to coco format. I have mentioned the path to the annotation files. I have mentioned the classes.
During validation the progress bar reports 496/493 images, and the evaluation then fails because 493 results do not match the 500 images in the dataset.
classes=('person','rider','car','truck','bus','train','motorcycle','bicycle')
# classes='./cityscapes_classes.txt'
model=dict(
type='MaskScoringRCNN',
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(
type='BN',
requires_grad=True),
norm_eval=True,
style='pytorch',
groups=32,
base_width=4),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='L1Loss',
loss_weight=1.0)),
roi_head=dict(
type='MaskScoringRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
out_size=7,
sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=8,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='L1Loss',
loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
out_size=14,
sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=8,
loss_mask=dict(
type='CrossEntropyLoss',
use_mask=True,
loss_weight=1.0)),
mask_iou_head=dict(
type='MaskIoUHead',
num_convs=4,
num_fcs=2,
roi_feat_size=14,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
num_classes=8)))
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False,
mask_thr_binary=0.5))
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(
type='nms',
iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5))
dataset_type='CocoDataset'
data_root='data/cityscapes'
img_norm_cfg=dict(
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations',
with_bbox=True,
with_mask=True),
dict(type='Resize',
img_scale=(1333, 800),
keep_ratio=True),
dict(type='RandomFlip',
flip_ratio=0.5),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])]
test_pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]
data=dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_train.json',
img_prefix='data/cityscapes/leftImg8bit/train',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations',
with_bbox=True,
with_mask=True),
dict(type='Resize',
img_scale=(1333, 800),
keep_ratio=True),
dict(type='RandomFlip',
flip_ratio=0.5),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])]),
val=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
img_prefix='data/cityscapes/leftImg8bit/val',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]),
test=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
img_prefix='data/cityscapes/leftImg8bit/val',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]))
evaluation=dict(
interval=1,
metric=['bbox', 'segm'])
optimizer=dict(
type='SGD',
lr=0.02,
momentum=0.9,
weight_decay=0.0001)
optimizer_config=dict(
grad_clip=None)
lr_config=dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
total_epochs=12
checkpoint_config=dict(
interval=1)
log_config=dict(
interval=50,
hooks=[
dict(type='TextLoggerHook')])
dist_params=dict(
backend='nccl')
model=dict(
type='MaskScoringRCNN',
pretrained='open-mmlab://resnext101_32x4d',
backbone=dict(
type='ResNeXt',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(
type='BN',
requires_grad=True),
norm_eval=True,
style='pytorch',
groups=32,
base_width=4),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='L1Loss',
loss_weight=1.0)),
roi_head=dict(
type='MaskScoringRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
out_size=7,
sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=8,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='L1Loss',
loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
out_size=14,
sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=8,
loss_mask=dict(
type='CrossEntropyLoss',
use_mask=True,
loss_weight=1.0)),
mask_iou_head=dict(
type='MaskIoUHead',
num_convs=4,
num_fcs=2,
roi_feat_size=14,
in_channels=256,
conv_out_channels=256,
fc_out_channels=1024,
num_classes=8)))
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False,
mask_thr_binary=0.5))
test_cfg=dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(
type='nms',
iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5))
dataset_type='CocoDataset'
data_root='data/cityscapes/'
img_norm_cfg=dict(
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations',
with_bbox=True,
with_mask=True),
dict(type='Resize',
img_scale=(1333, 800),
keep_ratio=True),
dict(type='RandomFlip',
flip_ratio=0.5),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])]
test_pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]
data=dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_train.json',
img_prefix='data/cityscapes/leftImg8bit/train',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations',
with_bbox=True,
with_mask=True),
dict(type='Resize',
img_scale=(1333, 800),
keep_ratio=True),
dict(type='RandomFlip',
flip_ratio=0.5),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect',
keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])]),
val=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
img_prefix='data/cityscapes/leftImg8bit/val',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]),
test=dict(
classes=classes,
type='CocoDataset',
ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json',
img_prefix='data/cityscapes/leftImg8bit/val',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize',
keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad',
size_divisor=32),
dict(type='ImageToTensor',
keys=['img']),
dict(type='Collect',
keys=['img'])])]))
evaluation=dict(
interval=1,
metric=['bbox', 'segm'])
optimizer=dict(
type='SGD',
lr=0.02,
momentum=0.9,
weight_decay=0.0001)
optimizer_config=dict(
grad_clip=None)
lr_config=dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
total_epochs=12
checkpoint_config=dict(
interval=1)
log_config=dict(
interval=50,
hooks=[
dict(type='TextLoggerHook')])
dist_params=dict(
backend='nccl')
log_level='INFO'
load_from=None
resume_from=None
workflow=[('train', 1)]
work_dir='/ssd_scratch/cvit/dksingh/mmdetection_logs/ms_rcnn_x101_32x4d_fpn_1x_cityscapes_new/'
gpu_ids=range(0, 4)
Describe the bug I'm trying to train MS_RCNN on cityscapes. I converted cityscapes to coco format and created a config file and modified the necessary parameters. The model trains and validates, but while displaying the evaluated result it throws: AssertionError: The length of results is not equal to the dataset len: 493 != 500
Reproduction
What command or script did you run?
Did you make any modifications on the code or config? Did you understand what you have modified? I have added a config file in configs/ms_rcnn directory.
What dataset did you use? Cityscapes but in COCO format. Environment
Please run
python mmdet/utils/collect_env.py
to collect necessary environment information and paste it here.TorchVision: 0.6.0a0+82fd1c8 OpenCV: 4.2.0 MMCV: 0.5.1 MMDetection: 2.0.0+c802b17 MMDetection Compiler: GCC 5.5 MMDetection CUDA Compiler: 10.2
(open-mmlab) dksingh@gnode17:~/sandboxes/msrcnn_coco/mmdetection$ ./tools/dist_train.sh configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_cityscapes_new.py 4
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
2020-06-10 12:20:03,994 - mmdet - INFO - Environment info:
sys.platform: linux Python: 3.7.7 (default, May 7 2020, 21:25:33) [GCC 7.3.0] CUDA available: True CUDA_HOME: /usr/local/cuda-10.0 NVCC: Cuda compilation tools, release 10.0, V10.0.130 GPU 0,1,2,3: GeForce GTX 1080 Ti GCC: gcc (Ubuntu 5.5.0-12ubuntu1~16.04) 5.5.0 20171010 PyTorch: 1.5.0 PyTorch compiling details: PyTorch built with:
TorchVision: 0.6.0a0+82fd1c8 OpenCV: 4.2.0 MMCV: 0.5.1 MMDetection: 2.0.0+c802b17 MMDetection Compiler: GCC 5.5 MMDetection CUDA Compiler: 10.2
2020-06-10 12:20:03,994 - mmdet - INFO - Distributed training: True 2020-06-10 12:20:03,996 - mmdet - INFO - Config: classes=('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle') model=dict( type='MaskScoringRCNN', pretrained='open-mmlab://resnext101_32x4d', backbone=dict( type='ResNeXt', depth=101, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict( type='BN', requires_grad=True), norm_eval=True, style='pytorch', groups=32, base_width=4), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict( type='L1Loss', loss_weight=1.0)), roi_head=dict( type='MaskScoringRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( type='RoIAlign', out_size=7, sample_num=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=8, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict( type='L1Loss', loss_weight=1.0)), mask_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict( type='RoIAlign', out_size=14, sample_num=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), mask_head=dict( type='FCNMaskHead', num_convs=4, in_channels=256, conv_out_channels=256, num_classes=8, loss_mask=dict( type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), mask_iou_head=dict( type='MaskIoUHead', num_convs=4, num_fcs=2, 
roi_feat_size=14, in_channels=256, conv_out_channels=256, fc_out_channels=1024, num_classes=8))) train_cfg=dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=-1, pos_weight=-1, debug=False), rpn_proposal=dict( nms_across_levels=False, nms_pre=2000, nms_post=1000, max_num=1000, nms_thr=0.7, min_bbox_size=0), rcnn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), mask_size=28, pos_weight=-1, debug=False, mask_thr_binary=0.5)) test_cfg=dict( rpn=dict( nms_across_levels=False, nms_pre=1000, nms_post=1000, max_num=1000, nms_thr=0.7, min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict( type='nms', iou_thr=0.5), max_per_img=100, mask_thr_binary=0.5)) dataset_type='CityscapesDataset' data_root='data/cityscapes/' img_norm_cfg=dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])] test_pipeline=[ dict(type='LoadImageFromFile'), dict(type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), 
dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img'])])] data=dict( samples_per_gpu=2, workers_per_gpu=2, train=dict( classes=('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle'), type='CityscapesDataset', ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_train.json', img_prefix='data/cityscapes/leftImg8bit/train', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True, with_mask=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])]), val=dict( classes=('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle'), type='CityscapesDataset', ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json', img_prefix='data/cityscapes/leftImg8bit/val', pipeline=[ dict(type='LoadImageFromFile'), dict(type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img'])])]), test=dict( classes=('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle'), type='CityscapesDataset', ann_file='data/cityscapes/annotations/instancesonly_filtered_gtFine_val.json', img_prefix='data/cityscapes/leftImg8bit/val', pipeline=[ dict(type='LoadImageFromFile'), dict(type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], 
to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img'])])])) evaluation=dict( interval=1, metric=['bbox', 'segm']) optimizer=dict( type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer_config=dict( grad_clip=None) lr_config=dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) total_epochs=12 checkpoint_config=dict( interval=1) log_config=dict( interval=50, hooks=[ dict(type='TextLoggerHook')]) dist_params=dict( backend='nccl') log_level='INFO' load_from=None resume_from=None workflow=[('train', 1)] work_dir='/ssd_scratch/cvit/dksingh/mmdetection_logs/ms_rcnn_x101_32x4d_fpn_1x_cityscapes_new/' gpu_ids=range(0, 1) 2020-06-10 12:20:07,451 - root - INFO - load model from: open-mmlab://resnext101_32x4d loading annotations into memory... loading annotations into memory... loading annotations into memory... loading annotations into memory... Done (t=1.49s) creating index... index created! Done (t=1.43s) creating index... Done (t=1.43s) creating index... index created! Done (t=1.45s) creating index... index created! index created! loading annotations into memory... loading annotations into memory... Done (t=0.15s) creating index... index created! 2020-06-10 12:20:16,229 - mmdet - INFO - Start running, host: dksingh@gnode17, work_dir: /ssd_scratch/cvit/dksingh/mmdetection_logs/ms_rcnn_x101_32x4d_fpn_1x_cityscapes_new 2020-06-10 12:20:16,229 - mmdet - INFO - workflow: [('train', 1)], max: 12 epochs loading annotations into memory... loading annotations into memory... Done (t=0.19s) creating index... index created! Done (t=0.20s) creating index... index created! Done (t=0.19s) creating index... index created! 
2020-06-10 12:23:51,011 - mmdet - INFO - Epoch [1][50/371] lr: 0.00198, eta: 5:15:07, time: 4.295, data_time: 0.936, memory: 6724, loss_rpn_cls: 0.5591, loss_rpn_bbox: 0.1505, loss_cls: 0.5727, acc: 87.2520, loss_bbox: 0.0844, loss_mask: 0.7708, loss_mask_iou: 0.0221, loss: 2.1596 2020-06-10 12:26:56,695 - mmdet - INFO - Epoch [1][100/371] lr: 0.00398, eta: 4:50:25, time: 3.713, data_time: 0.418, memory: 6866, loss_rpn_cls: 0.2295, loss_rpn_bbox: 0.1438, loss_cls: 0.4238, acc: 89.9307, loss_bbox: 0.2964, loss_mask: 0.6281, loss_mask_iou: 0.0134, loss: 1.7350 2020-06-10 12:30:12,380 - mmdet - INFO - Epoch [1][150/371] lr: 0.00597, eta: 4:44:55, time: 3.913, data_time: 0.502, memory: 6866, loss_rpn_cls: 0.1433, loss_rpn_bbox: 0.1500, loss_cls: 0.3553, acc: 90.2085, loss_bbox: 0.2789, loss_mask: 0.6078, loss_mask_iou: 0.0170, loss: 1.5523 2020-06-10 12:33:25,925 - mmdet - INFO - Epoch [1][200/371] lr: 0.00797, eta: 4:39:47, time: 3.871, data_time: 0.461, memory: 6958, loss_rpn_cls: 0.1030, loss_rpn_bbox: 0.1405, loss_cls: 0.4616, acc: 85.2173, loss_bbox: 0.5360, loss_mask: 0.5672, loss_mask_iou: 0.0132, loss: 1.8215 2020-06-10 12:36:48,480 - mmdet - INFO - Epoch [1][250/371] lr: 0.00997, eta: 4:37:56, time: 4.051, data_time: 0.459, memory: 6958, loss_rpn_cls: 0.0828, loss_rpn_bbox: 0.1345, loss_cls: 0.4820, acc: 83.5576, loss_bbox: 0.6226, loss_mask: 0.5180, loss_mask_iou: 0.0144, loss: 1.8544 2020-06-10 12:40:07,226 - mmdet - INFO - Epoch [1][300/371] lr: 0.01197, eta: 4:34:42, time: 3.975, data_time: 0.424, memory: 6958, loss_rpn_cls: 0.0654, loss_rpn_bbox: 0.1189, loss_cls: 0.4370, acc: 84.3989, loss_bbox: 0.6138, loss_mask: 0.4824, loss_mask_iou: 0.0110, loss: 1.7284 2020-06-10 12:43:30,509 - mmdet - INFO - Epoch [1][350/371] lr: 0.01397, eta: 4:32:20, time: 4.065, data_time: 0.480, memory: 6958, loss_rpn_cls: 0.0647, loss_rpn_bbox: 0.1184, loss_cls: 0.4261, acc: 84.9155, loss_bbox: 0.5795, loss_mask: 0.4718, loss_mask_iou: 0.0130, loss: 1.6735 
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 496/493, 1.5 task/s, elapsed: 336s, ETA: -1s
loading annotations into memory... Done (t=0.16s) creating index... index created! loading annotations into memory... Done (t=0.18s) creating index... index created! Traceback (most recent call last): File "./tools/train.py", line 159, in
main()
File "./tools/train.py", line 155, in main
meta=meta)
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/apis/train.py", line 165, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/runner.py", line 383, in run
epoch_runner(data_loaders[i], kwargs)
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/runner.py", line 292, in train
self.call_hook('after_train_epoch')
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/mmcv/runner/runner.py", line 245, in call_hook
getattr(hook, fn_name)(self)
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/core/evaluation/eval_hooks.py", line 74, in after_train_epoch
self.evaluate(runner, results)
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/core/evaluation/eval_hooks.py", line 32, in evaluate
results, logger=runner.logger, self.eval_kwargs)
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/datasets/cityscapes.py", line 232, in evaluate
classwise, proposal_nums, iou_thrs))
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/datasets/coco.py", line 349, in evaluate
result_files, tmp_dir = self.format_results(results, jsonfile_prefix)
File "/home/dksingh/sandboxes/msrcnn_coco/mmdetection/mmdet/datasets/coco.py", line 303, in format_results
format(len(results), len(self)))
AssertionError: The length of results is not equal to the dataset len: 493 != 500
Traceback (most recent call last):
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 263, in
main()
File "/home/dksingh/anaconda3/envs/open-mmlab/lib/python3.7/site-packages/torch/distributed/launch.py", line 259, in main
cmd=cmd)
subprocess.CalledProcessError: Command '['/home/dksingh/anaconda3/envs/open-mmlab/bin/python', '-u', './tools/train.py', '--local_rank=3', 'configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_cityscapes_new.py', '--launcher', 'pytorch']' returned non-zero exit status 1.