open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0
29.72k stars 9.48k forks source link

Feature mAP value is 0.00 #11592

Open Khhihubbroker opened 8 months ago

Khhihubbroker commented 8 months ago

I used the Faster-RCNN model in the latest mmdetection.

I want to train my model in custom datasets that are in Pascal VOC format. In training the batch accuracy looks fine but when mAP is calculated the value is getting 0.00.

Here I have attached my config file and a screenshot of the terminal output during training.

image_2024_03_27T05_08_16_546Z

Additionally, here is the content of my config file. Let me know if I made some mistake or included something irrelevant.

# Faster R-CNN (ResNet-50 + FPN) on a custom Pascal VOC-format dataset,
# evaluated with VOCMetric (11-point mAP).
#
# Fix applied relative to the originally posted config:
#   The val/test dataloaders used the *training* pipeline
#   (LoadAnnotations -> Resize -> RandomFlip). With that order the ground-truth
#   boxes stored in each data sample are resized (and randomly flipped), while
#   the detector's predictions are rescaled back to the original image size.
#   VOCMetric compares predictions against the boxes carried by the data
#   sample, so the two sets of boxes never overlap and mAP is reported as 0.00.
#   NOTE(review): this matches the upstream VOC base config, which loads
#   annotations *after* Resize at test time - confirm against your mmdet version.
#   The val/test dataloaders now use `test_pipeline` (no flip, annotations
#   loaded after Resize, explicit meta_keys including `scale_factor`).

# --------------------------------------------------------------------------
# Runtime / environment
# --------------------------------------------------------------------------
auto_scale_lr = dict(base_batch_size=16, enable=True)
backend_args = None
default_scope = 'mmdet'
launcher = 'none'
load_from = None
log_level = 'INFO'
resume = False
work_dir = './work_dirs/faster_rcnn_r50_fpn/'

default_hooks = dict(
    checkpoint=dict(interval=5, type='CheckpointHook'),
    logger=dict(interval=50, type='LoggerHook'),
    param_scheduler=dict(type='ParamSchedulerHook'),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    timer=dict(type='IterTimerHook'),
    visualization=dict(type='DetVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=False,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))

log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)

vis_backends = [
    dict(type='LocalVisBackend'),
]
visualizer = dict(
    name='visualizer',
    type='DetLocalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
    ])

# --------------------------------------------------------------------------
# Dataset
# --------------------------------------------------------------------------
data_root = 'data/Invoice_latest/VOCdevkit/'
dataset_type = 'VOCDataset'

# Class names; the tuple length must equal `num_classes` in the bbox head.
meta_info = dict(
    classes=(
        'Table',
        'BankAccountNo',
        'BankName',
        'Currency',
        'Customer',
        'CustomerContact',
        'Delivery',
        'GIRONo',
        'OrderReference',
        'PaymentTerms',
        'Supplier',
        'SupplierContact',
        'TotalValue',
    ))

# Training-time pipeline: annotations are loaded first so that Resize and
# RandomFlip transform the boxes together with the image.
train_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(keep_ratio=True, scale=(
        1000,
        600,
    ), type='Resize'),
    dict(prob=0.5, type='RandomFlip'),
    dict(type='PackDetInputs'),
]

# Evaluation-time pipeline: no random augmentation, and annotations are
# loaded AFTER Resize so the ground-truth boxes stay in original image
# coordinates (the same frame the rescaled predictions are reported in).
test_pipeline = [
    dict(backend_args=None, type='LoadImageFromFile'),
    dict(keep_ratio=True, scale=(
        1000,
        600,
    ), type='Resize'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        meta_keys=(
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
        ),
        type='PackDetInputs'),
]

train_dataloader = dict(
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    batch_size=8,
    dataset=dict(
        ann_file='VOC2007/ImageSets/Main/train.txt',
        backend_args=None,
        data_prefix=dict(sub_data_root='VOC2007/'),
        data_root=data_root,
        filter_cfg=dict(bbox_min_size=32, filter_empty_gt=True, min_size=32),
        metainfo=meta_info,
        pipeline=train_pipeline,
        type=dataset_type),
    num_workers=8,
    persistent_workers=True,
    sampler=dict(shuffle=True, type='DefaultSampler'))

val_dataloader = dict(
    batch_size=4,
    dataset=dict(
        ann_file='VOC2007/ImageSets/Main/val.txt',
        backend_args=None,
        data_prefix=dict(sub_data_root='VOC2007/'),
        data_root=data_root,
        metainfo=meta_info,
        # Was the training pipeline (with RandomFlip); see header note.
        pipeline=test_pipeline,
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))

test_dataloader = dict(
    batch_size=4,
    dataset=dict(
        ann_file='VOC2007/ImageSets/Main/test.txt',
        backend_args=None,
        data_prefix=dict(sub_data_root='VOC2007/'),
        data_root=data_root,
        metainfo=meta_info,
        # Was the training pipeline (with RandomFlip); see header note.
        pipeline=test_pipeline,
        test_mode=True,
        type=dataset_type),
    drop_last=False,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(shuffle=False, type='DefaultSampler'))

val_evaluator = dict(eval_mode='11points', metric='mAP', type='VOCMetric')
test_evaluator = dict(eval_mode='11points', metric='mAP', type='VOCMetric')

# --------------------------------------------------------------------------
# Model
# --------------------------------------------------------------------------
model = dict(
    backbone=dict(
        depth=50,
        frozen_stages=1,
        init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'),
        norm_cfg=dict(requires_grad=True, type='BN'),
        norm_eval=True,
        num_stages=4,
        out_indices=(
            0,
            1,
            2,
            3,
        ),
        style='pytorch',
        type='ResNet'),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_size_divisor=32,
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='DetDataPreprocessor'),
    neck=dict(
        in_channels=[
            256,
            512,
            1024,
            2048,
        ],
        num_outs=5,
        out_channels=256,
        type='FPN'),
    roi_head=dict(
        bbox_head=dict(
            bbox_coder=dict(
                target_means=[
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                ],
                target_stds=[
                    0.1,
                    0.1,
                    0.2,
                    0.2,
                ],
                type='DeltaXYWHBBoxCoder'),
            fc_out_channels=1024,
            in_channels=256,
            loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
            loss_cls=dict(
                loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
            # Must match len(meta_info['classes']).
            num_classes=13,
            reg_class_agnostic=False,
            roi_feat_size=7,
            type='Shared2FCBBoxHead'),
        bbox_roi_extractor=dict(
            featmap_strides=[
                4,
                8,
                16,
                32,
            ],
            out_channels=256,
            roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
            type='SingleRoIExtractor'),
        type='StandardRoIHead'),
    rpn_head=dict(
        anchor_generator=dict(
            ratios=[
                0.5,
                1.0,
                2.0,
            ],
            scales=[
                8,
            ],
            strides=[
                4,
                8,
                16,
                32,
                64,
            ],
            type='AnchorGenerator'),
        bbox_coder=dict(
            target_means=[
                0.0,
                0.0,
                0.0,
                0.0,
            ],
            target_stds=[
                1.0,
                1.0,
                1.0,
                1.0,
            ],
            type='DeltaXYWHBBoxCoder'),
        feat_channels=256,
        in_channels=256,
        loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
        loss_cls=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
        type='RPNHead'),
    test_cfg=dict(
        rcnn=dict(
            max_per_img=100,
            nms=dict(iou_threshold=0.5, type='nms'),
            score_thr=0.05),
        rpn=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=1000)),
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=False,
                min_pos_iou=0.5,
                neg_iou_thr=0.5,
                pos_iou_thr=0.5,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=True,
                neg_pos_ub=-1,
                num=512,
                pos_fraction=0.25,
                type='RandomSampler')),
        rpn=dict(
            allowed_border=-1,
            assigner=dict(
                ignore_iof_thr=-1,
                match_low_quality=True,
                min_pos_iou=0.3,
                neg_iou_thr=0.3,
                pos_iou_thr=0.7,
                type='MaxIoUAssigner'),
            debug=False,
            pos_weight=-1,
            sampler=dict(
                add_gt_as_proposals=False,
                neg_pos_ub=-1,
                num=256,
                pos_fraction=0.5,
                type='RandomSampler')),
        rpn_proposal=dict(
            max_per_img=1000,
            min_bbox_size=0,
            nms=dict(iou_threshold=0.7, type='nms'),
            nms_pre=2000)),
    type='FasterRCNN')

# --------------------------------------------------------------------------
# Optimisation / schedule
# --------------------------------------------------------------------------
max_epochs = 100

optim_wrapper = dict(
    clip_grad=None,
    optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001),
    type='AmpOptimWrapper')

param_scheduler = [
    dict(
        begin=0,
        by_epoch=True,
        end=100,
        gamma=0.1,
        milestones=[
            35,
            50,
            75,
            100,
        ],
        type='MultiStepLR'),
]

train_cfg = dict(max_epochs=100, type='EpochBasedTrainLoop', val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

Godk02 commented 6 months ago

Hello, have you solved this problem?

hieutomra commented 5 months ago

Can you try CocoMetric, or try not using the native Resize transform? It seems that VOCMetric is incompatible with the native Resize transform (mAP is always 0). When I switch to CocoMetric or use the resize from Albumentations, the training runs smoothly with a "correct" mAP.