Open Khhihubbroker opened 8 months ago
Hello, have you solved this problem?
Can you try CocoMetric
or try not using native Resize
transform ?
It seems like VOCMetric is incompatible (mAP always 0) with native Resize transform. When I switch to CocoMetric or use resize in Albumentation, the training run smoothly with "correct" mAP
I used the Faster-RCNN model in the latest mmdetection.
I want to train my model in custom datasets that are in Pascal VOC format. In training the batch accuracy looks fine but when mAP is calculated the value is getting 0.00.
here I attached my config file and a picture of the terminal while training the model.
And additionally, I provide the code in my config file. Let me know if I made dome mistake or something irrelevant.
auto_scale_lr = dict(base_batch_size=16, enable=True) backend_args = None data_root = 'data/Invoice_latest/VOCdevkit/' dataset_type = 'VOCDataset' default_hooks = dict( checkpoint=dict(interval=5, type='CheckpointHook'), logger=dict(interval=50, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(type='DetVisualizationHook')) default_scope = 'mmdet' env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) launcher = 'none' load_from = None log_level = 'INFO' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) max_epochs = 100 meta_info = dict( classes=( 'Table', 'BankAccountNo', 'BankName', 'Currency', 'Customer', 'CustomerContact', 'Delivery', 'GIRONo', 'OrderReference', 'PaymentTerms', 'Supplier', 'SupplierContact', 'TotalValue', )) model = dict( backbone=dict( depth=50, frozen_stages=1, init_cfg=dict(checkpoint='torchvision://resnet50', type='Pretrained'), norm_cfg=dict(requires_grad=True, type='BN'), norm_eval=True, num_stages=4, out_indices=( 0, 1, 2, 3, ), style='pytorch', type='ResNet'), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, ], pad_size_divisor=32, std=[ 58.395, 57.12, 57.375, ], type='DetDataPreprocessor'), neck=dict( in_channels=[ 256, 512, 1024, 2048, ], num_outs=5, out_channels=256, type='FPN'), roi_head=dict( bbox_head=dict( bbox_coder=dict( target_means=[ 0.0, 0.0, 0.0, 0.0, ], target_stds=[ 0.1, 0.1, 0.2, 0.2, ], type='DeltaXYWHBBoxCoder'), fc_out_channels=1024, in_channels=256, loss_bbox=dict(loss_weight=1.0, type='L1Loss'), loss_cls=dict( loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False), num_classes=13, reg_class_agnostic=False, roi_feat_size=7, type='Shared2FCBBoxHead'), bbox_roi_extractor=dict( featmap_strides=[ 4, 8, 16, 32, ], out_channels=256, roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'), type='SingleRoIExtractor'), type='StandardRoIHead'), rpn_head=dict( anchor_generator=dict( ratios=[ 0.5, 1.0, 2.0, ], scales=[ 8, ], strides=[ 4, 8, 16, 32, 64, ], type='AnchorGenerator'), bbox_coder=dict( target_means=[ 0.0, 0.0, 0.0, 0.0, ], target_stds=[ 1.0, 1.0, 1.0, 1.0, ], type='DeltaXYWHBBoxCoder'), feat_channels=256, in_channels=256, loss_bbox=dict(loss_weight=1.0, type='L1Loss'), loss_cls=dict( loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True), type='RPNHead'), test_cfg=dict( rcnn=dict( max_per_img=100, nms=dict(iou_threshold=0.5, type='nms'), score_thr=0.05), rpn=dict( max_per_img=1000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type='nms'), nms_pre=1000)), train_cfg=dict( rcnn=dict( assigner=dict( ignore_iof_thr=-1, match_low_quality=False, min_pos_iou=0.5, neg_iou_thr=0.5, pos_iou_thr=0.5, type='MaxIoUAssigner'), debug=False, pos_weight=-1, sampler=dict( add_gt_as_proposals=True, neg_pos_ub=-1, num=512, pos_fraction=0.25, type='RandomSampler')), rpn=dict( allowed_border=-1, assigner=dict( ignore_iof_thr=-1, match_low_quality=True, min_pos_iou=0.3, neg_iou_thr=0.3, pos_iou_thr=0.7, type='MaxIoUAssigner'), debug=False, pos_weight=-1, sampler=dict( add_gt_as_proposals=False, neg_pos_ub=-1, num=256, pos_fraction=0.5, type='RandomSampler')), rpn_proposal=dict( max_per_img=1000, min_bbox_size=0, nms=dict(iou_threshold=0.7, type='nms'), nms_pre=2000)), type='FasterRCNN') optim_wrapper = dict( clip_grad=None, optimizer=dict(lr=0.02, momentum=0.9, type='SGD', weight_decay=0.0001), type='AmpOptimWrapper') param_scheduler = [ dict( begin=0, by_epoch=True, end=100, gamma=0.1, milestones=[ 35, 50, 75, 100, ], type='MultiStepLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=4, dataset=dict( ann_file='VOC2007/ImageSets/Main/test.txt', backend_args=None, data_prefix=dict(sub_data_root='VOC2007/'), data_root='data/Invoice_latest/VOCdevkit/', metainfo=dict( classes=( 'Table', 'BankAccountNo', 'BankName', 'Currency', 'Customer', 'CustomerContact', 'Delivery', 'GIRONo', 'OrderReference', 'PaymentTerms', 'Supplier', 'SupplierContact', 'TotalValue', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( 1000, 600, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), dict(type='PackDetInputs'), ], test_mode=True, type='VOCDataset'), drop_last=False, num_workers=4, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict(eval_mode='11points', metric='mAP', type='VOCMetric') test_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 1000, 600, ), type='Resize'), dict(type='LoadAnnotations', with_bbox=True), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='PackDetInputs'), ] train_cfg = dict(max_epochs=100, type='EpochBasedTrainLoop', val_interval=1) train_dataloader = dict( batch_sampler=dict(type='AspectRatioBatchSampler'), batch_size=8, dataset=dict( ann_file='VOC2007/ImageSets/Main/train.txt', backend_args=None, data_prefix=dict(sub_data_root='VOC2007/'), data_root='data/Invoice_latest/VOCdevkit/', filter_cfg=dict(bbox_min_size=32, filter_empty_gt=True, min_size=32), metainfo=dict( classes=( 'Table', 'BankAccountNo', 'BankName', 'Currency', 'Customer', 'CustomerContact', 'Delivery', 'GIRONo', 'OrderReference', 'PaymentTerms', 'Supplier', 'SupplierContact', 'TotalValue', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( 1000, 600, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), dict(type='PackDetInputs'), ], type='VOCDataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( 1000, 600, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), dict(type='PackDetInputs'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=4, dataset=dict( ann_file='VOC2007/ImageSets/Main/val.txt', backend_args=None, data_prefix=dict(sub_data_root='VOC2007/'), data_root='data/Invoice_latest/VOCdevkit/', metainfo=dict( classes=( 'Table', 'BankAccountNo', 'BankName', 'Currency', 'Customer', 'CustomerContact', 'Delivery', 'GIRONo', 'OrderReference', 'PaymentTerms', 'Supplier', 'SupplierContact', 'TotalValue', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(keep_ratio=True, scale=( 1000, 600, ), type='Resize'), dict(prob=0.5, type='RandomFlip'), dict(type='PackDetInputs'), ], test_mode=True, type='VOCDataset'), drop_last=False, num_workers=4, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict(eval_mode='11points', metric='mAP', type='VOCMetric') vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) work_dir = './work_dirs/faster_rcnn_r50_fpn/'