open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0
29.58k stars 9.46k forks source link

Want to save best model in MMdetection #6138

Closed xjtuzxk closed 3 years ago

xjtuzxk commented 3 years ago

Here is my config file:

model settings

model = dict( type = 'YOLOV3', pretrained = '/data0/steelai/mng/webapp/upload/preTrainedModel/2021/9/10/613b2624cc7ca91773574ffbZm72pD.pth', backbone = dict( type = 'Darknet', depth = 53, out_indices = (3,4,5), frozen_stages = -1, conv_cfg = None, norm_cfg = dict( type = 'BN', requires_grad = True), act_cfg = dict( type = 'LeakyReLU', negative_slope = 0.1), norm_eval = True), neck = dict( type = 'YOLOV3Neck', num_scales = 3, in_channels = [1024, 512, 256], out_channels = [512, 256, 128], conv_cfg = None, norm_cfg = dict( type = 'BN', requires_grad = True), act_cfg = dict( type = 'LeakyReLU', negative_slope = 0.1)), bbox_head = dict( type = 'YOLOV3Head', num_classes = 6, in_channels = [512, 256, 128], out_channels = [1024, 512, 256], anchor_generator = dict( type = 'YOLOAnchorGenerator', base_sizes = [[(116, 90), (156, 198), (373, 326)],[(30, 61), (62, 45), (59, 119)],[(10, 13), (16, 30), (33, 23)]], strides = [32, 16, 8]), bbox_coder = dict( type = 'YOLOBBoxCoder'), featmap_strides = [32, 16, 8], one_hot_smoother = 0., conv_cfg = None, norm_cfg = dict( type = 'BN', requires_grad = True), act_cfg = dict( type = 'LeakyReLU', negative_slope = 0.1), loss_cls = dict( type = 'CrossEntropyLoss', use_sigmoid = True, loss_weight = 1.0), loss_conf = dict( type = 'CrossEntropyLoss', use_sigmoid = True, loss_weight = 1.0), loss_xy = dict( type = 'CrossEntropyLoss', use_sigmoid = True, loss_weight = 1.0), loss_wh = dict( type = 'MSELoss', loss_weight = 1.0), train_cfg = None, test_cfg = None), train_cfg = dict( assigner = dict( type = 'GridAssigner', pos_iou_thr = 0.5, neg_iou_thr = 0.5, min_pos_iou = 0)), test_cfg = dict( nms_pre = 1000, min_bbox_size = 0, score_thr = 0.05, conf_thr = 0.005, nms = dict( type = 'nms', iou_threshold = 0.45), max_per_img = 100) ) dataset_type = 'CocoDataset' data_root = '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = 
[ dict(type='LoadImageFromFile', to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] classes = ('C', 'D', 'E', 'F', 'B', 'A') data = dict( samples_per_gpu=2, workers_per_gpu=1, train=dict( type='CocoDataset', ann_file= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/train.json', img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/train/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile', to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ]), val=dict( type='CocoDataset', ann_file= 
'/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/val.json', img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/val/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='CocoDataset', ann_file= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/val.json', img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/val/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(metric=['bbox'], save_best='bbox_mAP') optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[2, 8]) runner = dict(type='EpochBasedRunner', max_epochs=12) checkpoint_config = dict(interval=-1) log_config = dict( interval=50, hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')]) custom_hooks = [dict(type='NumClassCheckHook')] dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] work_dir = 
'/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/work_dir/'

I want to save the best model checkpoint (.pth), so I followed the answer I found in the issue list and set `evaluation = dict(metric=['bbox'], save_best='bbox_mAP')` and `checkpoint_config = dict(interval=-1)`, but I got a KeyError: 'bbox_mAP'.

This is the log information: fatal: Not a git repository (or any parent up to mount point /data0) Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set). 2021-09-20 18:12:30,319 - mmdet - INFO - Environment info:

sys.platform: linux Python: 3.7.10 (default, Feb 26 2021, 18:47:35) [GCC 7.3.0] CUDA available: True GPU 0,1,2,3: GeForce RTX 2080 Ti CUDA_HOME: /usr/local/cuda NVCC: Cuda compilation tools, release 9.0, V9.0.176 GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609 PyTorch: 1.8.0 PyTorch compiling details: PyTorch built with:

TorchVision: 0.9.0 OpenCV: 4.5.1 MMCV: 1.3.13 MMCV Compiler: GCC 7.3 MMCV CUDA Compiler: 11.1 MMDetection: 2.16.0+

2021-09-20 18:12:32,098 - mmdet - INFO - Distributed training: False 2021-09-20 18:12:33,823 - mmdet - INFO - Config: model = dict( type='YOLOV3', pretrained= '/data0/steelai/mng/webapp/upload/preTrainedModel/2021/9/10/613b2624cc7ca91773574ffbZm72pD.pth', backbone=dict( type='Darknet', depth=53, out_indices=(3, 4, 5), frozen_stages=-1, conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), act_cfg=dict(type='LeakyReLU', negative_slope=0.1), norm_eval=True), neck=dict( type='YOLOV3Neck', num_scales=3, in_channels=[1024, 512, 256], out_channels=[512, 256, 128], conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), act_cfg=dict(type='LeakyReLU', negative_slope=0.1)), bbox_head=dict( type='YOLOV3Head', num_classes=6, in_channels=[512, 256, 128], out_channels=[1024, 512, 256], anchor_generator=dict( type='YOLOAnchorGenerator', base_sizes=[[(116, 90), (156, 198), (373, 326)], [(30, 61), (62, 45), (59, 119)], [(10, 13), (16, 30), (33, 23)]], strides=[32, 16, 8]), bbox_coder=dict(type='YOLOBBoxCoder'), featmap_strides=[32, 16, 8], one_hot_smoother=0.0, conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), act_cfg=dict(type='LeakyReLU', negative_slope=0.1), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_conf=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_xy=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_wh=dict(type='MSELoss', loss_weight=1.0), train_cfg=None, test_cfg=None), train_cfg=dict( assigner=dict( type='GridAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0)), test_cfg=dict( nms_pre=1000, min_bbox_size=0, score_thr=0.05, conf_thr=0.005, nms=dict(type='nms', iou_threshold=0.45), max_per_img=100)) dataset_type = 'CocoDataset' data_root = '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile', 
to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] classes = ('C', 'D', 'E', 'F', 'B', 'A') data = dict( samples_per_gpu=2, workers_per_gpu=1, train=dict( type='CocoDataset', ann_file= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/train.json', img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/train/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile', to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ]), val=dict( type='CocoDataset', ann_file= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/val.json', 
img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/val/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='CocoDataset', ann_file= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/annotations/val.json', img_prefix= '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/data/images/val/', classes=('C', 'D', 'E', 'F', 'B', 'A'), pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(metric=['bbox'], save_best='bbox_mAP') optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[2, 8]) runner = dict(type='EpochBasedRunner', max_epochs=12) checkpoint_config = dict(interval=-1) log_config = dict( interval=50, hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')]) custom_hooks = [dict(type='NumClassCheckHook')] dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] work_dir = '/data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/work_dir/' gpu_ids = range(0, 1)

/data0/wzz/mmdetection-master/mmdet/models/detectors/single_stage.py:29: UserWarning: DeprecationWarning: pretrained is deprecated, please use "init_cfg" instead warnings.warn('DeprecationWarning: pretrained is deprecated, ' /data0/wzz/mmdetection-master/mmdet/models/backbones/darknet.py:138: UserWarning: DeprecationWarning: pretrained is deprecated, please use "init_cfg" instead warnings.warn('DeprecationWarning: pretrained is deprecated, ' /data0/wzz/mmdetection-master/mmdet/core/anchor/builder.py:17: UserWarning: build_anchor_generator would be deprecated soon, please use build_prior_generator 'build_anchor_generator would be deprecated soon, please use ' 2021-09-20 18:12:34,918 - mmdet - INFO - initialize Darknet with init_cfg {'type': 'Pretrained', 'checkpoint': '/data0/steelai/mng/webapp/upload/preTrainedModel/2021/9/10/613b2624cc7ca91773574ffbZm72pD.pth'} 2021-09-20 18:12:34,919 - mmcv - INFO - load model from: /data0/steelai/mng/webapp/upload/preTrainedModel/2021/9/10/613b2624cc7ca91773574ffbZm72pD.pth 2021-09-20 18:12:34,919 - mmcv - INFO - Use load_from_local loader loading annotations into memory... Done (t=0.01s) creating index... index created! fatal: Not a git repository (or any parent up to mount point /data0) Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set). loading annotations into memory... Done (t=0.00s) creating index... index created! 2021-09-20 18:12:38,928 - mmdet - INFO - Start running, host: wzz@amax3.xjtu.io, work_dir: /data0/steelai/projects/2021/09/20/61484257cc7ca9962fbacefc/work_dir 2021-09-20 18:12:38,929 - mmdet - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) StepLrUpdaterHook (NORMAL ) CheckpointHook (LOW ) EvalHook (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

before_train_epoch: (VERY_HIGH ) StepLrUpdaterHook (NORMAL ) NumClassCheckHook (LOW ) IterTimerHook (LOW ) EvalHook (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

before_train_iter: (VERY_HIGH ) StepLrUpdaterHook (LOW ) IterTimerHook (LOW ) EvalHook

after_train_iter: (ABOVE_NORMAL) OptimizerHook (NORMAL ) CheckpointHook (LOW ) IterTimerHook (LOW ) EvalHook (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

after_train_epoch: (NORMAL ) CheckpointHook (LOW ) EvalHook (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

before_val_epoch: (NORMAL ) NumClassCheckHook (LOW ) IterTimerHook (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

before_val_iter: (LOW ) IterTimerHook

after_val_iter: (LOW ) IterTimerHook

after_val_epoch: (VERY_LOW ) TextLoggerHook (VERY_LOW ) TensorboardLoggerHook

after_run: (VERY_LOW ) TensorboardLoggerHook

2021-09-20 18:12:38,929 - mmdet - INFO - workflow: [('train', 1)], max: 12 epochs /data0/wzz/mmdetection-master/mmdet/core/anchor/anchor_generator.py:324: UserWarning: grid_anchors would be deprecated soon. Please use grid_priors warnings.warn('grid_anchors would be deprecated soon. ' /data0/wzz/mmdetection-master/mmdet/core/anchor/anchor_generator.py:361: UserWarning: single_level_grid_anchors would be deprecated soon. Please use single_level_grid_priors 'single_level_grid_anchors would be deprecated soon. ' 2021-09-20 18:12:55,093 - mmdet - INFO - Epoch [1][50/257] lr: 9.890e-05, eta: 0:16:12, time: 0.321, data_time: 0.049, memory: 4170, loss_cls: 0.0002, loss_conf: 0.0300, loss_xy: 0.0002, loss_wh: 0.0001, loss: 0.0305 2021-09-20 18:13:08,130 - mmdet - INFO - Epoch [1][100/257] lr: 1.988e-04, eta: 0:14:27, time: 0.261, data_time: 0.006, memory: 4170, loss_cls: 0.0002, loss_conf: 0.0290, loss_xy: 0.0002, loss_wh: 0.0001, loss: 0.0295 2021-09-20 18:13:22,053 - mmdet - INFO - Epoch [1][150/257] lr: 2.987e-04, eta: 0:14:00, time: 0.278, data_time: 0.006, memory: 5219, loss_cls: 0.0002, loss_conf: 0.0271, loss_xy: 0.0002, loss_wh: 0.0001, loss: 0.0276 2021-09-20 18:13:36,516 - mmdet - INFO - Epoch [1][200/257] lr: 3.986e-04, eta: 0:13:48, time: 0.289, data_time: 0.006, memory: 5219, loss_cls: 0.0002, loss_conf: 0.0248, loss_xy: 0.0002, loss_wh: 0.0001, loss: 0.0253 2021-09-20 18:13:52,802 - mmdet - INFO - Epoch [1][250/257] lr: 4.985e-04, eta: 0:13:55, time: 0.326, data_time: 0.007, memory: 5219, loss_cls: 0.0002, loss_conf: 0.0226, loss_xy: 0.0002, loss_wh: 0.0001, loss: 0.0231 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 164/164, 20.1 task/s, elapsed: 8s, ETA: 0s2021-09-20 18:14:03,441 - mmdet - INFO - Evaluating bbox... Loading and preparing results... 2021-09-20 18:14:03,442 - mmdet - ERROR - The testing results of the whole dataset is empty. 
Traceback (most recent call last): File "/data0/wzz/mmdetection-master/tools/train.py", line 189, in main() File "/data0/wzz/mmdetection-master/tools/train.py", line 185, in main meta=meta) File "/data0/wzz/mmdetection-master/mmdet/apis/train.py", line 174, in train_detector runner.run(data_loaders, cfg.workflow) File "/home/wzz/anaconda3/envs/mmdet/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 127, in run epoch_runner(data_loaders[i], **kwargs) File "/home/wzz/anaconda3/envs/mmdet/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 54, in train self.call_hook('after_train_epoch') File "/home/wzz/anaconda3/envs/mmdet/lib/python3.7/site-packages/mmcv/runner/base_runner.py", line 307, in call_hook getattr(hook, fn_name)(self) File "/home/wzz/anaconda3/envs/mmdet/lib/python3.7/site-packages/mmcv/runner/hooks/evaluation.py", line 237, in after_train_epoch self._do_evaluate(runner) File "/data0/wzz/mmdetection-master/mmdet/core/evaluation/eval_hooks.py", line 20, in _do_evaluate key_score = self.evaluate(runner, results) File "/home/wzz/anaconda3/envs/mmdet/lib/python3.7/site-packages/mmcv/runner/hooks/evaluation.py", line 335, in evaluate return eval_res[self.key_indicator] KeyError: 'bbox_mAP'

MMdetection version: 2.16

Please help with this.

jshilong commented 3 years ago

Have you made any modifications to the code? I cannot reproduce your error.

jshilong commented 3 years ago

Feel free to reopen the issue if you have any further questions.

Yang-Jianzhang commented 2 years ago

I am also facing this KeyError: 'bbox_mAP' problem. May I ask how you solved it? @xjtuzxk @jshilong