Closed WepLeo closed 3 years ago
Please follow the issue template to provide more details.
I followed this tutorial "https://github.com/open-mmlab/mmdetection/blob/master/demo/MMDet_Tutorial.ipynb"
I have a dataset in VOC format with one class, but after running the following code from the notebook I am getting 20 classes instead of one. I made changes in voc512.py and class_names.py, but I am still getting 20 classes.
`from mmdet.datasets import build_dataset from mmdet.models import build_detector from mmdet.apis import train_detector
datasets = [build_dataset(cfg.data.train)]
model = build_detector( cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
datasets[0].CLASSES`
from mmcv import Config cfg = Config.fromfile('/content/mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py') print(f'Config:\n{cfg.pretty_text}')
Config: model = dict( type='FasterRCNN', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, style='pytorch', init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='L1Loss', loss_weight=1.0)), roi_head=dict( type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=1, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='L1Loss', loss_weight=1.0))), train_cfg=dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False), allowed_border=-1, pos_weight=-1, debug=False), rpn_proposal=dict( nms_pre=2000, max_per_img=1000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1), sampler=dict( type='RandomSampler', num=512, 
pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True), pos_weight=-1, debug=False)), test_cfg=dict( rpn=dict( nms_pre=1000, max_per_img=1000, nms=dict(type='nms', iou_threshold=0.7), min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))) dataset_type = 'VOCDataset' data_root = '/content/mmdetection/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=2, train=dict( type='RepeatDataset', times=3, dataset=dict( type='VOCDataset', ann_file=[ '/content/mmdetection/VOC2007/ImageSets/Main/trainval.txt' ], img_prefix=['/content/mmdetection/VOC2007/'], pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ])), val=dict( type='VOCDataset', 
ann_file='/content/mmdetection/VOC2007/ImageSets/Main/test.txt', img_prefix='/content/mmdetection/VOC2007/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='VOCDataset', ann_file='/content/mmdetection/VOC2007/ImageSets/Main/test.txt', img_prefix='/content/mmdetection/VOC2007/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(1000, 600), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(interval=1, metric='mAP') checkpoint_config = dict(interval=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) lr_config = dict(policy='step', step=[3]) runner = dict(type='EpochBasedRunner', max_epochs=4)
Please follow the issue template to provide more details.
I have already updated the issue. Thanks~
Describe the bug: If I use a V100-16G machine, everything is OK, but an A100 machine reports errors after running a few steps. (Sorry for my bad English...)
There are many potential reasons; it may be because of the CUDA and NVIDIA driver versions. Some versions may have compatibility issues on A100. Try upgrading your GPU driver and CUDA. Or maybe the GPU is broken.
Describe the bug: If I use a V100-16G machine, everything is OK, but an A100 machine reports errors after running a few steps. (Sorry for my bad English...)
There are many potential reasons; it may be because of the CUDA and NVIDIA driver versions. Some versions may have compatibility issues on A100. Try upgrading your GPU driver and CUDA. Or maybe the GPU is broken.
GPU driver version: 460.27.04. The machine and the system worked well when training yolov5. I also tried to compile mmcv locally, but a compiler version error occurred. Is there a version limit for mmcv on A100?
Describe the bug: If I use a V100-16G machine, everything is OK, but an A100 machine reports errors after running a few steps. (Sorry for my bad English...)
There are many potential reasons; it may be because of the CUDA and NVIDIA driver versions. Some versions may have compatibility issues on A100. Try upgrading your GPU driver and CUDA. Or maybe the GPU is broken.
GPU driver version: 460.27.04. The machine and the system worked well when training yolov5. I also tried to compile mmcv locally, but a compiler version error occurred. Is there a version limit for mmcv on A100?
So, the same code works fine on V100 but fails on A100. However, yolov5 is runnable in the same environment. It is most likely because of the CUDA version. Try using CUDA 11.1 or a higher version. But I cannot be sure because I do not have an A100 to reproduce this error. Just give it a try.
pytorch 1.10 cudatoolkit=11.3
MMCV: 1.3.15 MMCV Compiler: GCC 5.4 MMCV CUDA Compiler: 11.0 MMDetection: 2.17.0+a5054bd
I built and installed MMCV and MMDetection from source following the guide, and everything is OK now. Thank you ~
There is no issue template for General questions, so I used the Error report issue template as follows.
Thanks for your error report and we appreciate it a lot.
Checklist
Describe the bug: If I use a V100-16G machine, everything is OK, but an A100 machine reports errors after running a few steps. (Sorry for my bad English...)
Reproduction
coco_config.py
Modify dataset related settings
dataset_type = 'CocoDataset' data_root = '/workdir/wepleo/data/open_datasets/coco/' data = dict( train=dict( type=dataset_type, ann_file=data_root + 'Annotations/instances_train2017.json', img_prefix=data_root + 'images/train'), val=dict( type=dataset_type, ann_file=data_root + 'Annotations/instances_val2017.json', img_prefix=data_root + 'images/val'), test=dict( type=dataset_type, ann_file=data_root + 'Annotations/instances_val2017.json', img_prefix=data_root + 'images/val'))
optimizer_config = dict(delete=True, grad_clip=dict(max_norm=35, norm_type=2))
We can use the pre-trained model to obtain higher performance
load_from = 'checkpoints/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth'
Error traceback: If applicable, paste the error traceback here.