Closed JasonPan-ZJ closed 3 years ago
Hi,
I have just checked it again on my personal laptop (it seems you are doing the same), and it is running. Since I am currently running it on a single basic GPU, I decreased the batch size from 8 to 2 in configs/_base_/datasets/coco500_detection_augm.py and accordingly set the learning rate in the config to 0.012/16. The rest is the same. At the bottom, you can see the output, including the environment and config information that I have just used; maybe it can help you.
At the same time, I notice that your mmdet\models\dense_heads\alrp_loss_rpn_head.py file seems different because "ordered_losses_bbox = loss_bbox[order.detach()].flip(dims=[0])" resides at line 145 in the original file (but it is at line 160 in your output):
sys.platform: linux Python: 3.7.7 (default, May 7 2020, 21:25:33) [GCC 7.3.0] CUDA available: True CUDA_HOME: /usr/local/cuda-10.1 NVCC: Cuda compilation tools, release 10.1, V10.1.105 GPU 0: GeForce GTX 1650 with Max-Q Design GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 PyTorch: 1.4.0 PyTorch compiling details: PyTorch built with:
2020-12-14 17:16:12,022 - mmdet - INFO - Distributed training: False 2020-12-14 17:16:12,288 - mmdet - INFO - Config: dataset_type = 'CocoDataset' data_root = 'data/coco/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile', to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Expand', mean=[123.675, 116.28, 103.53], to_rgb=True, ratio_range=(1, 4)), dict( type='MinIoURandomCrop', min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3), dict(type='Resize', img_scale=(512, 512), keep_ratio=False), dict(type='RandomFlip', flip_ratio=0.5), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(833, 500), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=4, train=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_train2017.json', img_prefix='data/coco/train2017/', pipeline=[ dict(type='LoadImageFromFile', to_float32=True), dict(type='LoadAnnotations', with_bbox=True), dict( type='PhotoMetricDistortion', brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18), dict( type='Expand', mean=[123.675, 116.28, 103.53], to_rgb=True, ratio_range=(1, 4)), dict( type='MinIoURandomCrop', min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3), dict(type='Resize', img_scale=(512, 512), 
keep_ratio=False), dict(type='RandomFlip', flip_ratio=0.5), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ]), val=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_val2017.json', img_prefix='data/coco/val2017/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(833, 500), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='CocoDataset', ann_file='data/coco/annotations/instances_val2017.json', img_prefix='data/coco/val2017/', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(833, 500), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) evaluation = dict(interval=1, metric='bbox') optimizer = dict(type='SGD', lr=0.00075, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[75, 95]) total_epochs = 100 checkpoint_config = dict(interval=1) log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)] model = dict( type='FasterRCNN', pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, 
style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='aLRPLossRPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), loss_bbox=dict(type='GIoULoss', reduction='none'), head_weight=0.2), roi_head=dict( type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( type='aLRPLossShared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=80, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, reg_decoded_bbox=True, loss_bbox=dict(type='GIoULoss', reduction='none')))) train_cfg = dict( rpn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1), allowed_border=-1, pos_weight=-1, debug=False), rpn_proposal=dict( nms_across_levels=False, nms_pre=2000, nms_post=1000, max_num=1000, nms_thr=0.7, min_bbox_size=0), rcnn=dict( assigner=dict( type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1), sampler=dict(type='PseudoSampler'), pos_weight=-1, debug=False)) test_cfg = dict( rpn=dict( nms_across_levels=False, nms_pre=1000, nms_post=1000, max_num=1000, nms_thr=0.7, min_bbox_size=0), rcnn=dict( score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) work_dir = './work_dirs/alrp_loss_faster_rcnn_r50_fpn_100e_coco500' gpu_ids = range(0, 1)
2020-12-14 17:16:12,543 - mmdet - INFO - load model from: torchvision://resnet50 2020-12-14 17:16:12,680 - mmdet - WARNING - The model and loaded state dict do not match exactly
unexpected key in source state_dict: fc.weight, fc.bias
loading annotations into memory... Done (t=12.14s) creating index... index created! loading annotations into memory... Done (t=0.29s) creating index... index created! 2020-12-14 17:16:28,556 - mmdet - INFO - Start running, host: kemal@kemal-zenbook, work_dir: /home/kemal/GitHub/aLRPLoss/work_dirs/alrp_loss_faster_rcnn_r50_fpn_100e_coco500 2020-12-14 17:16:28,556 - mmdet - INFO - workflow: [('train', 1)], max: 100 epochs 2020-12-14 17:16:57,153 - mmdet - INFO - Epoch [1][50/58633] lr: 7.418e-05, eta: 38 days, 16:00:37, time: 0.570, data_time: 0.049, memory: 1336, loss_rpn_cls: 0.1999, loss_rpn_bbox: 0.0023, loss_cls: 0.9590, loss_bbox: 0.0206, loss: 1.1819 2020-12-14 17:17:24,254 - mmdet - INFO - Epoch [1][100/58633] lr: 1.491e-04, eta: 37 days, 17:12:23, time: 0.542, data_time: 0.005, memory: 1336, loss_rpn_cls: 0.1999, loss_rpn_bbox: 0.0026, loss_cls: 0.9707, loss_bbox: 0.5822, loss: 1.7554 2020-12-14 17:17:51,536 - mmdet - INFO - Epoch [1][150/58633] lr: 2.240e-04, eta: 37 days, 11:47:00, time: 0.546, data_time: 0.005, memory: 1336, loss_rpn_cls: 0.1999, loss_rpn_bbox: 0.0027, loss_cls: 0.9445, loss_bbox: 1.0871, loss: 2.2342 2020-12-14 17:18:20,904 - mmdet - INFO - Epoch [1][200/58633] lr: 2.990e-04, eta: 38 days, 1:58:48, time: 0.587, data_time: 0.005, memory: 1336, loss_rpn_cls: 0.1999, loss_rpn_bbox: 0.0034, loss_cls: 0.9486, loss_bbox: 0.6281, loss: 1.7800
Hi, I'm sorry that I forgot to tell you that I am running it on Windows 10, and I can successfully run configs such as configs\faster_rcnn\faster_rcnn_r50_fpn_1x_coco.py with mmdetection 2.6 on Windows 10. Today, I reconfigured mmdet, but I found that the problem still exists. Here is the environment and config information that I used: sys.platform: win32 Python: 3.7.9 (default, Aug 31 2020, 17:10:11) [MSC v.1916 64 bit (AMD64)] CUDA available: True GPU 0: GeForce RTX 2070 CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 NVCC: Cuda compilation tools, release 10.1, V10.1.168 GCC: gcc (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 8.1.0 PyTorch: 1.6.0 PyTorch compiling details: PyTorch built with:
TorchVision: 0.7.0 OpenCV: 4.2.0 MMCV: 1.1.5 MMCV Compiler: MSVC 192729111 MMCV CUDA Compiler: 10.1 MMDetection: 2.6.0+**
In addition, I just modified my code in the mmdet\models\dense_heads\alrp_loss_rpn_head.py file: Line 6: from mmdet.ops import batched_nms now: from mmcv.ops import batched_nms
Line 24: 1, in_channels, background_label=0, **kwargs) now: 1, in_channels, **kwargs)
Thanks for your answer!
Unfortunately, we have not tried this repo on Windows. Our release uses mmdetection 2.1, and you can see in install.md that Windows is not officially supported in this version. At some point, we may move the code to the latest mmdetection, but currently we do not have such a plan. If you can manage to work with mmdetection 2.1 on Windows, maybe you can consider creating a new environment for this repo, cloning this repo, and building it with the settings I recommended (pytorch 1.4, torchvision 0.5 and mmcv 0.6).
I have successfully run it on my PC. Thanks for your comments!
Hi, There are some errors when I try to run python tools/train.py configs/alrp_loss/alrp_loss_faster_rcnn_r50_fpn_100e_coco500.py:
2020-12-14 22:30:17,778 - mmdet - INFO - load model from: torchvision://resnet50 2020-12-14 22:30:18,068 - mmdet - WARNING - The model and loaded state dict do not match exactly
unexpected key in source state_dict: fc.weight, fc.bias
loading annotations into memory... Done (t=20.27s) creating index... index created! fatal: not a git repository (or any of the parent directories): .git loading annotations into memory... Done (t=0.69s) creating index... index created! 2020-12-14 22:30:42,581 - mmdet - INFO - Start running, host: JasonTaka@DESKTOP-J21K1TG, work_dir: D:\paper\mmdetection2.6\work_dirs\alrp_loss_faster_rcnn_r50_fpn_100e_coco500 2020-12-14 22:30:42,581 - mmdet - INFO - workflow: [('train', 1)], max: 1 epochs Traceback (most recent call last): File "tools/train.py", line 220, in <module>
main()
File "tools/train.py", line 216, in main
meta=meta)
File "d:\paper\mmdetection2.6\mmdet\apis\train.py", line 150, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 125, in run
epoch_runner(data_loaders[i], **kwargs)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 50, in train
self.run_iter(data_batch, train_mode=True)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 30, in run_iter
**kwargs)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\mmcv\parallel\data_parallel.py", line 67, in train_step
return self.module.train_step(*inputs[0], **kwargs[0])
File "d:\paper\mmdetection2.6\mmdet\models\detectors\base.py", line 234, in train_step
losses = self(**data)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\JasonPan\Anaconda3\envs\mmd26augfpn\lib\site-packages\mmcv\runner\fp16_utils.py", line 84, in new_func
return old_func(*args, **kwargs)
File "d:\paper\mmdetection2.6\mmdet\models\detectors\base.py", line 168, in forward
return self.forward_train(img, img_metas, **kwargs)
File "d:\paper\mmdetection2.6\mmdet\models\detectors\two_stage.py", line 156, in forward_train
proposal_cfg=proposal_cfg)
File "d:\paper\mmdetection2.6\mmdet\models\dense_heads\base_dense_head.py", line 54, in forward_train
losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
File "d:\paper\mmdetection2.6\mmdet\models\dense_heads\alrp_loss_rpn_head.py", line 160, in loss
ordered_losses_bbox = loss_bbox[order.detach()].flip(dims=[0])
IndexError: too many indices for tensor of dimension 0