open-mmlab / mmrazor

OpenMMLab Model Compression Toolbox and Benchmark.
https://mmrazor.readthedocs.io/en/latest/
Apache License 2.0
1.45k stars 228 forks source link

mmrazor output file writing #136

Open kdh-awraw1019 opened 2 years ago

kdh-awraw1019 commented 2 years ago

mmdetection can write output to json file.

but, mmrazor/tools/mmdet/test_mmdet.py cannot write output

ex ) python tools/mmdet/test_mmdet.py ./configs/distill/cwd/cwd_cls_head_deformable_detr_rx101_r50.py ./deformable_detr_rx101_r50/epoch_211.pth --eval bbox --eval--options="jsonfile_prefix=./output_path"

How can I write the output to a file?

the config file

model settings

student = dict( type='mmdet.DeformableDETR', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(1, 2, 3), frozen_stages=-1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, style='pytorch', init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='ChannelMapper', in_channels=[512, 1024, 2048], kernel_size=1, out_channels=256, act_cfg=None, norm_cfg=dict(type='GN', num_groups=32), num_outs=4), bbox_head=dict( type='DeformableDETRHead', num_query=300, num_classes=2, in_channels=2048, sync_cls_avg_factor=True, as_two_stage=True, transformer=dict( type='DeformableDetrTransformer', encoder=dict( type='DetrTransformerEncoder', num_layers=6, transformerlayers=dict( type='BaseTransformerLayer', attn_cfgs=dict( type='MultiScaleDeformableAttention', embed_dims=256), feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'ffn', 'norm'))), decoder=dict( type='DeformableDetrTransformerDecoder', num_layers=6, return_intermediate=True, transformerlayers=dict( type='DetrTransformerDecoderLayer', attn_cfgs=[ dict( type='MultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), dict( type='MultiScaleDeformableAttention', embed_dims=256) ], feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')))), positional_encoding=dict( type='SinePositionalEncoding', num_feats=128, normalize=True, offset=-0.5), loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=2.0), loss_bbox=dict(type='L1Loss', loss_weight=5.0), loss_iou=dict(type='GIoULoss', loss_weight=2.0), with_box_refine=True), train_cfg=dict( assigner=dict( type='HungarianAssigner', cls_cost=dict(type='FocalLossCost', weight=2.0), reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), test_cfg=dict(max_per_img=100))

teacher = dict( type='mmdet.DeformableDETR', backbone=dict( type='ResNeXt', depth=101, groups=64, base_width=4, num_stages=4, out_indices=(1, 2, 3), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), style='pytorch', init_cfg=None), neck=dict( type='ChannelMapper', in_channels=[512, 1024, 2048], kernel_size=1, out_channels=256, act_cfg=None, norm_cfg=dict(type='GN', num_groups=32), num_outs=4), bbox_head=dict( type='DeformableDETRHead', num_query=300, num_classes=2, in_channels=2048, sync_cls_avg_factor=True, as_two_stage=True, transformer=dict( type='DeformableDetrTransformer', encoder=dict( type='DetrTransformerEncoder', num_layers=6, transformerlayers=dict( type='BaseTransformerLayer', attn_cfgs=dict( type='MultiScaleDeformableAttention', embed_dims=256), feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'ffn', 'norm'))), decoder=dict( type='DeformableDetrTransformerDecoder', num_layers=6, return_intermediate=True, transformerlayers=dict( type='DetrTransformerDecoderLayer', attn_cfgs=[ dict( type='MultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), dict( type='MultiScaleDeformableAttention', embed_dims=256) ], feedforward_channels=1024, ffn_dropout=0.1, operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')))), positional_encoding=dict( type='SinePositionalEncoding', num_feats=128, normalize=True, offset=-0.5), loss_cls=dict( type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=2.0), loss_bbox=dict(type='L1Loss', loss_weight=5.0), loss_iou=dict(type='GIoULoss', loss_weight=2.0), with_box_refine=True), train_cfg=dict( assigner=dict( type='HungarianAssigner', cls_cost=dict(type='FocalLossCost', weight=2.0), reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), test_cfg=dict(max_per_img=100))

img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='AutoAugment', policies=[[{ 'type': 'Resize', 'img_scale': [(480, 800), (512, 800), (544, 800), (576, 800), (608, 800), (640, 800), (672, 800), (704, 800), (736, 800), (768, 800), (800, 800)], 'multiscale_mode': 'value', 'keep_ratio': True }], [{ 'type': 'Resize', 'img_scale': [(400, 4200), (500, 4200), (600, 4200)], 'multiscale_mode': 'value', 'keep_ratio': True }, { 'type': 'RandomCrop', 'crop_type': 'absolute_range', 'crop_size': (384, 600), 'allow_negative_crop': True }, { 'type': 'Resize', 'img_scale': [(480, 800), (512, 800), (544, 800), (576, 800), (608, 800), (640, 800), (672, 800), (704, 800), (736, 800), (768, 800), (800, 800)], 'multiscale_mode': 'value', 'override': True, 'keep_ratio': True }]]), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=1), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(800, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size_divisor=1), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]

dataset settings

dataset_type = 'CocoDataset' classes = ('[T]', '[F]') data_source = './220308/imgs_all/' ann_source = './annotations/'

light_valid_1124_train_ann_file = './train.json' light_valid_1124_train_img_prefix = './train_images/'

light_valid_1124_val_ann_file = './annotations/valid.json' light_valid_1124_val_img_prefix = './valid_images/'

######################### train set

data_date = 220308 train_set_1st= dict( type=dataset_type, img_prefix=data_source, classes=classes, ann_file=ann_source + str(data_date) + '_train.json', filter_empty_gt=False, pipeline=train_pipeline)

train_set_2nd= dict( type=dataset_type, img_prefix=train_set_2nd_img_prefix, classes=classes, ann_file=train_set_2nd_ann_file, filter_empty_gt=False, pipeline=train_pipeline) ######################### val set

valid_set_1st= dict( type=dataset_type, img_prefix=data_source, classes=classes, ann_file=ann_source + str(data_date) + '_valid.json', pipeline=test_pipeline)

valid_set_2nd= dict( type=dataset_type, img_prefix=valid_set_2nd_img_prefix, classes=classes, ann_file=valid_set_2nd_ann_file, pipeline=test_pipeline)

######################### test set

test_set_1st= dict( type=dataset_type, img_prefix=data_source, classes=classes, ann_file=ann_source + str(data_date) + '_test.json', pipeline=test_pipeline)

test_set_2nd= dict( type=dataset_type, img_prefix=valid_set_2nd_img_prefix, classes=classes, ann_file=valid_set_2nd_ann_file, pipeline=test_pipeline)

''' data = dict( samples_per_gpu=8, workers_per_gpu=4, train=dict(type='ConcatDataset', datasets=[train_set_1st, train_set_2nd]), val=dict(type='ConcatDataset', datasets=[valid_set_1st, valid_set_2nd], separate_eval=True), test=test_set_2nd) '''

data = dict( samples_per_gpu=8, workers_per_gpu=4, train=train_set_2nd, val=valid_set_2nd, test=valid_set_2nd)

''' optimizer = dict( type='Adamax', lr=0.002, weight_decay=0.0001, paramwise_cfg=dict( custom_keys=dict( backbone=dict(lr_mult=0.1), sampling_offsets=dict(lr_mult=0.1), reference_points=dict(lr_mult=0.1)))) optimizer_config = dict(delete=True, grad_clip=dict(max_norm=0.1, norm_type=2)) '''

optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))

learning policy by mmdetection documents

lr_config = dict(policy='step', min_lr=2e-3,

by_epoch=True,

             warmup='linear',
             warmup_iters=5000,
             warmup_ratio=1.0 / 5,
             step=1,
             warmup_by_epoch=False)

runner = dict(type='EpochBasedRunner', max_epochs=300) evaluation= dict(classwise=True, interval=1, metric='bbox') checkpoint_config = dict(interval=1)

log_config = dict( interval=50, hooks=[dict(type='TextLoggerHook')])

custom_hooks = [dict(type='NumClassCheckHook')] dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None

work_dir = './deformable_detr_rx101_r50' workflow = [('train', 1), ('val', 1)]

'''# optimizer optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None)

learning policy

lr_config = dict( policy='step', warmup='linear', warmup_iters=500, warmup_ratio=0.001, step=[8, 11]) runner = dict(type='EpochBasedRunner', max_epochs=300) evaluation= dict(classwise=True, interval=1, metric='bbox') '''

algorithm setting

algorithm = dict( type='GeneralDistill', architecture=dict( type='MMDetArchitecture', model=student, ), distiller=dict( type='SingleTeacherDistiller', teacher=teacher, teacher_trainable=False, components=[ dict( student_module='bbox_head.cls_branches', teacher_module='bbox_head.cls_branches', losses=[ dict( type='ChannelWiseDivergence', name='loss_cwd_cls_head', tau=1, loss_weight=5, ) ]) ]), )

find_unused_parameters = True

humu789 commented 2 years ago

The usage is the same as mmdetection's. Your command has a typo: try changing `--eval--options` to `--eval-options`.

OBVIOUSDAWN commented 2 years ago

Sorry to bother you — I am also studying DETR-related work and saw your issue while looking for a model-distillation method. I noticed that your teacher model uses ResNeXt as the backbone; does it bring an improvement? I had previously tried ResNeXt and Res2Net for my task, but the accuracy decreased and was far inferior to ResNet.