open-mmlab / mmpose

OpenMMLab Pose Estimation Toolbox and Benchmark.
https://mmpose.readthedocs.io/en/latest/
Apache License 2.0

[Bug] #2772

Closed. T1sweet closed this issue 10 months ago.

T1sweet commented 10 months ago

Prerequisite

Environment

CUDA 11.3, mmcv 1.4.5, Python 3.9, PyTorch 1.10.1
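
If a fuller environment report is needed, mmpose ships a helper for collecting it; a minimal sketch, assuming the mmpose 0.x `collect_env` utility is available:

```python
# Sketch: dump the full environment (PyTorch, CUDA, mmcv, mmpose versions).
# Assumes mmpose 0.x, where mmpose.utils exposes collect_env().
from mmpose.utils import collect_env

for name, value in collect_env().items():
    print(f'{name}: {value}')
```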

Reproduces the problem - code sample

None; the config used is given in the next section.

Reproduces the problem - command or script

```python
_base_ = [
    '../../../_base_/default_runtime.py',
    '../../../_base_/datasets/coco.py'
]

work_dir = './tools/work_dirs/cp_hrnet_w32_coco_256x256_dist'
log_level = 'INFO'
load_from = None
resume_from = None

checkpoint_config = dict(interval=10)
evaluation = dict(interval=10, metric='mAP', save_best='AP')

total_epochs = 210
log_config = dict(
    interval=10,
    hooks=[dict(type='TextLoggerHook')])

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)

lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

model = dict(
    type='TopDown',
    pretrained=None,
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=32,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(
            type='JointsMSELoss',
            use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=11))

data_cfg = dict(
    image_size=[256, 256],
    heatmap_size=[64, 64],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    det_bbox_thr=0.0,
    use_gt_bbox=False,
    # bbox_file='data/coco2017/annotations/person_detection_results/COCO_val2017_detections_AP_H_56_person.json',
    bbox_file='data/coco2017/annotations/person_detection_results/COCO_test-dev2017_detections_AP_H_609_person.json',
)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation',
        rot_factor=40,
        scale_factor=0.5),
    dict(type='TopDownAffine', use_udp=False),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(type='TopDownGenerateTarget', sigma=2),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center',
            'scale', 'rotation', 'flip_pairs'
        ]),
]

val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownAffine'),
    dict(type='ToTensor'),
    dict(
        type='NormalizeTensor',
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]

test_pipeline = val_pipeline

data_root = '/root/autodl-tmp/cp/mmpose-0.29.0/mmpose-0.29.0/data/coco2017'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=40,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    train=dict(
        type='TopDownCocoDataset',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDataset',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDataset',
        ann_file=f'{data_root}/annotations/image_info_test2017/image_info_test-dev2017.json',
        img_prefix=f'{data_root}/test2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)
```
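
Note that `val` and `test` share the same `data_cfg` above, so both splits read whichever `bbox_file` is left uncommented. A quick way to confirm what each split will actually load is to parse the config with mmcv; a minimal sketch, assuming the file is saved under the usual `configs/` tree (so the `_base_` entries resolve) and using a hypothetical filename:

```python
# Sketch: print the annotation and detection-bbox files each split will use.
# The config filename below is hypothetical; adjust it to where the file lives.
from mmcv import Config

cfg = Config.fromfile(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
    'cp_hrnet_w32_coco_256x256_dist.py')

for split in ('val', 'test'):
    ds = cfg.data[split]
    print(split, 'ann_file    =', ds['ann_file'])
    print(split, 'use_gt_bbox =', ds['data_cfg']['use_gt_bbox'])
    print(split, 'bbox_file   =', ds['data_cfg']['bbox_file'])
```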

Reproduces the problem - error message

No error message; training and evaluation run without errors.

Additional information

The HRNet model was trained for 210 epochs on COCO2017 train2017. It reaches 76.1 AP on val2017 [screenshot: val2017 evaluation results],

but only 62.0 AP when the results are uploaded for test-dev2017 [screenshot: test-dev2017 evaluation results].

Other models, such as HRFormer, Swin, and PVT, obtain comparable results on test-dev2017. What could have gone wrong?
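
Before suspecting the model, it is worth verifying that the uploaded file follows the COCO keypoint-results format and covers the whole test-dev split. A minimal sketch, assuming the test run wrote its predictions to a file named `result_keypoints.json` (the actual filename depends on how the results were dumped):

```python
# Sketch: sanity-check a COCO keypoint submission file before uploading.
# 'result_keypoints.json' is an assumed name; each entry must provide
# image_id, category_id, a flat 51-value keypoints list (17 joints x x/y/score)
# and a detection score.
import json

with open('result_keypoints.json') as f:
    results = json.load(f)

first = results[0]
assert {'image_id', 'category_id', 'keypoints', 'score'} <= set(first)
assert len(first['keypoints']) == 17 * 3

print('detections:', len(results))
print('distinct images covered:', len({r['image_id'] for r in results}))
```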

Ben-Louis commented 10 months ago

Hello, have you tried to evaluate official checkpoints on test-dev?
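
For a quick comparison without a full test-dev submission, an official checkpoint can also be loaded through the mmpose 0.x inference API and spot-checked on a few images; a minimal sketch, with the config path taken from the mmpose 0.x configs tree and the checkpoint URL and image path left as placeholders to be filled from the model zoo:

```python
# Sketch: spot-check an official top-down HRNet checkpoint with the
# mmpose 0.x inference API. The checkpoint URL and image path are
# placeholders; take the real URL from the MMPose model zoo.
from mmpose.apis import init_pose_model, inference_top_down_pose_model

config = ('configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
          'hrnet_w32_coco_256x192.py')
checkpoint = 'https://download.openmmlab.com/mmpose/.../hrnet_w32.pth'  # placeholder

model = init_pose_model(config, checkpoint, device='cuda:0')

# One person box in [x, y, w, h] format (a score may be appended).
person_results = [{'bbox': [50, 50, 200, 400]}]
pose_results, _ = inference_top_down_pose_model(
    model, 'some_coco_image.jpg', person_results, format='xywh')
print(pose_results[0]['keypoints'].shape)  # (17, 3): x, y, score per joint
```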