open-mmlab / mmpose

OpenMMLab Pose Estimation Toolbox and Benchmark.
https://mmpose.readthedocs.io/en/latest/
Apache License 2.0

mmpose 1.0 visualization issue #1798

Closed ChenZhenGui closed 1 year ago

ChenZhenGui commented 1 year ago

This is my config:

```python
_base_ = ['../../../_base_/default_runtime.py']

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# runtime
train_cfg = dict(max_epochs=300, val_interval=50)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-4,
))

resume = True
load_from = None

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 240],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='MyMobileViT',
        model_cfg={
            'layer1': {
                'out_channels': 32, 'expand_ratio': 4, 'num_blocks': 1,
                'stride': 1, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer2': {
                'out_channels': 64, 'expand_ratio': 4, 'num_blocks': 3,
                'stride': 2, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer3': {
                'out_channels': 96, 'transformer_channels': 144,
                'ffn_dim': 288, 'transformer_blocks': 2, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer4': {
                'out_channels': 128, 'transformer_channels': 192,
                'ffn_dim': 384, 'transformer_blocks': 4, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'layer5': {
                'out_channels': 160, 'transformer_channels': 240,
                'ffn_dim': 480, 'transformer_blocks': 3, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0
            },
            'last_layer_exp_factor': 4,
            'cls_dropout': 0.1
        },
    ),
    head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=640,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
        output_heatmaps=True))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = '/data/zgchen/ViTPose/tools/data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale', padding=1.5),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# data loaders
train_dataloader = dict(
    batch_size=48,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=48,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        bbox_file='/data/zgchen/ViTPose/tools/data/coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
```
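Before wiring up any visualization code, one quick sanity check (not part of the original report; the config file name is an assumption, and the custom `MyMobileViT` backbone must already be registered) is to load this config and inspect a single validation batch:

```python
# Hypothetical smoke test: build the val dataloader from the config above and
# look at one batch, to see the batch layout the runner will hand to hooks.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('my_mobilevit_coco.py')  # assumed path to this config
val_loader = Runner.build_dataloader(cfg.val_dataloader)

data_batch = next(iter(val_loader))
print(type(data_batch))         # dict -- not a list as in mmpose 0.x
print(list(data_batch.keys()))  # ['inputs', 'data_samples']
print(data_batch['data_samples'][0].get('img_path'))
```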

When I try to test the visualization, the program errors at `img_path = data_batch[0]['data_samples'].get('img_path')` with `KeyError: 0`. I then tried to debug it and changed `data_batch[0]['data_samples'].get('img_path')` to `data_batch['data_samples'][0].get('img_path')`. That works, but now there is a new problem.
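For context, a minimal sketch of why the 0.x-style indexing fails here, based only on the error and fix quoted above: in MMPose 1.0 the runner passes hooks a `data_batch` that is a dict keyed by `'inputs'` and `'data_samples'`, not a list of per-sample dicts.

```python
# Shape of data_batch in MMPose 1.0 (MMEngine runner, default pseudo_collate):
#
#   data_batch = {
#       'inputs':       [Tensor(3, H, W), ...],  # one image tensor per sample
#       'data_samples': [PoseDataSample, ...],   # one data sample per image
#   }
#
# Per-sample fields are therefore reached by key first, index second:
img_path = data_batch['data_samples'][0].get('img_path')  # works in 1.0

# The 0.x-style access indexes the dict with an integer, so dict.__getitem__(0)
# raises the reported error:
# img_path = data_batch[0]['data_samples'].get('img_path')  # KeyError: 0
```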

ChenZhenGui commented 1 year ago

After the change, my validation image looks like this: [attached screenshot: incorrect visualization result]
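The thread does not record the eventual fix, but one way to sidestep hand-rolled batch indexing entirely is mmpose 1.0's built-in validation-time visualization. A minimal sketch of the relevant config keys, assuming the stock `PoseVisualizationHook` and `PoseLocalVisualizer` that ship with 1.0:

```python
# Sketch only: enable the built-in visualization hook instead of a custom one.
# PoseVisualizationHook reads the original image via each data_sample's
# img_path and draws the predicted keypoints with the configured visualizer.
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater'),
    visualization=dict(type='PoseVisualizationHook', enable=True))

visualizer = dict(
    type='PoseLocalVisualizer',
    vis_backends=[dict(type='LocalVisBackend')],
    name='visualizer')
```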