open-mmlab / mmpose

OpenMMLab Pose Estimation Toolbox and Benchmark.
https://mmpose.readthedocs.io/en/latest/
Apache License 2.0

webcam_demo.py #1812

Closed ChenZhenGui closed 1 year ago

ChenZhenGui commented 1 year ago

I used my trained model to run the webcam demo. When no one was detected, everything was normal; when someone was detected, the error was as follows: [screenshot]. I tried to print the texts: [screenshot]

Ben-Louis commented 1 year ago

Hi, thanks for using MMPose. It looks like a problem with the format of the dataset metainfo. Could you please provide the configs (webcam & pose estimator)? They will help us find out where the problem lies.
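A quick way to check this on your side is to load the pose model and print its `dataset_meta`. A minimal sketch, assuming the MMPose 1.x `init_model` API (the paths are placeholders for your own files):

```python
# Minimal sketch: inspect the metainfo a loaded pose model carries.
# The config/checkpoint paths below are placeholders.
from mmpose.apis import init_model

model = init_model(
    'path/to/your/pose_config.py',
    'path/to/your/checkpoint.pth')
print(model.dataset_meta.keys())
```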

ChenZhenGui commented 1 year ago

yeah, this is my config:

```python
_base_ = ['../../../_base_/default_runtime.py']

channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# runtime
train_cfg = dict(max_epochs=300, val_interval=50)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='AdamW',
    lr=5e-3,
))

resume = True
load_from = None

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=300,
        milestones=[170, 260],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='MyMobileViT',
        model_cfg={
            'layer1': {
                'out_channels': 32, 'expand_ratio': 4, 'num_blocks': 1,
                'stride': 1, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer2': {
                'out_channels': 64, 'expand_ratio': 4, 'num_blocks': 3,
                'stride': 2, 'block_type': 'mv2', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer3': {
                'out_channels': 96, 'transformer_channels': 144,
                'ffn_dim': 288, 'transformer_blocks': 2, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer4': {
                'out_channels': 128, 'transformer_channels': 192,
                'ffn_dim': 384, 'transformer_blocks': 4, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'layer5': {
                'out_channels': 160, 'transformer_channels': 240,
                'ffn_dim': 480, 'transformer_blocks': 3, 'patch_h': 2,
                'patch_w': 2, 'stride': 2, 'mv_expand_ratio': 4,
                'num_heads': 4, 'block_type': 'mobilevit', 'dropout': 0.1,
                'ffn_dropout': 0.0, 'attn_dropout': 0.0},
            'last_layer_exp_factor': 4,
            'cls_dropout': 0.1
        }),
    head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=640,
        out_channels=channel_cfg['num_output_channels'],
        loss_keypoint=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
        output_heatmaps=True))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = '/data/zgchen/ViTPose/tools/data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', target_type='heatmap', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='GetBBoxCenterScale', padding=1.5),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs'),
]

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=64,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        bbox_file='/data/zgchen/ViTPose/tools/data/coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
```
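For what it's worth, the dataset metainfo in this config comes entirely from the `CocoDataset` defaults. In MMPose 1.x it can also be pinned explicitly via `metainfo=dict(from_file=...)`; a minimal sketch of that option (not part of my original config):

```python
# Sketch: pin the keypoint metainfo explicitly in a dataset entry. This is
# optional; 'CocoDataset' already defaults to this metainfo file in MMPose 1.x.
dataset = dict(
    type='CocoDataset',
    data_root='/data/zgchen/ViTPose/tools/data/coco/',
    data_mode='topdown',
    metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
)
```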

ChenZhenGui commented 1 year ago

and this is the cam_cfg:

```python
# Copyright (c) OpenMMLab. All rights reserved.
executor_cfg = dict(
    # Basic configurations of the executor
    name='Pose Estimation',
    camera_id=0,
    # Define nodes.
    # The configuration of a node usually includes:
    #   1. 'type': Node class name
    #   2. 'name': Node name
    #   3. I/O buffers (e.g. 'input_buffer', 'output_buffer'): specify the
    #       input and output buffer names. This may depend on the node class.
    #   4. 'enable_key': assign a hot-key to toggle enable/disable this node.
    #       This may depend on the node class.
    #   5. Other class-specific arguments
    nodes=[
    # 'DetectorNode':
    # This node performs object detection from the frame image using an
    # MMDetection model.
    dict(
        type='DetectorNode',
        name='detector',
        model_config='D://pythonProject//mmpose//demo//mmdetection_cfg//'
        'ssdlite_mobilenetv2-scratch_8xb24-600e_coco.py',
        model_checkpoint='https://download.openmmlab.com'
        '/mmdetection/v2.0/ssd/'
        'ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_'
        'scratch_600e_coco_20210629_110627-974d9307.pth',
        input_buffer='_input_',  # `_input_` is an executor-reserved buffer
        output_buffer='det_result'),
    # 'TopDownPoseEstimatorNode':
    # This node performs keypoint detection from the frame image using an
    # MMPose top-down model. Detection results are needed.
    dict(
        type='TopDownPoseEstimatorNode',
        name='human pose estimator',
        model_config='D:/pythonProject/mmpose/configs/body_2d_keypoint/'
        'topdown_heatmap/coco/mobilevit_coco-256x192.py',
        model_checkpoint='D:/pythonProject/mmpose/work_dirs/AP_epoch_300.pth',
        labels=['person'],
        input_buffer='det_result',
        output_buffer='human_pose'),
    dict(
        type='TopDownPoseEstimatorNode',
        name='animal pose estimator',
        model_config='configs/animal_2d_keypoint/topdown_heatmap/'
        'animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py',
        model_checkpoint='https://download.openmmlab.com/mmpose/animal/'
        'hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth',
        labels=['cat', 'dog', 'horse', 'sheep', 'cow'],
        input_buffer='human_pose',
        output_buffer='animal_pose'),
    # 'ObjectAssignerNode':
    # This node binds the latest model inference result with the current
    # frame. (This means the frame image and inference result may be
    # asynchronous).
    dict(
        type='ObjectAssignerNode',
        name='object assigner',
        frame_buffer='_frame_',  # `_frame_` is an executor-reserved buffer
        object_buffer='animal_pose',
        output_buffer='frame'),
    # 'ObjectVisualizerNode':
    # This node draws the pose visualization result in the frame image.
    # Pose results are needed.
    dict(
        type='ObjectVisualizerNode',
        name='object visualizer',
        enable_key='v',
        enable=True,
        show_bbox=True,
        must_have_keypoint=False,
        show_keypoint=True,
        input_buffer='frame',
        output_buffer='vis'),
    # 'SunglassesNode':
    # This node draws the sunglasses effect in the frame image.
    # Pose results are needed.
    dict(
        type='SunglassesEffectNode',
        name='sunglasses',
        enable_key='s',
        enable=False,
        input_buffer='vis',
        output_buffer='vis_sunglasses'),
    # 'BigeyeEffectNode':
    # This node draws the big-eye effect in the frame image.
    # Pose results are needed.
    dict(
        type='BigeyeEffectNode',
        name='big-eye',
        enable_key='b',
        enable=False,
        input_buffer='vis_sunglasses',
        output_buffer='vis_bigeye'),
    # 'NoticeBoardNode':
    # This node shows a notice board with given content, e.g. help
    # information.
    dict(
        type='NoticeBoardNode',
        name='instruction',
        enable_key='h',
        enable=True,
        input_buffer='vis_bigeye',
        output_buffer='vis_notice',
        content_lines=[
            'This is a demo for pose visualization and simple image '
            'effects. Have fun!', '', 'Hot-keys:',
            '"v": Pose estimation result visualization',
            '"s": Sunglasses effect B-)', '"b": Big-eye effect 0_0',
            '"h": Show help information',
            '"m": Show diagnostic information', '"q": Exit'
        ],
    ),
    # 'MonitorNode':
    # This node shows diagnostic information in the frame image. It can
    # be used for debugging or monitoring system resource status.
    dict(
        type='MonitorNode',
        name='monitor',
        enable_key='m',
        enable=False,
        input_buffer='vis_notice',
        output_buffer='display'),
    # 'RecorderNode':
    # This node saves the output video into a file.
    dict(
        type='RecorderNode',
        name='recorder',
        out_video_file='webcam_demo.mp4',
        input_buffer='display',
        output_buffer='_display_'
        # `_display_` is an executor-reserved buffer
    )
])
```
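For context, `demo/webcam_demo.py` consumes this file roughly as in the sketch below (assuming the MMPose 1.x webcam API; see the actual script for the real entry point):

```python
# Minimal sketch of how webcam_demo.py uses executor_cfg (MMPose 1.x webcam
# API assumed); the config path is an example.
from mmengine import Config
from mmpose.apis.webcam import WebcamExecutor

cfg = Config.fromfile('demo/webcam_cfg/pose_estimation.py')
executor = WebcamExecutor(**cfg.executor_cfg)
executor.run()
```
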
Ben-Louis commented 1 year ago

This bug is caused by the different metainfo formats used by MMPose and MMDetection. Thank you very much for pointing it out. You can modify your code as in https://github.com/open-mmlab/mmpose/pull/1813 to fix this bug.
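If updating is not convenient right away: the gist of the incompatibility is that the two libraries store class names under differently named metainfo keys. A hedged illustration of the kind of normalization involved (this is not the actual diff in #1813, and the key names here are assumptions):

```python
def get_classes(dataset_meta):
    """Illustrative only; read class names from whichever metainfo key the
    upstream model provides ('classes' vs. the older upper-case 'CLASSES')."""
    for key in ('classes', 'CLASSES'):
        if key in dataset_meta:
            return list(dataset_meta[key])
    return []


# Both metainfo styles yield the same result:
print(get_classes({'CLASSES': ('person',)}))  # ['person']
print(get_classes({'classes': ('person',)}))  # ['person']
```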