Open 11610 opened 6 months ago
配置文件如下
_base_ = ['../../../_base_/default_runtime.py']
vis_backends = [
dict(type='LocalVisBackend'),
]
visualizer = dict(
type='Pose3dLocalVisualizer', vis_backends=vis_backends, name='visualizer')
# runtime: train for 160 epochs, running validation every 10 epochs
train_cfg = {'max_epochs': 160, 'val_interval': 10}
# optimizer
optim_wrapper = {'optimizer': {'type': 'Adam', 'lr': 1e-3}}
# learning policy: per-epoch exponential LR decay, stopping at epoch 80
param_scheduler = [
    {'type': 'ExponentialLR', 'gamma': 0.975, 'end': 80, 'by_epoch': True},
]
# LR is auto-scaled relative to a reference total batch size of 1024
auto_scale_lr = {'base_batch_size': 1024}
# hooks
default_hooks = {
    # Keep only the single best checkpoint, ranked by MPJPE (lower is better).
    'checkpoint': {
        'type': 'CheckpointHook',
        'save_best': 'MPJPE',
        'rule': 'less',
        'max_keep_ckpts': 1,
    },
    # Emit a training log line every 20 iterations.
    'logger': {'type': 'LoggerHook', 'interval': 20},
}
# codec settings: encode/decode 133-keypoint (whole-body) lifting targets,
# zero-centered on root joint 0; the root joint is kept in the output.
codec = {
    'type': 'VideoPoseLifting',
    'num_keypoints': 133,
    'zero_center': True,
    'root_index': 0,
    'remove_root': False,
}
# model settings: a PoseLifter with a TCN backbone that consumes 2D keypoint
# sequences and a temporal regression head supervised by MPJPE loss.
model = {
    'type': 'PoseLifter',
    'backbone': {
        'type': 'TCN',
        # 133 whole-body keypoints x (x, y) coordinates per frame.
        'in_channels': 2 * 133,
        'stem_channels': 1024,
        'num_blocks': 2,
        # Three kernels of size 3 (3*3*3 = 27), matching seq_len=27 below.
        'kernel_sizes': (3, 3, 3),
        'dropout': 0.25,
        'use_stride_conv': True,
    },
    'head': {
        'type': 'TemporalRegressionHead',
        'in_channels': 1024,
        'num_joints': 133,
        'loss': {'type': 'MPJPELoss'},
        # Decode predictions with the same codec used to encode targets.
        'decoder': codec,
    },
}
# base dataset settings
# Dataset class and root directory; annotation, camera, and image paths
# elsewhere in this config are resolved relative to `data_root`.
dataset_type = 'H36MWholeBodyDataset'
data_root = 'Human36m'
# pipelines
train_pipeline = [
    # Augmentation: randomly mirror keypoints and targets around the root.
    {
        'type': 'RandomFlipAroundRoot',
        'keypoints_flip_cfg': {},
        'target_flip_cfg': {},
    },
    # Encode lifting supervision targets with the codec defined above.
    {'type': 'GenerateTarget', 'encoder': codec},
    {
        'type': 'PackPoseInputs',
        'meta_keys': ('id', 'category_id', 'target_img_path', 'flip_indices',
                      'target_root'),
    },
]
# Validation pipeline: identical except the random flip augmentation is off.
val_pipeline = [
    {'type': 'GenerateTarget', 'encoder': codec},
    {
        'type': 'PackPoseInputs',
        'meta_keys': ('id', 'category_id', 'target_img_path', 'flip_indices',
                      'target_root'),
    },
]
# data loaders
# Training loader: shuffled 27-frame sequences of 2D whole-body keypoints.
train_dataloader = dict(
    batch_size=128,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        ann_file='h3wb_train.npz',
        seq_len=27,
        causal=False,
        pad_video_seq=True,
        # NOTE(review): this file must contain camera parameters matching the
        # whole-body (H3WB) annotations. The body-only sample file shipped at
        # tests/data/h36m/cameras.pkl will not match — verify the file's origin.
        camera_param_file='cameras.pkl',
        data_root=data_root,
        data_prefix=dict(img='images/'),
        pipeline=train_pipeline,
    ),
)
# Validation loader: deterministic order, no sample drop or round-up.
val_dataloader = dict(
    batch_size=128,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        # NOTE(review): validation reuses the training split
        # ('h3wb_train.npz'). Confirm this is intentional (e.g. because a
        # held-out H3WB split is unavailable) or point to a validation file.
        ann_file='h3wb_train.npz',
        seq_len=27,
        causal=False,
        pad_video_seq=True,
        camera_param_file='cameras.pkl',
        data_root=data_root,
        data_prefix=dict(img='images/'),
        pipeline=val_pipeline,
        test_mode=True,
    ))
# The test loader shares the validation configuration.
test_dataloader = val_dataloader
# evaluators
# Report both plain MPJPE and Procrustes-aligned error (p-mpjpe).
val_evaluator = [
    {'type': 'MPJPE', 'mode': 'mpjpe'},
    {'type': 'MPJPE', 'mode': 'p-mpjpe'},
]
test_evaluator = val_evaluator
是我的cameras.pkl文件不对吗?我是直接使用了MMPose里Human36提供的cameras.pkl文件(tests/data/h36m/cameras.pkl)
Prerequisite
Environment
`
`
Reproduces the problem - code sample
Reproduces the problem - command or script
PS F:\MyCode\BSVM\mmpose> & 'e:\Users\MSN\anaconda3\python.exe' 'c:\Users\MSN\.vscode\extensions\ms-python.debugpy-2024.2.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher' '62392' '--' 'F:\MyCode\BSVM\mmpose\tools\train.py' 'configs/body_3d_keypoint/video_pose_lift/h36m/video-pose-lift_tcn-27frm-supv_8xb128-160e_h36m.py' '--work-dir' 'train_result' '--resume' '--auto-scale-lr'
Reproduces the problem - error message
Additional information
按照我的理解video_pose_lift输入的应该是连续2D的人体骨骼数据,但我按照官方文档中进行训练,告知我camera参数不对,似乎输入的数据是图片数据,这一步应该是训练2D人体骨骼模型所需要的数据吧,而不是3D的,是代码目前不支持3D的训练还是我的配置出了问题呢?我在官方文档也找不到相关的介绍,请问可以帮下我吗?非常感谢!!
我想确认下MMPose是否支持训练video_pose_lift的模型,是否需要改动代码或者加额外的配置? 例如加上2D模型的配置和权重,由2D模型输出的2D人体骨骼再传入video_pose_lift模型中。我尝试了,可是不行,好像train.py只支持一个配置文件作为参数。 于是我看了下代码,似乎目前还不太支持,是要自己改动代码?还是说我的理解出了错误?配置错了,我是新手,希望有大佬捞我一手!!