openxrlab / xrmocap

OpenXRLab Multi-view Motion Capture Toolbox and Benchmark
https://xrmocap.readthedocs.io/
Other
353 stars 42 forks source link

SMPLX fitting bad result #166

Open rorrewang opened 1 month ago

rorrewang commented 1 month ago

Hello, I found that I can get better 3D poses during multiview-singleperson task, but I havng some difficulties fitting the SMPLX model. The head and neck of the human body look strange, and the SMPLX shape seems too large for female individuals, making their upper bodies look wierd.I have tried to change the config file but it didn't work well. Could you please advise on which specific parameters I should adjust to avoid these situations? Here is my config file and some necessary images:

type = 'MultiViewSinglePersonSMPLEstimator'
work_dir = './temp'
verbose = True
logger = None
bbox_detector = None
kps2d_estimator = None

smplify = dict(
    type='SMPLifyX',
    verbose=verbose,
    info_level='stage',
    n_epochs=1,
    use_one_betas_per_video=True,
    hooks=[
        dict(type='SMPLifyVerboseHook'),
    ],
    grad_clip=3.0,
    logger=logger,
    body_model=dict(
        type='SMPLX',
        gender='neutral',
        # gender='female',
        num_betas=10,
        keypoint_convention='smplx',
        model_path='mmhuman3d/data/body_models/smplx',
        batch_size=1,
        use_face_contour=True,
        use_pca=False,
        num_pca_comps=24,
        flat_hand_mean=False,
        logger=logger),
    optimizer=dict(
        type='LBFGS', max_iter=20, lr=1.0, line_search_fn='strong_wolfe'),
    ignore_keypoints=[
        'right_smalltoe', 'right_bigtoe', 'left_smalltoe', 'left_bigtoe'
    ],
    handlers=[
        dict(
            handler_key='keypoints3d_mse',
            type='Keypoint3dMSEHandler',
            mse_loss=dict(
                type='KeypointMSELoss',
                loss_weight=10.0,
                reduction='sum',
                sigma=100),
            logger=logger),
        dict(
            handler_key='keypoints2d_mse',
            type='MultiviewKeypoint2dMSEHandler',
            mse_loss=dict(
                type='KeypointMSELoss',
                loss_weight=1.0,
                reduction='sum',
                sigma=100),
            logger=logger),
        dict(
            handler_key='shape_prior',
            type='BetasPriorHandler',
            prior_loss=dict(
                type='ShapePriorLoss', loss_weight=5e-3, reduction='mean'),
            logger=logger),
        dict(
            handler_key='joint_prior',
            type='BodyPosePriorHandler',
            prior_loss=dict(
                type='JointPriorLoss',
                loss_weight=1.0,
                reduction='mean',
                use_full_body=True,
                smooth_spine=True,
                smooth_spine_loss_weight=1.0,
                lock_foot=False,
                lock_foot_loss_weight=1.0,
                lock_apose_spine=False,
                lock_apose_spine_loss_weight=1.0),
            logger=logger),
        dict(
            handler_key='smooth_joint',
            type='BodyPosePriorHandler',
            prior_loss=dict(
                type='SmoothJointLoss',
                loss_weight=1.0,
                reduction='mean',
                loss_func='L2'),
            logger=logger),
        dict(
            handler_key='pose_reg',
            type='BodyPosePriorHandler',
            prior_loss=dict(
                type='PoseRegLoss', loss_weight=0.001, reduction='mean'),
            logger=logger),
        dict(
            handler_key='keypoints3d_limb_len',
            type='Keypoint3dLimbLenHandler',
            loss=dict(
                type='LimbLengthLoss',
                convention='smplx',
                loss_weight=1.0,
                reduction='mean'),
            logger=logger),
    ],
    stages=[
        # stage 0 betas
        dict(
            n_iter=40,
            ftol=1e-4,
            fit_global_orient=False,
            fit_transl=False,
            fit_body_pose=False,
            fit_betas=True,
            fit_left_hand_pose=False,
            fit_right_hand_pose=False,
            fit_expression=False,
            fit_jaw_pose=False,
            fit_leye_pose=False,
            fit_reye_pose=False,
            keypoints3d_mse_weight=0.0,
            keypoints2d_mse_weight=0.0,
            keypoints3d_limb_len_weight=1.0,
            shape_prior_weight=1.0,
            joint_prior_weight=0.0,
            smooth_joint_weight=0.0,
            pose_reg_weight=0.0,
            pose_prior_weight=0.0),
        # stage 1 global orient, transl, betas
        dict(
            n_iter=50,
            ftol=1e-4,
            fit_global_orient=True,
            fit_transl=True,
            fit_body_pose=False,
            fit_betas=False,
            fit_left_hand_pose=True,
            fit_right_hand_pose=True,
            fit_expression=False,
            fit_jaw_pose=False,
            fit_leye_pose=False,
            fit_reye_pose=False,
            keypoints3d_mse_weight=1.0,
            keypoints2d_mse_weight=0.0,
            keypoints3d_limb_len_weight=0.0,
            shape_prior_weight=0.0,
            joint_prior_weight=0.0,
            smooth_joint_weight=0.0,
            pose_reg_weight=0.0,
            pose_prior_weight=0.0,
            shoulder_weight=1.0,
            hip_weight=1.0,
            body_weight=0.0,
            hand_weight=0.0,
            face_weight=0.0,
            foot_weight=0.0),
        # stage 2 pose: fit pose based on kps
        dict(
            n_iter=40,
            ftol=1e-4,
            fit_global_orient=True,
            fit_transl=True,
            fit_body_pose=True,
            fit_betas=True,
            fit_left_hand_pose=True,
            fit_right_hand_pose=True,
            fit_expression=True,
            fit_jaw_pose=True,
            fit_leye_pose=False,
            fit_reye_pose=False,
            keypoints3d_mse_weight=1.0,
            keypoints2d_mse_weight=0.0,
            keypoints3d_limb_len_weight=0.0,
            shape_prior_weight=1,
            joint_prior_weight=0.0,
            smooth_joint_weight=0.1,
            pose_reg_weight=0.1,
            pose_prior_weight=0.0,
            shoulder_weight=1.0,
            hip_weight=1.0,
            body_weight=1.0,
            hand_weight=0.5,
            face_weight=0.5,
            foot_weight=1.0),
        # 重新修正shape
        # dict(
        #     n_iter=40,
        #     ftol=1e-4,
        #     fit_global_orient=True,
        #     fit_transl=True,
        #     fit_body_pose=False,
        #     fit_betas=True,
        #     fit_left_hand_pose=False,
        #     fit_right_hand_pose=False,
        #     fit_expression=False,
        #     fit_jaw_pose=False,
        #     fit_leye_pose=False,
        #     fit_reye_pose=False,
        #     keypoints3d_mse_weight=0.5,
        #     keypoints2d_mse_weight=0.0,
        #     keypoints3d_limb_len_weight=0.0,
        #     shape_prior_weight=2,
        #     joint_prior_weight=0.0,
        #     smooth_joint_weight=0.1,
        #     pose_reg_weight=0.001,
        #     pose_prior_weight=0.0,
        #     shoulder_weight=0.0,
        #     hip_weight=0.0,
        #     body_weight=0.0,
        #     hand_weight=0.0,
        #     face_weight=0.0,
        #     foot_weight=0.0),
    ],

)

triangulator = dict(type='AniposelibTriangulator', camera_parameters=[])
cam_pre_selector = dict(
    type='ManualThresholdSelector', threshold=0.4, verbose=verbose)
cam_selector = dict(
    type='CameraErrorSelector',
    target_camera_number=15,
    triangulator=dict(type='AniposelibTriangulator', camera_parameters=[]),
    verbose=verbose)
final_selectors = [
    # use ManualThresholdSelector to set a lower bound
    dict(type='ManualThresholdSelector', threshold=0.4, verbose=verbose),
    dict(
        type='AutoThresholdSelector',
        start=0.95,
        stride=-0.025,
        verbose=verbose)
]
kps3d_optimizers = [
    dict(type='NanInterpolation', verbose=verbose),
    dict(
        type='AniposelibOptimizer',
        triangulator=dict(type='AniposelibTriangulator', camera_parameters=[]),
        smooth_weight=2.0,
        verbose=verbose)
]

I got results like:

159b3ec2e7731520eeff340d15b16711

https://github.com/user-attachments/assets/4d853dcb-8f42-43e7-b10c-efd433e3f413

b82e5127612d44b7aaa415385bd50308

https://github.com/user-attachments/assets/3c6f83d0-3bbd-48a5-8483-b65ff382024f The rendered person seems to have cervical spondylosis ......

github-actions[bot] commented 1 month ago

Hi @rorrewang, welcome to commit your first issue! 你好 @rorrewang,非常欢迎首次提交你的问题!