mks0601 / 3DMPPE_POSENET_RELEASE

Official PyTorch implementation of "Camera Distance-aware Top-down Approach for 3D Multi-person Pose Estimation from a Single RGB Image", ICCV 2019

Problem with Human36m Data #40

Closed hansongfang closed 4 years ago

hansongfang commented 4 years ago

Hi, thank you for sharing the data. I observe a weird problem: the 2D pose does not match the person for some actions. Below is a visualization of subject 9, action SittingDown, subaction 2.

[image: projected 2D skeleton drawn offset from the person in the frame]

Do you observe the same issue? Is this a problem with the original Human36M Dataset?

mks0601 commented 4 years ago

Hi, no I haven't seen this kind of problem. How did you visualize the 2D pose?

hansongfang commented 4 years ago

Thank you for the quick reply. Attached is the code I use for visualization. I have checked it several times, and with the same code most other actions look normal, so this one really bothers me. I would appreciate it if you could try running the attached code. I downloaded the dataset from the Google Drive link provided in the README.md.

-------------------------------------------------Code------------------------------------------------------

import os
import os.path as osp
import numpy as np
import json
import cv2
import shutil

def world2cam(world_coord, R, T):
    cam_coord = np.dot(R, world_coord - T)
    return cam_coord

def cam2pixel(cam_coord, f, c):
    x = cam_coord[:, 0] / (cam_coord[:, 2] + 1e-8) * f[0] + c[0]
    y = cam_coord[:, 1] / (cam_coord[:, 2] + 1e-8) * f[1] + c[1]
    z = cam_coord[:, 2]
    img_coord = np.concatenate((x[:,None], y[:,None], z[:,None]),1)
    return img_coord
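
# Note: the two helpers above implement the standard pinhole projection used by
# the Human3.6M camera parameters: a world-space joint X_w is mapped into the
# camera frame as X_c = R @ (X_w - T), then projected to pixels as
#   u = f_x * x_c / z_c + c_x,  v = f_y * y_c / z_c + c_y.
# The 1e-8 term only guards against division by zero; the depth z_c is returned
# unchanged alongside (u, v).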

def vis_frame(joints, parents, joints_right, img, colors=None,
              use_score=False, joint_scores=None, threshold=None):
    """Draw 2D joints and bones on the original image.

    Args:
        joints: joint positions, np.array, (n, 2)
        parents: joint parent ids, np.array, (n, )
        joints_right: right-side joint ids, np.array, (m, )
        img: original image, np.array, (height, width, 3)
        threshold: minimum score for a joint to be drawn
        colors: per-joint colors for joints and bones, list, [(255, 0, 0), ...]
        joint_scores: confidence of each estimated joint, np.array, (n, )
        use_score: if True, joint_scores and threshold must be provided

    Returns:
        img: image with the skeleton drawn in place, np.array, (height, width, 3)
    """
    if use_score:
        assert joint_scores is not None
        assert threshold is not None

    if threshold is None:
        threshold = 0.0

    if joint_scores is None:
        num_joints = joints.shape[0]
        joint_scores = np.ones(num_joints)

    if colors is None:
        # OpenCV uses BGR, so right-side joints are drawn in blue, the rest in red
        colors = [(255, 0, 0) if j in joints_right else (0, 0, 255) for j in range(joints.shape[0])]

    for j in range(joints.shape[0]):
        if joint_scores[j] < threshold:
            continue
        x, y = int(joints[j, 0]), int(joints[j, 1])
        cv2.circle(img, (x, y), 6, colors[j], -1)

        j_parent = parents[j]
        if j_parent < 0 or joint_scores[j_parent] < threshold:
            continue

        x2, y2 = int(joints[j_parent, 0]), int(joints[j_parent, 1])
        cv2.line(img, (x, y), (x2, y2), colors[j], thickness=3)  # draw bone to parent joint

    return img

def render_2d(input_img_dir, seq_2d, joint_parent, joint_right, img_dir):
    if osp.exists(img_dir):
        shutil.rmtree(img_dir)

    base_name = osp.basename(input_img_dir)
    print(f'Render 2d pose to {img_dir}.')
    print(f'Human 36m inputs dir {base_name}')
    os.makedirs(img_dir, exist_ok=True)
    for frame_id in range(seq_2d.shape[0]):
        pose_2d = seq_2d[frame_id]
        img_file = osp.join(img_dir, f'frame_{frame_id}.jpg')
        input_img_file = f'{input_img_dir}/{base_name}_{(frame_id+1):06}.jpg'  # Human3.6M frame numbering starts at 1
        assert osp.exists(input_img_file)
        img = cv2.imread(input_img_file)
        vis_img = vis_frame(pose_2d, joint_parent, joint_right, img)
        cv2.imwrite(img_file, vis_img)

if __name__ == "__main__":
    base_dir = '/media/shanaf/HDD/DATA/Human36m'
    h36m_img_dir = osp.join(base_dir, 'Human36m', 'images')
    annot_path = osp.join(base_dir, 'Human36m', 'annotations')

    subject_id = 9
    act_id = 10
    sub_act_id = 2
    h36m_cam_id = 3

    joints_file = osp.join(annot_path, f'Human36M_subject{subject_id}_joint_3d.json')
    assert osp.exists(joints_file)
    with open(joints_file, 'r') as f:
        joints = json.load(f)

    camera_info_file = osp.join(annot_path, f'Human36M_subject{subject_id}_camera.json')
    with open(camera_info_file, 'r') as f:
        cameras = json.load(f)
    cam_param = cameras[str(h36m_cam_id)]
    R = np.array(cam_param['R'], dtype=np.float32)
    t = np.array(cam_param['t'], dtype=np.float32)
    f = np.array(cam_param['f'], dtype=np.float32)
    c = np.array(cam_param['c'], dtype=np.float32)

    joints_act = joints[str(act_id)][str(sub_act_id)]
    duration = len(joints_act)  # number of annotated frames
    seq_pose = []
    seq_pose_2d = []
    for frame_id in range(duration):
        pose_3d_frame = np.array(joints_act[str(frame_id)], dtype=np.float32)
        seq_pose.append(pose_3d_frame)

        # 2d pose
        pose_3d_frame_cam = world2cam(pose_3d_frame.transpose(1, 0),
                                      R, t.reshape(3, 1)).transpose(1, 0)
        pose_2d_img = cam2pixel(pose_3d_frame_cam, f, c)
        pose_2d_img = pose_2d_img[:, :2]
        seq_pose_2d.append(pose_2d_img)

    seq_pose_2d = np.array(seq_pose_2d, dtype=np.float32)
    # Human3.6M 17-joint skeleton: parent id per joint, and right-side joint ids
    joint_parent = [-1, 0, 1, 2, 0, 4, 5, 0, 7, 8, 9, 8, 11, 12, 8, 14, 15]
    joint_right = [1, 2, 3, 14, 15, 16]

    subject_dir = './S9'
    os.makedirs(subject_dir, exist_ok=True)
    input_img_dir = f'{h36m_img_dir}/' \
                    f's_{subject_id:02}' \
                    f'_act_{act_id:02}' \
                    f'_subact_{sub_act_id:02}' \
                    f'_ca_{h36m_cam_id:02}'
    print(f'Human36m pose corresponding image dir {input_img_dir}.')
    img_dir = osp.join(subject_dir, f'act_{act_id}_subact_{sub_act_id}')
    video_file = osp.join(subject_dir, f's_{subject_id:02}_act_{act_id}_subact_{sub_act_id}_cam{h36m_cam_id}_2d.mp4')
    gif_file = osp.join(subject_dir, f's_{subject_id:02}_act_{act_id}_subact_{sub_act_id}_cam{h36m_cam_id}_2d.gif')
    render_2d(input_img_dir,
              seq_pose_2d,
              joint_parent,
              joint_right,
              img_dir)
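
For completeness, the rendered frames can be stitched into the video_file defined above with cv2.VideoWriter. A minimal sketch (frame size is read from the first rendered frame; 50 fps matches Human3.6M's capture rate):

def frames_to_video(img_dir, video_file, num_frames, fps=50):
    # read frame size from the first rendered frame
    first = cv2.imread(osp.join(img_dir, 'frame_0.jpg'))
    height, width = first.shape[:2]
    writer = cv2.VideoWriter(video_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    for frame_id in range(num_frames):
        writer.write(cv2.imread(osp.join(img_dir, f'frame_{frame_id}.jpg')))
    writer.release()

frames_to_video(img_dir, video_file, seq_pose_2d.shape[0])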
mks0601 commented 4 years ago

Oh, I found the same problem. However, most of the other frames are normal. I do nothing special when pre-processing the H36M data, so I think this error comes from the original H36M dataset.
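
If you want to estimate how many frames are affected, a rough sanity check on top of your script could flag frames whose projected 2D pose leaves the image or jumps abruptly between consecutive frames. A sketch; the nominal 1000 x 1000 image size and the 100 px jump threshold are assumptions to tune:

def flag_bad_frames(seq_pose_2d, img_w=1000, img_h=1000, max_jump=100.0):
    # img_w/img_h: assumed frame size; max_jump: mean per-joint displacement
    # (pixels) between consecutive frames above which a frame is flagged
    bad = []
    for frame_id in range(seq_pose_2d.shape[0]):
        pose = seq_pose_2d[frame_id]
        out_of_bounds = np.any((pose[:, 0] < 0) | (pose[:, 0] >= img_w) |
                               (pose[:, 1] < 0) | (pose[:, 1] >= img_h))
        jumped = (frame_id > 0 and
                  np.linalg.norm(pose - seq_pose_2d[frame_id - 1], axis=1).mean() > max_jump)
        if out_of_bounds or jumped:
            bad.append(frame_id)
    return bad

bad_frames = flag_bad_frames(seq_pose_2d)
print(f'{len(bad_frames)} / {seq_pose_2d.shape[0]} frames look suspicious')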

mks0601 commented 4 years ago

As the H36M dataset is very large-scale, I think this error should not affect the test performance much.

hansongfang commented 4 years ago

Thanks. I agree.