facebookresearch / InterHand2.6M

Official PyTorch implementation of "InterHand2.6M: A Dataset and Baseline for 3D Interacting Hand Pose Estimation from a Single RGB Image", ECCV 2020

Wrong projection result on "cam400412" #100

Open · TneitaP opened this issue 2 years ago

TneitaP commented 2 years ago

With the render method described in "tool/MANO_render/render.py", we found that the projection results for data under "Capture0/XXX/cam400412/XXXX.jpg" are wrong. What is the solution to this problem?

Using Annotation Type: H+M; Dataset Part: Test; Capture Idx: Capture0; Camera Idx: cam400412.

TneitaP commented 2 years ago

[screenshot: misaligned projection result]

mks0601 commented 2 years ago

How about changing the rendering target to the original one?
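Spelled out, those appear to be the settings near the top of tool/MANO_render/render.py:

```python
split = 'train'
capture_idx = '13'
seq_name = '0266_dh_pray'
cam_idx = '400030'
```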

TneitaP commented 2 years ago

Thanks for your reply. It seems to be okay ↓ (By the way, I also noticed how you fixed the left-hand MANO bug in https://github.com/vchoutas/smplx/issues/48, but it has no relevance to the current problem.)

[screenshot: correct rendering result]
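For reference, the workaround from that smplx issue is a one-line sign flip on the left-hand model's shape blend shapes (it also appears in the full script below); `mano_layer` is just a placeholder name for whatever dict holds the two smplx MANO models:

```python
# workaround for https://github.com/vchoutas/smplx/issues/48:
# the left-hand MANO shapedirs carry a wrong sign on the x component
mano_layer['left'].shapedirs[:, 0, :] *= -1
```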
TneitaP commented 2 years ago

To make the problem easier to reproduce, I post the full version of the code here:

```python
import os, glob
import cv2
import json
import numpy as np
import pickle 
import open3d as o3d
import torch 
import smplx 

def render_mesh(img, vert, face, focal, princpt):
    # mesh
    import trimesh
    import pyrender
    mesh = trimesh.Trimesh(vert, face)
    # rotate 180 deg about x: OpenCV camera (y down, z forward) -> pyrender/OpenGL (y up, z backward)
    rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])
    mesh.apply_transform(rot)
    material = pyrender.MetallicRoughnessMaterial(metallicFactor=0.0, alphaMode='OPAQUE', baseColorFactor=(1.0, 1.0, 0.9, 1.0))
    mesh = pyrender.Mesh.from_trimesh(mesh, material=material, smooth=False)
    scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))
    scene.add(mesh, 'mesh')

    camera = pyrender.IntrinsicsCamera(fx=focal[0], fy=focal[1], cx=princpt[0], cy=princpt[1])
    scene.add(camera)

    # renderer
    renderer = pyrender.OffscreenRenderer(viewport_width=img.shape[1], viewport_height=img.shape[0], point_size=1.0)

    # light
    light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
    light_pose = np.eye(4)
    light_pose[:3, 3] = np.array([0, -1, 1])
    scene.add(light, pose=light_pose)
    light_pose[:3, 3] = np.array([0, 1, 1])
    scene.add(light, pose=light_pose)
    light_pose[:3, 3] = np.array([1, 1, 2])
    scene.add(light, pose=light_pose)

    # render
    rgb, depth = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
    rgb = rgb[:,:,:3].astype(np.float32)
    valid_mask = (depth > 0)[:,:,None]
    # composite: rendered pixels where depth is valid, original image elsewhere
    # (img is expected in the same 0-255 float range as rgb)
    img = rgb * valid_mask + img * (1 - valid_mask)
    return img

def params2verts(mano_param, mano_model, cam_param, cam_id):
    # only uses the extrinsic info in 'cam_param'; cam_id is passed
    # explicitly instead of relying on the global from __main__
    mano_pose = torch.FloatTensor(mano_param['pose']).view(-1,3)
    root_pose = mano_pose[0].view(1,3)
    hand_pose = mano_pose[1:,:].view(1,-1)
    shape = torch.FloatTensor(mano_param['shape']).view(1,-1)
    trans = torch.FloatTensor(mano_param['trans']).view(1,3)
    output = mano_model(global_orient=root_pose, hand_pose=hand_pose, betas=shape, transl=trans)
    # https://github.com/facebookresearch/InterHand2.6M/blob/main/tool/MANO_render/render.py
    mesh_vert = output.vertices[0].numpy() * 1000 # meter to millimeter
    t, R = np.array(cam_param['campos'][str(cam_id)], dtype=np.float32).reshape(3), np.array(cam_param['camrot'][str(cam_id)], dtype=np.float32).reshape(3,3)
    t = -np.dot(R, t.reshape(3,1)).reshape(3) # t = -R @ campos
    mesh_vert = np.dot(R, mesh_vert.transpose(1,0)).transpose(1,0) + t.reshape(1,3)
    mesh_vert = mesh_vert / 1000 # millimeter to meter
    return mesh_vert # verts with meter unit
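# Convention note (matches the repo's world2cam): 'campos' is the camera
# position in world coordinates and 'camrot' is the world-to-camera rotation,
# so x_cam = R @ (x_world - campos) = R @ x_world + t with t = -R @ campos,
# which is exactly what params2verts computes above.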

if __name__ == '__main__':
    type_repl_str = "XXXXX"
    mano_model_root = r"\\105.1.1.1\Hand\HO&HH\Grab\downloads\out\models_smplx_v1_1\models"
    data_root = r"\\105.1.1.1\Hand\HO&HH\InterHand2.6Mv1.0\downloads"
    image_subroot = os.path.join(data_root, "images", "InterHand2.6M_5fps_batch1", "images", type_repl_str)

    cam_jsonname = os.path.join(data_root, "annotations", type_repl_str, "InterHand2.6M_%s_camera.json"%(type_repl_str))
    data_jsonname = os.path.join(data_root, "annotations", type_repl_str, "InterHand2.6M_%s_data.json"%(type_repl_str))
    joint_jsonname = os.path.join(data_root, "annotations", type_repl_str, "InterHand2.6M_%s_joint_3d.json"%(type_repl_str))

    mano_jsonname = os.path.join(data_root, "annotations", type_repl_str, "InterHand2.6M_%s_MANO_NeuralAnnot.json"%(type_repl_str))
    subdatatypes = ['test', 'train', 'val'] # ['train']

    mano_models = {
        'left': smplx.create(mano_model_root, model_type = "mano", is_rhand = False, use_pca = False), 
        'right': smplx.create(mano_model_root, model_type = "mano", is_rhand = True, use_pca = False),  
    }
    mano_models['left'].shapedirs[:,0,:] *= -1 # bug found by moon https://github.com/vchoutas/smplx/issues/48

    for subdatatype in subdatatypes:
        # use fresh names here: overwriting the templates would destroy the
        # XXXXX placeholder after the first iteration, so later splits would
        # silently reuse the first split's paths
        image_dir = image_subroot.replace(type_repl_str, subdatatype)
        cam_json = cam_jsonname.replace(type_repl_str, subdatatype)
        data_json = data_jsonname.replace(type_repl_str, subdatatype)
        joint_json = joint_jsonname.replace(type_repl_str, subdatatype)
        mano_json = mano_jsonname.replace(type_repl_str, subdatatype)

        data_Dic = json.load(open(data_json,'r')) # img <-> anno mapping info

        cam_Dic = json.load(open(cam_json,'r')) # cam_Dic[capture_id]
        joint_Dic = json.load(open(joint_json,'r')) # joint_Dic[capture_id][frame_id]
        mano_Dic = json.load(open(mano_json,'r')) # mano_Dic[capture_id]
        print("successfully loaded json, beginning to parse ...")
        # for file_index, (img_info, anno_info) in enumerate(zip(data_Dic['images'], data_Dic['annotations'])):
        for file_index in range(0, len(data_Dic['images']), 1):
            img_info = data_Dic['images'][file_index]
            anno_info = data_Dic['annotations'][file_index]
            capture_id = str(img_info['capture'])
            cam_id = img_info['camera']

            # if capture_id != '13': continue
            # if img_info['seq_name'] != '0266_dh_pray': continue
            # if int(cam_id) != 400030: continue
            if int(cam_id) != 400412: continue # wrong in Capture0/X/cam400412

            frame_id = str(img_info['frame_idx'])

            print(img_info['file_name'])
            img_Arr = cv2.imread(os.path.join(image_dir, img_info['file_name']))
            cam_param = cam_Dic[capture_id]

            cam_focal = np.array(cam_param['focal'][cam_id], dtype=np.float32).reshape(2)
            cam_princpt = np.array(cam_param['princpt'][cam_id], dtype=np.float32).reshape(2)
            # mask_Arr = np.zeros_like(img_Arr).astype(np.float32) # render on black instead
            mask_Arr = img_Arr.astype(np.float32) # keep 0-255 range to match render_mesh's rgb
            # mesh = o3d.geometry.TriangleMesh()

            handtypes_in_img = [anno_info['hand_type']] if anno_info['hand_type'] != "interacting" else ['left', 'right']
            for onehandtype in handtypes_in_img:
                mano_param = mano_Dic[capture_id][frame_id][onehandtype]
                mano_model = mano_models[onehandtype]
                vert_camCoord = params2verts(mano_param, mano_model, cam_param, cam_id)

                # mesh.vertices= o3d.utility.Vector3dVector(vert_camCoord)
                # mesh.triangles= o3d.utility.Vector3iVector(mano_models[onehandtype].faces)
                # mesh.compute_vertex_normals()
                # mesh.paint_uniform_color(np.array([0.5,0.5,0]))
                # o3d.visualization.draw_geometries([mesh])

                mask_Arr = render_mesh(mask_Arr, vert_camCoord, mano_models[onehandtype].faces, 
                            focal = cam_focal, princpt = cam_princpt)
            mask_Arr = np.clip(mask_Arr, a_min=0, a_max=255).astype(np.uint8) # full-color overlay
            cv2.imshow("img_Arr", img_Arr)
            cv2.imshow("overlay_Arr", mask_Arr)
            cv2.waitKey()
```
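As a side note, the overlay can be sanity-checked without pyrender by projecting the camera-space vertices with the pinhole model directly. A minimal sketch (the helper name is made up; it assumes `vert_camCoord` is in meters and camera space, as returned by `params2verts` above):

```python
def draw_projected_verts(img, verts_cam, focal, princpt):
    # pinhole projection: u = X/Z * fx + cx, v = Y/Z * fy + cy
    u = verts_cam[:, 0] / verts_cam[:, 2] * focal[0] + princpt[0]
    v = verts_cam[:, 1] / verts_cam[:, 2] * focal[1] + princpt[1]
    out = img.copy()
    for x, y in zip(u, v):
        if 0 <= x < out.shape[1] and 0 <= y < out.shape[0]:
            cv2.circle(out, (int(x), int(y)), 1, (0, 255, 0), -1)
    return out

# e.g. cv2.imshow("proj_check", draw_projected_verts(img_Arr, vert_camCoord, cam_focal, cam_princpt))
```

If the green dots land on the hand while the pyrender overlay does not, the problem is in the rendering setup; if they are off as well, the extrinsics for that camera are the issue.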
mks0601 commented 2 years ago

Sorry, I don't have time to read the full code :( Did you make any modifications to the original code?

TneitaP commented 2 years ago

Nope, I just copied the same function with the same parameters you gave.

zhufeida commented 2 years ago

I have met the same problem. The projection of the joints is not correct.

[screenshot: misprojected joints]

mks0601 commented 2 years ago

How did you project the joints?

zhufeida commented 2 years ago

Thanks for the reply. I just reused the code from your "dataset.py". By the way, the projections for other images seem to be correct.

```python
ann = db.anns[aid]              # {'id', 'image_id', 'bbox', 'hand_type'}
image_id = ann['image_id']      # https://mks0601.github.io/InterHand2.6M/
img = db.loadImgs(image_id)[0]  # {'id', 'file_name', 'capture', 'camera', ...}

capture_id = img['capture']     # 0
seq_name = img['seq_name']      # 'ROM07_RT_Finger_Occlusions'
cam = img['camera']             # '400262'
frame_idx = img['frame_idx']
img_path = osp.join(self.img_path, self.mode, img['file_name'])  # e.g. 'Capture0/ROM07_RT_Finger_Occlusions/cam400262/image22360.jpg'

campos, camrot = np.array(cameras[str(capture_id)]['campos'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['camrot'][str(cam)], dtype=np.float32)
focal, princpt = np.array(cameras[str(capture_id)]['focal'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['princpt'][str(cam)], dtype=np.float32)
joint_world = np.array(self.joints[str(capture_id)][str(frame_idx)]['world_coord'], dtype=np.float32)
joint_cam = world2cam(joint_world.transpose(1, 0), camrot, campos.reshape(3, 1)).transpose(1, 0)  # millimeters
joint_img = cam2pixel(joint_cam, focal, princpt)[:, :2]
```
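For completeness, `world2cam` and `cam2pixel` come from the repo's common/utils/transforms.py; a minimal sketch written from my reading of the repo (not copied verbatim), consistent with how the snippet above calls them (world2cam on (3, N) points, cam2pixel on (N, 3)):

```python
import numpy as np

def world2cam(world_coord, R, T):
    # world_coord: (3, N); R: (3, 3) world-to-camera rotation; T: (3, 1) camera position
    return np.dot(R, world_coord - T)

def cam2pixel(cam_coord, f, c):
    # cam_coord: (N, 3) camera-space points; f = (fx, fy), c = (cx, cy)
    x = cam_coord[:, 0] / cam_coord[:, 2] * f[0] + c[0]
    y = cam_coord[:, 1] / cam_coord[:, 2] * f[1] + c[1]
    return np.stack((x, y, cam_coord[:, 2]), axis=1)
```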
mks0601 commented 2 years ago

Some images can have wrong annotations, since the annotations are obtained with a markerless MoCap system and the images are very challenging. Also, please note that you should check joint_valid: joints whose joint_valid value is zero should be ignored.
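A minimal sketch of that joint_valid check, assuming `self.joints` is the parsed InterHand2.6M_*_joint_3d.json dict and `joint_img` comes from the snippet above:

```python
joint_valid = np.array(self.joints[str(capture_id)][str(frame_idx)]['joint_valid'],
                       dtype=np.float32).reshape(-1)
joint_img = joint_img[joint_valid > 0]  # drop joints with joint_valid == 0
```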

zhufeida commented 2 years ago

> Some images can have wrong annotations, since the annotations are obtained with a markerless MoCap system and the images are very challenging. Also, please note that you should check joint_valid: joints whose joint_valid value is zero should be ignored.

Noticed. Thank you.