hongsukchoi / 3DCrowdNet_RELEASE

Official Pytorch implementation of "Learning to Estimate Robust 3D Human Mesh from In-the-Wild Crowded Scenes", CVPR 2022
MIT License
155 stars 15 forks source link

How can I train the net with the FreiHAND dataset? #21

Open lpx153623 opened 1 year ago

lpx153623 commented 1 year ago

Is there code for this?

hongsukchoi commented 1 year ago

Hi,

Yes, you can. There was code for that, but I can't find it now. So, you will have to write some code yourself. It won't be that difficult. Refer to this dataloader: https://github.com/hongsukchoi/Pose2Mesh_RELEASE/blob/master/data/FreiHAND/dataset.py

lpx153623 commented 1 year ago

Thank you very much! @hongsukchoi

lpx153623 commented 1 year ago

I have some questions about how to augment the MANO parameters and cameras. Can you help me find it again? That's important to me. I would be extremely appreciative of your help. @hongsukchoi

hongsukchoi commented 1 year ago

Hi, maybe this can help you? You can use a detected 2d pose while training instead of a GT 2d pose.

import os
import os.path as osp
import numpy as np
import torch
import cv2
import json
import copy
from pycocotools.coco import COCO
from config import cfg
from utils.human_models import mano
from utils.preprocessing import load_img, process_bbox, augmentation, process_human_model_output
from utils.vis import vis_keypoints, vis_mesh, save_obj

class FreiHAND(torch.utils.data.Dataset):
    """FreiHAND single-hand dataset loader.

    Train split: reads COCO-format annotations plus ground-truth camera and
    MANO parameters from ``freihand_train_data.json``.
    Eval split: reads COCO-format annotations and replaces GT boxes with
    RootNet-predicted bounding boxes / root depths loaded from
    ``self.human_bbox_root_dir``.
    FreiHAND contains only right hands (hardcoded below).
    """

    def __init__(self, transform, data_split):
        # transform: callable applied to the cropped float32 hand image
        #   (presumably a torchvision-style ToTensor -- verify against caller).
        # data_split: 'train' selects the training annotations; any other
        #   value selects the evaluation annotations.
        self.transform = transform
        self.data_split = data_split
        self.data_path = osp.join('..', 'data', 'FreiHAND', 'data')
        # RootNet output: per-image predicted bbox and 3D root position,
        # used only by the eval split.
        self.human_bbox_root_dir = osp.join('..', 'data', 'FreiHAND', 'rootnet_output', 'bbox_root_freihand_output.json')
        self.datalist = self.load_data()

    def load_data(self):
        """Build the list of per-sample dicts for the selected split.

        Returns a list of dicts. Train entries carry
        img_path / img_shape / hand_bbox / cam_param / mano_param;
        eval entries carry only img_path / img_shape / hand_bbox.
        """
        if self.data_split == 'train':
            db = COCO(osp.join(self.data_path, 'freihand_train_coco.json'))
            with open(osp.join(self.data_path, 'freihand_train_data.json')) as f:
                data = json.load(f)

        else:
            db = COCO(osp.join(self.data_path, 'freihand_eval_coco.json'))
            # NOTE(review): `data` is loaded here but never used in the eval
            # branch below -- possibly kept for symmetry with the train branch.
            with open(osp.join(self.data_path, 'freihand_eval_data.json')) as f:
                data = json.load(f)
            print("Get bounding box and root from " + self.human_bbox_root_dir)
            # Index RootNet predictions by image_id (as a string key).
            bbox_root_result = {}
            with open(self.human_bbox_root_dir) as f:
                annot = json.load(f)
            for i in range(len(annot)):
                bbox_root_result[str(annot[i]['image_id'])] = {'bbox': np.array(annot[i]['bbox']), 'root': np.array(annot[i]['root_cam'])}

        datalist = []
        for aid in db.anns.keys():
            ann = db.anns[aid]
            image_id = ann['image_id']
            img = db.loadImgs(image_id)[0]
            img_path = osp.join(self.data_path, img['file_name'])
            img_shape = (img['height'], img['width'])
            # db_idx keys into the auxiliary per-frame parameter file.
            db_idx = str(img['db_idx'])

            if self.data_split == 'train':
                cam_param, mano_param = data[db_idx]['cam_param'], data[db_idx]['mano_param']
                mano_param['hand_type'] = 'right' # FreiHAND only contains right hand
                # process_bbox presumably sanitizes / aspect-ratio-extends the
                # GT box; it returns None for degenerate boxes, which we skip.
                hand_bbox = process_bbox(np.array(ann['bbox']), img['width'], img['height'])
                if hand_bbox is None: continue

                datalist.append({
                    'img_path': img_path,
                    'img_shape': img_shape,
                    'hand_bbox': hand_bbox,
                    'cam_param': cam_param,
                    'mano_param': mano_param})
            else:
                hand_bbox = bbox_root_result[str(image_id)]['bbox'] # bbox should be aspect ratio preserved-extended. It is done in RootNet.

                datalist.append({
                    'img_path': img_path,
                    'img_shape': img_shape,
                    'hand_bbox': hand_bbox})

        return datalist

    def __len__(self):
        # Number of samples in the selected split.
        return len(self.datalist)

    def __getitem__(self, idx):
        """Return (inputs, targets, meta_info) dicts for sample `idx`.

        Eval split returns empty targets/meta_info; the train split adds
        MANO joint/pose/shape supervision produced by
        process_human_model_output.
        """
        # deepcopy so in-place edits below never mutate the cached datalist.
        data = copy.deepcopy(self.datalist[idx])
        img_path, img_shape, hand_bbox = data['img_path'], data['img_shape'], data['hand_bbox']

        # img
        # NOTE(review): hand_bb2img_trans is unpacked but unused here.
        img = load_img(img_path)
        hand_img, hand_img2bb_trans, hand_bb2img_trans, hand_rot, hand_do_flip = augmentation(img, hand_bbox, self.data_split, enforce_flip=False)
        # Scale pixel values to [0, 1] after the transform.
        hand_img = self.transform(hand_img.astype(np.float32))/255.

        if self.data_split == 'train':
            cam_param, mano_param = data['cam_param'], data['mano_param']

            # mano coordinates
            # NOTE(review): mano_mesh_cam_orig is unpacked but unused here.
            mano_joint_img, mano_joint_cam, mano_joint_trunc, mano_pose, mano_shape, mano_mesh_cam_orig = process_human_model_output(mano_param, cam_param, hand_do_flip, img_shape, hand_img2bb_trans, hand_rot, 'mano')

            """
            # for debug
            _tmp = mano_joint_img.copy()
            _tmp[:,0] = _tmp[:,0] / cfg.output_hm_shape[1] * cfg.input_img_shape[1]
            _tmp[:,1] = _tmp[:,1] / cfg.output_hm_shape[0] * cfg.input_img_shape[0]
            _img = hand_img.numpy().transpose(1,2,0)[:,:,::-1] * 255
            _img = vis_keypoints(_img, _tmp)
            cv2.imwrite('freihand_' + str(idx) + '.jpg', _img)
            """

            inputs = {'hand_img': hand_img}
            # Joints are duplicated under both 'hand_*' and 'mano_*' keys --
            # presumably the loss code consumes them under different names.
            targets = {'hand_joint_img': mano_joint_img, 'mano_joint_img': mano_joint_img, 'hand_joint_cam': mano_joint_cam, 'mano_joint_cam': mano_joint_cam, 'mano_pose': mano_pose, 'mano_shape': mano_shape}
            # All joints treated as valid; truncation mask comes from the
            # MANO projection step above.
            meta_info = {'hand_joint_valid': np.ones_like(mano_joint_trunc), 'hand_joint_trunc': mano_joint_trunc, 'mano_joint_trunc': mano_joint_trunc, 'is_valid_mano_fit': float(True), 'is_3D': float(True)}
        else:
            inputs = {'hand_img': hand_img}
            targets = {}
            meta_info = {}

        return inputs, targets, meta_info

    def evaluate(self, outs, cur_sample_idx):
        """Collect predicted meshes/joints for the samples in `outs`.

        outs: list of per-sample network output dicts containing
            'mano_mesh_cam' (mesh vertices in camera coordinates).
        cur_sample_idx: offset of outs[0] within self.datalist.
        Returns a dict of JSON-serializable prediction lists.
        """

        annots = self.datalist
        sample_num = len(outs)
        eval_result = {'joint_out': [], 'mesh_out': []}
        for n in range(sample_num):
            annot = annots[cur_sample_idx + n]
            out = outs[n]

            mesh_out_cam = out['mano_mesh_cam']
            # Regress joints from mesh vertices via the MANO joint regressor.
            joint_out_cam = np.dot(mano.joint_regressor, mesh_out_cam)

            eval_result['mesh_out'].append(mesh_out_cam.tolist())
            eval_result['joint_out'].append(joint_out_cam.tolist())

            # Flip to True locally to dump the input image and predicted mesh
            # for visual inspection.
            vis = False
            if vis:
                filename = annot['img_path'].split('/')[-1][:-4]

                # Channel reversal -- presumably RGB->BGR for cv2.imwrite;
                # depends on load_img's channel order (verify).
                img = load_img(annot['img_path'])[:,:,::-1]
                cv2.imwrite(filename + '.jpg', img)

                save_obj(mesh_out_cam, mano.face['right'], filename + '.obj')

        return eval_result

    def print_eval_result(self, eval_result):
        """Dump collected predictions to <cfg.result_dir>/FreiHAND/pred.json.

        Output format: [joint_out, mesh_out] -- two parallel lists, as
        appended by evaluate().
        """
        output_save_path = osp.join(cfg.result_dir, 'FreiHAND')
        os.makedirs(output_save_path, exist_ok=True)
        output_save_path = osp.join(output_save_path, 'pred.json')
        with open(output_save_path, 'w') as f:
            json.dump([eval_result['joint_out'], eval_result['mesh_out']], f)
        print('Saved at ' + output_save_path)
lpx153623 commented 1 year ago

Thanks!