mks0601 / Hand4Whole_RELEASE

Official PyTorch implementation of "Accurate 3D Hand Pose Estimation for Whole-Body 3D Human Mesh Estimation", CVPRW 2022 (Oral.)
MIT License
315 stars 31 forks source link

body pose #81

Closed Dong09 closed 1 year ago

Dong09 commented 1 year ago

Hello, I would like to ask: when I use the demo_body script to run inference on local images, the results are crazy. I use the body-only Pose2Pose checkpoint fine-tuned on AGORA. The result plot is shown below: 微信截图_20231026164732

Dong09 commented 1 year ago

this is my code

import sys
import os
import os.path as osp
import argparse
import numpy as np
import cv2
import torch
import torchvision.transforms as transforms
from torch.nn.parallel.data_parallel import DataParallel
import torch.backends.cudnn as cudnn
from pycocotools.coco import COCO

sys.path.append('./common')
sys.path.append('./data')
sys.path.append('./main')
from config import cfg
from model import get_model
from utils.preprocessing import process_bbox, generate_patch_image
from utils.human_models import smpl, smpl_x, mano, flame
# from utils.vis import render_mesh, save_obj
import json

def load_img(path, order='RGB'):
    """Read an image file into a float32 array.

    Args:
        path: Location of the image on disk.
        order: Channel order of the result. ``'RGB'`` reverses the last
            axis of what ``cv2.imread`` returns; any other value leaves
            the decoded array as it is.

    Returns:
        The decoded image as a ``np.float32`` ndarray.

    Raises:
        IOError: If ``cv2.imread`` does not return an ndarray (i.e. the
            file could not be read).
    """
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    image = cv2.imread(path, read_flags)

    # cv2.imread signals failure by returning a non-array value instead of raising.
    if not isinstance(image, np.ndarray):
        raise IOError("Fail to read %s" % path)

    want_rgb = order == 'RGB'
    if want_rgb:
        # Reverse the channel axis; copy so the result is not a negative-stride view.
        image = image[:, :, ::-1].copy()

    return image.astype(np.float32)

def parse_args():
    """Parse the demo's command-line options.

    Returns:
        argparse.Namespace with:
            gpu_ids: Comma-separated GPU id string. A range given as
                ``"a-b"`` is expanded to ``"a,a+1,...,b"`` (inclusive).
            img_path: Value of ``--img_path``.
            start, end: Integers from ``--start`` / ``--end``.
            output_folder: Value of ``--output_folder``.

    Exits via ``parser.error`` (usage message on stderr, exit status 2)
    when ``--gpu`` is empty or is not a valid ``"a-b"`` range.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0', type=str, dest='gpu_ids')
    parser.add_argument('--img_path', type=str, default=r'image_path')
    # Parsed as int so command-line values have the same type as the defaults
    # and non-numeric input is rejected up front.
    parser.add_argument('--start', type=int, default=200)
    parser.add_argument('--end', type=int, default=385)
    parser.add_argument('--output_folder', type=str, default='demo_output')
    args = parser.parse_args()

    # Explicit check rather than `assert`: asserts are stripped under
    # `python -O`, and `assert 0, print(...)` carried None as its message.
    if not args.gpu_ids:
        parser.error("Please set proper gpu ids")

    if '-' in args.gpu_ids:
        # "first-last" shorthand: expand to an inclusive, comma-separated list.
        try:
            first, last = (int(part) for part in args.gpu_ids.split('-'))
        except ValueError:
            parser.error("Please set proper gpu ids (expected 'first-last', got %r)" % args.gpu_ids)
        args.gpu_ids = ','.join(str(gpu) for gpu in range(first, last + 1))

    return args

# Parse CLI options and configure the project for the body-only model.
args = parse_args()
cfg.set_args(args.gpu_ids, 'body')
cudnn.benchmark = True

# snapshot load
model_path = r'../../output/model_dump/snapshot_12.pth.tar'
assert osp.exists(model_path), 'Cannot find model at ' + model_path
print('Load checkpoint from {}'.format(model_path))
model = get_model('test')
model = model.cuda()
ckpt = torch.load(model_path)

# The model is used here WITHOUT the DataParallel wrapper. A state dict saved
# from a DataParallel-wrapped model has every key prefixed with 'module.'
# (presumably the case for the released snapshots -- verify against the
# training code). With strict=False such a mismatch is silently ignored and
# the network keeps its random initialisation, which produces garbage meshes.
# Strip the prefix so the keys line up with the bare model.
dp_prefix = 'module.'
state_dict = {
    (key[len(dp_prefix):] if key.startswith(dp_prefix) else key): value
    for key, value in ckpt['network'].items()
}
load_info = model.load_state_dict(state_dict, strict=False)

# strict=False tolerates partial mismatches, but loading nothing at all is
# never intended: fail loudly instead of running an untrained network.
num_loaded = len(state_dict) - len(load_info.unexpected_keys)
if num_loaded == 0:
    raise RuntimeError(
        'No parameter in {} matches the model; checkpoint was not loaded'.format(model_path))
if load_info.missing_keys or load_info.unexpected_keys:
    print('Checkpoint loaded partially: {} missing keys, {} unexpected keys'.format(
        len(load_info.missing_keys), len(load_info.unexpected_keys)))
model.eval()

# The file-level `from utils.vis import render_mesh, save_obj` line is
# commented out, so render_mesh must be imported here; otherwise the
# render call in the loop raises NameError.
from utils.vis import render_mesh

start = int(args.start)
# NOTE(review): --end is added to --start, so it acts as a frame COUNT rather
# than an end index -- confirm this is intended.
end = start + int(args.end)

# Loop-invariant setup, done once instead of once per frame.
transform = transforms.ToTensor()
os.makedirs(args.output_folder, exist_ok=True)

for frame in range(start, end):
    frame_name = f'WIN_20231026_13_35_47_Pro_{frame:08d}'
    # os.path.join instead of a hard-coded '\\' so the path is valid on any OS.
    img_path = os.path.join(args.img_path, frame_name + '.png')
    print(img_path)

    original_img = load_img(img_path)
    vis_img = original_img.copy()
    original_img_height, original_img_width = original_img.shape[:2]

    # prepare bbox (hard-coded person box for this recording)
    bbox = [1080, 215, 1636-1080, 1079-215] # xmin, ymin, width, height
    bbox = process_bbox(bbox, original_img_width, original_img_height)
    img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape)
    # Scale to [0, 1] and add a leading batch dimension.
    img = transform(img.astype(np.float32))/255
    img = img.cuda()[None,:,:,:]

    # forward
    inputs = {'img': img}
    targets = {}
    meta_info = {}
    with torch.no_grad():
        out = model(inputs, targets, meta_info, 'test')
    mesh = out['smpl_mesh_cam'].detach().cpu().numpy()[0]

    # Rescale the configured focal length / principal point from the network
    # input resolution to the bbox size, and shift the principal point by the
    # bbox origin so the mesh is rendered on the full-resolution frame.
    focal = [cfg.focal[0] / cfg.input_img_shape[1] * bbox[2], cfg.focal[1] / cfg.input_img_shape[0] * bbox[3]]
    princpt = [cfg.princpt[0] / cfg.input_img_shape[1] * bbox[2] + bbox[0], cfg.princpt[1] / cfg.input_img_shape[0] * bbox[3] + bbox[1]]
    rendered_img = render_mesh(vis_img, mesh, smpl.face, {'focal': focal, 'princpt': princpt})

    # Write into --output_folder (it was created above but previously unused,
    # so results landed in the current working directory).
    out_path = os.path.join(args.output_folder, f'render_body_{frame_name}.jpg')
    cv2.imwrite(out_path, rendered_img)
mks0601 commented 1 year ago

Does your demo code work for the images in this folder? https://github.com/mks0601/Hand4Whole_RELEASE/tree/Pose2Pose/demo/body

You should get the same results as the result images in that folder.

Dong09 commented 1 year ago

> ... the images in this folder?

I use a (1920,1080,3) image as input, and I get the bbox on the (1920,1080) image through the MediaPipe Holistic model; the rest of the steps are exactly the same as in the demo. Here is my code:

model = get_model('test')
model = model.cuda()
ckpt = torch.load(model_path)

# The model is used here WITHOUT the DataParallel wrapper. A state dict saved
# from a DataParallel-wrapped model has every key prefixed with 'module.'
# (presumably the case for the released snapshots -- verify against the
# training code). With strict=False such a mismatch is silently ignored and
# the network keeps its random initialisation, which produces garbage meshes.
# Strip the prefix so the keys line up with the bare model.
dp_prefix = 'module.'
state_dict = {
    (key[len(dp_prefix):] if key.startswith(dp_prefix) else key): value
    for key, value in ckpt['network'].items()
}
load_info = model.load_state_dict(state_dict, strict=False)

# strict=False tolerates partial mismatches, but loading nothing at all is
# never intended: fail loudly instead of running an untrained network.
num_loaded = len(state_dict) - len(load_info.unexpected_keys)
if num_loaded == 0:
    raise RuntimeError(
        'No parameter in {} matches the model; checkpoint was not loaded'.format(model_path))
if load_info.missing_keys or load_info.unexpected_keys:
    print('Checkpoint loaded partially: {} missing keys, {} unexpected keys'.format(
        len(load_info.missing_keys), len(load_info.unexpected_keys)))
model.eval()

# Handles to the MediaPipe Holistic solution and its drawing helpers.
mp_holistic, mp_drawing = mp.solutions.holistic, mp.solutions.drawing_utils

# Options for the Holistic detector, gathered in one place and passed as
# keyword arguments.
holistic_options = dict(
    static_image_mode=True,
    model_complexity=2,
    enable_segmentation=True,
    refine_face_landmarks=True,
)
holistic = mp_holistic.Holistic(**holistic_options)

start = int(args.start)
# NOTE(review): --end is added to --start, so it acts as a frame COUNT rather
# than an end index -- confirm this is intended.
end = start + int(args.end)

# Loop-invariant setup, done once instead of once per frame.
transform = transforms.ToTensor()
os.makedirs(args.output_folder, exist_ok=True)

for frame in range(start, end):
    # os.path.join instead of a hard-coded '\\' so the path is valid on any OS.
    img_path = os.path.join(args.img_path, f'WIN_20231027_17_43_31_Pro_{frame:08d}.png')

    original_img = load_img(img_path)
    vis_img = original_img.copy()
    original_img_height, original_img_width = original_img.shape[:2]

    # Run the detector on the same decoded frame that is cropped below
    # (load_img returns float32 RGB), rather than decoding the file a second
    # time with different imread flags.
    result = holistic.process(original_img.astype(np.uint8))
    if not result.pose_landmarks:
        # No person detected in this frame.
        continue

    # Landmark coordinates are multiplied by the image size to obtain pixels.
    pts = np.array([[lm.x * original_img_width, lm.y * original_img_height]
                    for lm in result.pose_landmarks.landmark])
    # Truncate to whole pixels, then clamp to the image, since landmarks can
    # fall outside the frame.
    x_lo, y_lo = np.trunc(pts.min(axis=0))
    x_hi, y_hi = np.trunc(pts.max(axis=0))
    mx = int(np.clip(x_lo, 0, original_img_width))
    my = int(np.clip(y_lo, 0, original_img_height))
    xm = int(np.clip(x_hi, 0, original_img_width))
    ym = int(np.clip(y_hi, 0, original_img_height))
    if xm <= mx or ym <= my:
        # Degenerate (zero-area) box, e.g. every landmark outside the frame.
        continue

    bbox = [mx, my, xm - mx, ym - my] # xmin, ymin, width, height
    bbox = process_bbox(bbox, original_img_width, original_img_height)
    img, img2bb_trans, bb2img_trans = generate_patch_image(original_img, bbox, 1.0, 0.0, False, cfg.input_img_shape)
    # Debug dump of the network input crop (overwritten every frame).
    # NOTE(review): the crop is presumably still RGB like original_img, while
    # cv2.imwrite expects BGR, hence the channel flip -- confirm.
    cv2.imwrite('./body.png', np.ascontiguousarray(img[:, :, ::-1]))
    # Scale to [0, 1] and add a leading batch dimension.
    img = transform(img.astype(np.float32))/255
    img = img.cuda()[None,:,:,:]
    # forward
    inputs = {'img': img}
    targets = {}
    meta_info = {}
    with torch.no_grad():
        out = model(inputs, targets, meta_info, 'test')
    mesh = out['smpl_mesh_cam'].detach().cpu().numpy()[0]
mks0601 commented 1 year ago

Please first make sure that, given my checkpoint, you get the same results as mine.

Dong09 commented 1 year ago

Thank you for your patience and answers.