ilovepose / DarkPose

Distribution-Aware Coordinate Representation for Human Pose Estimation
https://ilovepose.github.io/coco
Apache License 2.0

How can I test it on an arbitrary RGB image? #10

Open · aligoglos opened 3 years ago

aligoglos commented 3 years ago

I've tried to write demo code, but I got stuck on how to interpret the output of the network:

import argparse
import os
import cv2
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from config import cfg
from config import update_config
from core.inference import get_final_preds
from utils.vis import save_debug_images
import glob
from models.pose_hrnet import get_pose_net

def parse_args():
    parser = argparse.ArgumentParser(description='Train keypoints network')
    # general
    parser.add_argument('--cfg',
                        help='experiment configure file name',
                        default='experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml',
                        type=str)

    parser.add_argument('opts',
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)

    parser.add_argument('--modelDir',
                        help='model directory',
                        type=str,
                        default='')
    parser.add_argument('--logDir',
                        help='log directory',
                        type=str,
                        default='')
    parser.add_argument('--dataDir',
                        help='data directory',
                        type=str,
                        default='./Inputs/')
    parser.add_argument('--prevModelDir',
                        help='prev Model directory',
                        type=str,
                        default='')

    args = parser.parse_args()
    return args

def save_images(img, joints_pred, name):
    # img: HxWx3 image as read by cv2; joints_pred: (num_joints, 2) array of x/y coordinates
    for joint in joints_pred:
        cv2.circle(img, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2)
    cv2.imwrite(f"Results/{name}", img)

def main():
    normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    args = parse_args()
    update_config(cfg, args)
    image_size = np.array(cfg.MODEL.IMAGE_SIZE)

    model = get_pose_net(
        cfg, is_train=False
    )

    if cfg.TEST.MODEL_FILE:
        model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
    else:
        # NOTE: final_output_dir is not defined in this snippet; set it to the
        # training output directory that contains final_state.pth
        model_state_file = os.path.join(
            final_output_dir, 'final_state.pth'
        )
        model.load_state_dict(torch.load(model_state_file))

    model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda()
    model.eval()  # switch BatchNorm/Dropout to inference mode

    img_path_l = sorted(glob.glob('./Inputs' + '/*'))
    with torch.no_grad():
        for path in img_path_l:
            name  = path.split('/')[-1]
            image = cv2.imread(path)
            # cv2.imread returns BGR; convert to RGB to match the normalization above
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # cv2.resize takes (width, height); the 384x288 config expects 288 x 384
            image = cv2.resize(image, (288, 384))
            input = transform(image).unsqueeze(0)
            #print(input.shape)
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
            else:
                output = outputs
            print(f"{name} : {output.shape}")

if __name__ == '__main__':
    main()

I don't know what to set scale and center to in get_final_preds.

hbin-ac commented 3 years ago

(quoting the demo code and question from the original post)

Your cv2.resize call hides the scale and center: after a plain resize there is no center/scale left to pass to get_final_preds.
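
To make that concrete, get_final_preds wants, for each image in the batch, the center of the cropped box in original-image pixels and its scale expressed in units of pixel_std = 200. A minimal sketch for the case where the whole image is treated as the box (the input path is only an example; the real dataset code additionally pads the box and matches it to the model's aspect ratio):

import cv2
import numpy as np

image = cv2.imread('./Inputs/example.jpg')  # hypothetical input image
h, w = image.shape[:2]

# center of the whole image, and scale in units of pixel_std = 200
center = np.array([w / 2.0, h / 2.0], dtype=np.float32)
scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)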

aligoglos commented 3 years ago

What should I do?

q5390498 commented 3 years ago

Did you solve this problem?

Kuekua commented 3 years ago

I found the following code in the mmpose project; maybe it can solve this problem:

def _get_multi_scale_size(image, input_size, current_scale, min_scale):
    """Get the size for multi-scale training
    Args:
        image: Input image.
        input_size (int): Size of the image input.
        current_scale (float): Scale factor.
        min_scale (float): Minimal scale.

    Returns:
        tuple: A tuple containing multi-scale sizes.

        - (w_resized, h_resized) (tuple(int)): resized width/height
        - center (np.ndarray): image center
        - scale (np.ndarray): scales wrt width/height
    """
    h, w, _ = image.shape

    center = np.array([round(w / 2.0), round(h / 2.0)])

    # calculate the size for min_scale
    min_input_size = _ceil_to_multiples_of(min_scale * input_size, 64)
    if w < h:
        w_resized = int(min_input_size * current_scale / min_scale)
        h_resized = int(
            _ceil_to_multiples_of(min_input_size / w * h, 64) * current_scale /
            min_scale)
        scale_w = w / 200.0
        scale_h = h_resized / w_resized * w / 200.0
    else:
        h_resized = int(min_input_size * current_scale / min_scale)
        w_resized = int(
            _ceil_to_multiples_of(min_input_size / h * w, 64) * current_scale /
            min_scale)
        scale_h = h / 200.0
        scale_w = w_resized / h_resized * h / 200.0

    return (w_resized, h_resized), center, np.array([scale_w, scale_h])
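
The _ceil_to_multiples_of helper is not included above; in mmpose it simply rounds a value up to the nearest multiple of the given base. A small sketch of that helper plus an illustrative call (the 512 input size and the image shape are assumptions for the example only):

import numpy as np

def _ceil_to_multiples_of(x, base=64):
    # round x up to the nearest multiple of base, as the mmpose helper does
    return int(np.ceil(x / base)) * base

# Example: a 480x640 image processed at a single scale
image = np.zeros((480, 640, 3), dtype=np.uint8)
(w_resized, h_resized), center, scale = _get_multi_scale_size(image, 512, 1.0, 1.0)
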
mlantern commented 2 years ago

I found the following code in this project; maybe it can solve this problem:

def box_to_center_scale(box, model_image_width, model_image_height):
    """convert a box to center,scale information required for pose transformation
    Parameters
    ----------
    box : list of tuple
        list of length 2 with two tuples of floats representing
        bottom left and top right corner of a box
    model_image_width : int
    model_image_height : int

    Returns
    -------
    (numpy array, numpy array)
        Two numpy arrays, coordinates for the center of the box and the scale of the box
    """
    center = np.zeros((2), dtype=np.float32)

    bottom_left_corner = box[0]
    top_right_corner = box[1]
    box_width = top_right_corner[0] - bottom_left_corner[0]
    box_height = top_right_corner[1] - bottom_left_corner[1]
    bottom_left_x = bottom_left_corner[0]
    bottom_left_y = bottom_left_corner[1]
    center[0] = bottom_left_x + box_width * 0.5
    center[1] = bottom_left_y + box_height * 0.5

    aspect_ratio = model_image_width * 1.0 / model_image_height
    pixel_std = 200

    if box_width > aspect_ratio * box_height:
        box_height = box_width * 1.0 / aspect_ratio
    elif box_width < aspect_ratio * box_height:
        box_width = box_height * aspect_ratio
    scale = np.array(
        [box_width * 1.0 / pixel_std, box_height * 1.0 / pixel_std],
        dtype=np.float32)
    if center[0] != -1:
        scale = scale * 1.25

    return center, scale
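
Building on that, a minimal sketch of how box_to_center_scale above could be wired into the demo loop from the first post. It assumes get_affine_transform (from utils.transforms) and get_final_preds (from core.inference) behave as in the HRNet codebase this repository builds on, and it simply uses the whole image as the box; a person-detector box would give better results:

import cv2
import numpy as np
import torch
from utils.transforms import get_affine_transform
from core.inference import get_final_preds

def predict_keypoints(model, transform, image_rgb, cfg):
    # model: loaded pose network; transform: the ToTensor + Normalize transform
    # from the first post; image_rgb: the original image converted to RGB.
    h, w = image_rgb.shape[:2]
    image_size = cfg.MODEL.IMAGE_SIZE  # [width, height], e.g. [288, 384]

    # Use the whole image as the person box; a detector box would be tighter.
    center, scale = box_to_center_scale(
        [(0, 0), (w, h)], image_size[0], image_size[1])

    # Crop/resize with the same affine warp the dataset uses, so center/scale
    # stay valid for mapping predictions back to the original image.
    trans = get_affine_transform(center, scale, 0, image_size)
    warped = cv2.warpAffine(
        image_rgb, trans, (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR)

    with torch.no_grad():
        output = model(transform(warped).unsqueeze(0))
    if isinstance(output, list):
        output = output[-1]

    # get_final_preds takes per-image center/scale arrays of shape (N, 2) and
    # returns joint coordinates in original-image pixels plus confidence scores.
    preds, maxvals = get_final_preds(
        cfg, output.clone().cpu().numpy(),
        np.asarray([center]), np.asarray([scale]))
    return preds[0], maxvals[0]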