Closed · simple123456T closed this 2 years ago

When we normalize the keypoints predicted by the 2D model, the skeleton lines drawn on the person in the camera view disappear. We'd like to know: after the 2D output has been normalized, does it still need to go through coco_h36m to be converted into the H36M-format 2D pose? My code is below.
import time
import cv2
import torch
from common.camera import *
from common.model import *
from utils_all import Person_23d
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from lib.preprocess import coco_h36m
import numpy as np
import argparse
import sys
# sys.path.append("./")
# sys.path.append("../")
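# Skeleton edges as pairs of joint indices in the H36M 17-joint layout,
# grouped by limb; used for both the 3D plot and the 2D overlay below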
l_leg = [[0, 1], [1, 2], [2, 3]]
r_leg = [[0, 4], [4, 5], [5, 6]]
bone = [[0, 7], [7, 8], [8, 9], [9, 10]]
l_hand = [[8, 14], [14, 15], [15, 16]]
r_hand = [[8, 11], [11, 12], [12, 13]]
draw = [l_leg, r_leg, bone, l_hand, r_hand]
metadata = {'layout_name': 'coco', 'num_joints': 17,
'keypoints_symmetry': [[1, 3, 5, 7, 9, 11, 13, 15], [2, 4, 6, 8, 10, 12, 14, 16]]}
def parse_args():
    parser = argparse.ArgumentParser(description='person3d args')
    parser.add_argument('--video', type=str, default=None, metavar='N', help='path to a video file')
    parser.add_argument('--camera', type=int, default=1, metavar='C', help='camera device index')
    parser.add_argument('--arc', default=[3, 3, 3, 3], metavar='LAYERS', help='filter widths separated by comma')
    parser.add_argument('--w', '--weight-file', type=str, default='lib/checkpoint/epoch_120_3333.bin',
                        help='path to the 3D pose model checkpoint')
    return parser.parse_args()
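# Example invocations (the video path here is hypothetical; main.py is the
# file name mentioned in the reply at the end):
#   python main.py --video demo.mp4
#   python main.py --camera 0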
if __name__ == "__main__":
    args = parse_args()
    print('\n args is:', args)
    if args.video is not None:
        cap = cv2.VideoCapture(args.video)
    elif args.camera is not None:
        cap = cv2.VideoCapture(args.camera)
    else:
        sys.exit("camera or video load error, please check your arguments!")
    p23d = Person_23d()
    p23d.load_model(CUDA=True)  # load the YOLOv3 detector
    p23d.reset_config()
    pose_model = p23d.model_load()  # load the HRNet 2D pose model
    # temporal model that lifts 2D keypoint sequences to 3D poses
    model = TemporalModel(17, 2, 17, filter_widths=args.arc, causal=True, dropout=0.25, channels=1024)
    checkpoint = torch.load(args.w, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint['model_pos'])
    model = model.cuda()
    model.eval()
    kps_2d = []
    fig = plt.figure()
    while cap.isOpened():
        ax = fig.add_subplot(111, projection='3d')
        # ax.view_init(elev=0., azim=90)  # alternative viewpoint that also works reasonably well
        ax.view_init(elev=45.)
        ax.set_xlim3d([-1.5, 1.5])
        ax.set_zlim3d([0.0, 2.0])
        ax.set_ylim3d([2.5, -2.5])
        ret, frame = cap.read()
        if not ret:  # end of stream or failed grab
            break
        b, c = p23d.yolo_human_det(frame)  # YOLO person detector; returns the four bounding-box coordinates
        if b is None:
            continue
        cv2.rectangle(frame, (int(b[0][0]), int(b[0][1])), (int(b[0][2]), int(b[0][3])), (0, 255, 0), 2)
        # preprocess the detected person crop for the 2D pose network
        kps_input, data_numpy, center, scale = p23d.PreProcess(frame, b[0])
        kps_inputs = kps_input[:, [2, 1, 0]]  # reorder color channels (BGR to RGB)
        # kps_inputs.shape: [1, 3, 256, 256]
        # data_numpy.shape: (1080, 1920, 3)
        # center.shape: (2,), scale.shape: (2,)
        # 2D keypoint inference
        if torch.cuda.is_available():
            kps_inputs = kps_inputs.cuda()
        output = pose_model(kps_inputs)  # joint heatmaps, shape [1, 17, 64, 64]
        kps_pre, maxval = p23d.get_final_preds(output.clone().cpu().detach().numpy(),
                                               np.asarray([center]), np.asarray([scale]))
        print('\n kps_pre from get_final_preds:\n', kps_pre)  # 2D keypoints in image coordinates
        print('\n kps_pre shape:\n', kps_pre.shape)  # (1, 17, 2)
        # maxval: per-joint confidence, shape (1, 17, 1)
        # normalize the 2D keypoints; without camera parameters we assume plain screen coordinates
        kps_pre[..., :2] = normalize_screen_coordinates(kps_pre[..., :2], w=frame.shape[1], h=frame.shape[0])
        print('\n kps_pre shape after normalization:\n', kps_pre.shape)  # (1, 17, 2)
        h36m_kps, _ = coco_h36m(kps_pre)  # remap the COCO keypoints to the H36M 2D pose layout
        print('\n h36m_kps shape from coco_h36m:\n', h36m_kps.shape)  # (1, 17, 2)
        kps_2d.append(h36m_kps[0])
        # receptive field of the temporal model: with the default --arc of [3, 3, 3, 3]
        # this is 3**4 = 81 frames, plus one so that exactly 81 remain after the pop below
        fps_nums = 3 ** len(args.arc) + 1
        if len(kps_2d) == fps_nums:
            print('kps_2d length:', len(kps_2d))  # 82
            print('fps_nums:', fps_nums)  # 82
            kps_2d.pop(0)  # drop the oldest frame, keeping a window of 81
            input_2d = np.array([kps_2d])
            print('input_2d shape:', input_2d.shape)  # (1, 81, 17, 2)
            # NOTE: the keypoints were already normalized by normalize_screen_coordinates
            # above, so the two divisions below normalize a second time; this is the
            # double normalization discussed in the reply at the end
            input_2d[:, :, :, 0] /= 480.
            input_2d[:, :, :, 1] /= 640.
            input_2d = torch.from_numpy(input_2d)
            print('input_2d shape:', input_2d.shape)  # torch.Size([1, 81, 17, 2])
            if torch.cuda.is_available():
                input_2d = input_2d.cuda()
            pre_3d, pre_traj = model(input_2d)  # infer 3D pose (and trajectory) from the 2D sequence
            print('\n pre_3d shape:\n', pre_3d.shape)  # torch.Size([1, 1, 17, 3])
            print('\n pre_traj shape:\n', pre_traj.shape)  # torch.Size([1, 1, 17, 3])
            # transform the predicted 3D points from camera coordinates to world coordinates
            rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
            # t = np.array([1.841107, 4.9552846, 1.5634454], dtype=np.float32)
            pre_3d = pre_3d.view([1, 17, 3])  # squeeze out the time dimension
            pre_3d = pre_3d.cpu().detach().numpy()
            pre_3d = camera_to_world(pre_3d, R=rot, t=0)  # todo
            # We don't have the trajectory, but we can at least rebase the height:
            # subtract the minimum Z so every Z value is non-negative
            pre_3d[:, :, 2] -= np.min(pre_3d[:, :, 2])
            pre_3d = pre_3d[np.newaxis, :, :, :]  # restore the batch dimension
            print('\n pre_3d shape after expanding:\n', pre_3d.shape)
            pre_traj = pre_traj.cpu().detach().numpy()
            # output = pre_3d + pre_traj
            output = pre_3d  # keep the figure rooted in place (ignore the trajectory)
            print('\n output shape:\n', output.shape)  # (1, 1, 17, 3)
            # reshuffle axes for the 3D plot: plot-y takes the model's z,
            # plot-z takes the negated y
            x = output[:, :, :, 0]
            y = output[:, :, :, 2]
            z = -1 * output[:, :, :, 1]
            # x, y, z each have shape (1, 1, 17)
            for dd in draw:
                for line in dd:
                    a = [x[0, 0, line[0]], x[0, 0, line[1]]]
                    b = [y[0, 0, line[0]], y[0, 0, line[1]]]
                    c = [z[0, 0, line[0]], z[0, 0, line[1]]]
                    ax.plot(a, b, c, c='b')  # color of the skeleton's limb lines
            # ax.scatter(x, y, z, c='r')  # color of the skeleton's joints
            # ax.set_xlabel('X Label')
            # ax.set_ylabel('Y Label')
            ax.set_zlabel('Z Label')
            plt.pause(0.0001)
            plt.clf()  # clear the current figure for the next frame
        # NOTE: h36m_kps holds *normalized* coordinates at this point (roughly [-1, 1]),
        # so int() collapses every coordinate to about pixel (0, 0); this is why the
        # drawn skeleton disappears from the camera view
        for i in range(17):  # mark the 17 keypoints (colors are BGR)
            cv2.circle(frame, (int(h36m_kps[0][i][0]), int(h36m_kps[0][i][1])), 2, (255, 0, 0), 2)
        for dd in draw:  # connect the 17 keypoints with limb lines
            for line in dd:
                cv2.line(frame, (int(h36m_kps[0][line[0]][0]), int(h36m_kps[0][line[0]][1])),
                         (int(h36m_kps[0][line[1]][0]), int(h36m_kps[0][line[1]][1])), (0, 255, 0), 2)
        cv2.namedWindow("show 2d keypoints", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("show 2d keypoints", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
    cap.release()
I've updated main.py. Your copy of the code was normalizing twice: originally I had done the normalization directly, for simplicity, and with the one you added in, it ends up being applied a second time.
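A minimal sketch of the double normalization, assuming a VideoPose3D-style normalize_screen_coordinates (the real one lives in common/camera.py) and made-up keypoint values:

import numpy as np

def normalize_screen_coordinates(X, w, h):
    # VideoPose3D-style: map x from [0, w] to [-1, 1], scaling y by the
    # same factor so the aspect ratio is preserved
    return X / w * 2 - np.array([1, h / w])

kps = np.random.uniform(0, 1080, size=(1, 17, 2))  # fake 2D pose, pixel coords

once = normalize_screen_coordinates(kps, w=1920, h=1080)  # correct: normalize once
twice = once.copy()
twice[..., 0] /= 480.   # the extra divisions from the code above rescale
twice[..., 1] /= 640.   # already-normalized values a second time

print(once.min(), once.max())    # roughly within [-1, 1]
print(twice.min(), twice.max())  # squashed toward zero: wrong input scale for the model

Keeping exactly one of the two normalizations is the point of the reply above.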
Ah, so that's what it was. Understood, thank you!
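As for the drawn lines disappearing: h36m_kps is computed from the already-normalized keypoints, so int() collapses every coordinate to roughly (0, 0) and the overlay ends up in the top-left corner. Below is a minimal sketch of one workaround, drawing from a pixel-space copy while normalizing a separate copy (once) for the model; the names and the 1920x1080 frame are assumptions, not the repository's own fix. coco_h36m itself, as the name suggests, only remaps COCO joints to the H36M layout, so it is a separate question from the scaling:

import numpy as np
import cv2

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)                # stand-in camera frame
kps_px = np.random.uniform(0, 1080, (17, 2)).astype(np.float32)  # fake joints, pixel coords

# draw the overlay from the pixel-space copy ...
for px, py in kps_px:
    cv2.circle(frame, (int(px), int(py)), 2, (255, 0, 0), 2)

# ... and normalize a separate copy, once, as input for the 3D model
kps_model = kps_px / 1920 * 2 - np.array([1, 1080 / 1920], dtype=np.float32)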