Closed: dongdongqiang2018 closed this issue 4 years ago
My code is like this:
```python
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from argparse import ArgumentParser
import cv2
import numpy as np
import time
import socket
from collections import deque
from platform import system

from head_pose_estimation.pose_estimator import PoseEstimator
from head_pose_estimation.stabilizer import Stabilizer
from head_pose_estimation.visualization import *
from head_pose_estimation.misc import *


def get_face(detector, image, cpu=False):
    if cpu:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        bboxs = []
        try:
            box = detector(image)
            for bbox in list(box):
                x1 = bbox.left()
                y1 = bbox.top()
                x2 = bbox.right()
                y2 = bbox.bottom()
                bboxs.append([x1, y1, x2, y2])
            return bboxs
        except:
            return None
    else:
        image = cv2.resize(image, None, fx=0.5, fy=0.5)
        box = detector.detect_from_image(image)[0]
        if box is None:
            return None
        return (2 * box[:4]).astype(int)


def main():
    # Setup face detection models.
    if args.cpu:  # use dlib to do face detection and facial landmark detection
        import dlib
        dlib_model_path = 'head_pose_estimation/assets/shape_predictor_68_face_landmarks.dat'
        shape_predictor = dlib.shape_predictor(dlib_model_path)
        face_detector = dlib.get_frontal_face_detector()
    else:  # use better models on GPU
        import face_alignment  # the local directory in this repo
        try:
            import onnxruntime
            use_onnx = True
        except:
            use_onnx = False
        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                          use_onnx=use_onnx, flip_input=False)
        face_detector = fa.face_detector

    os_name = system()
    if os_name in ['Windows']:  # CAP_DSHOW is required on my Windows PC to get 30 FPS
        cap = cv2.VideoCapture(args.cam + cv2.CAP_DSHOW)
    else:  # Linux PC works as usual
        cap = cv2.VideoCapture(args.cam)
    cap.set(cv2.CAP_PROP_FPS, 30)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce pose estimator to solve pose. Get one frame to set up the
    # estimator according to the image size.
    pose_estimator = PoseEstimator(img_size=sample_frame.shape[:2])

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.01,
        cov_measure=0.1) for _ in range(8)]

    # Establish a TCP connection to Unity.
    if args.connect:
        address = ('127.0.0.1', 5066)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(address)

    ts = []
    frame_count = 0
    no_face_count = 0
    prev_boxes = deque(maxlen=5)
    prev_marks = deque(maxlen=5)

    while True:
        _, frame = cap.read()
        frame = cv2.flip(frame, 2)
        frame_count += 1

        if args.connect and frame_count > 60:  # send information to Unity
            msg = '%.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f' % \
                  (roll, pitch, yaw, min_ear, mar, mdst, steady_pose[6], steady_pose[7])
            s.send(bytes(msg, "utf-8"))

        t = time.time()

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose.
        if frame_count % 2 == 1:  # do face detection every odd frame
            box = get_face(face_detector, frame, args.cpu)
            # print('people_num:', len(box))
            if box is not None:
                no_face_count = 0
        elif len(prev_boxes) > 1:  # use a linear movement assumption
            if no_face_count > 1:  # don't estimate more than 1 frame
                facebox = None
            else:
                facebox = prev_boxes[-1] + np.mean(np.diff(np.array(prev_boxes), axis=0), axis=0)[0]
                facebox = facebox.astype(int)
                no_face_count += 1

        for facebox in box:
            if facebox is not None:  # if face is detected
                prev_boxes.append(facebox)

                # Do facial landmark detection and iris detection.
                if args.cpu:  # do detection every frame
                    face = dlib.rectangle(left=facebox[0], top=facebox[1],
                                          right=facebox[2], bottom=facebox[3])
                    marks = shape_to_np(shape_predictor(frame, face))
                else:
                    if len(prev_marks) == 0 \
                            or frame_count == 1 \
                            or frame_count % 2 == 0:  # do landmark detection on the first frame
                                                      # or every even frame
                        face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                        marks = fa.get_landmarks(face_img[:, :, ::-1],
                                                 detected_faces=[(0, 0, facebox[2] - facebox[0],
                                                                  facebox[3] - facebox[1])])
                        marks = marks[-1]
                        marks[:, 0] += facebox[0]
                        marks[:, 1] += facebox[1]
                    elif len(prev_marks) > 1:  # use a linear movement assumption
                        marks = prev_marks[-1] + np.mean(np.diff(np.array(prev_marks), axis=0), axis=0)
                    prev_marks.append(marks)

                x_l, y_l, ll, lu = detect_iris(frame, marks, "left")
                x_r, y_r, rl, ru = detect_iris(frame, marks, "right")

                # Try pose estimation with 68 points.
                error, R, T = pose_estimator.solve_pose_by_68_points(marks)
                pose = list(R) + list(T)
                # Add iris positions to stabilize.
                pose += [(ll + rl) / 2.0, (lu + ru) / 2.0]

                if error > 100:  # large error means tracking fails: reinitialize pose estimator;
                    # at the same time, keep sending the same information (e.g. same roll)
                    pose_estimator = PoseEstimator(img_size=sample_frame.shape[:2])
                else:
                    # Stabilize the pose.
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])

                if args.debug:  # draw landmarks, etc.
                    # Show iris.
                    if x_l > 0 and y_l > 0:
                        draw_iris(frame, x_l, y_l)
                    if x_r > 0 and y_r > 0:
                        draw_iris(frame, x_r, y_r)

                    # Show facebox.
                    draw_box(frame, facebox)

                    if error < 100:
                        # Show face landmarks.
                        draw_marks(frame, marks, color=(0, 255, 0))

                        # Draw stable pose annotation on frame.
                        pose_estimator.draw_annotation_box(
                            frame, np.expand_dims(steady_pose[:3], 0),
                            np.expand_dims(steady_pose[3:6], 0),
                            color=(128, 255, 128))

                        # Draw head axes on frame.
                        pose_estimator.draw_axis(frame, np.expand_dims(steady_pose[:3], 0),
                                                 np.expand_dims(steady_pose[3:6], 0))

        dt = time.time() - t
        ts += [dt]
        FPS = int(1 / (np.mean(ts[-10:]) + 1e-6))
        print('\r', '%.3f' % dt, end=' ')

        if args.debug:
            draw_FPS(frame, FPS)
            cv2.imshow("face", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to exit
                break

    # Clean up the process.
    cap.release()
    if args.connect:
        s.close()
    if args.debug:
        cv2.destroyAllWindows()
    print('%.3f' % np.mean(ts))


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("--cam", type=int,
                        help="specify the camera number if you have multiple cameras",
                        default=0)
    parser.add_argument("--cpu", action="store_true",
                        help="use CPU to do face detection and facial landmark detection",
                        default=False)
    parser.add_argument("--debug", action="store_true",
                        help="show camera image to debug (need to uncomment to show results)",
                        default=False)
    parser.add_argument("--connect", action="store_true",
                        help="connect to the Unity character",
                        default=False)
    args = parser.parse_args()
    main()
```
The code in the reply doesn't run on my computer. Is the problem you're referring to the position of the face detection box?
Here is one line of thought for your reference: pose_stabilizers.
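To make the hint concrete: pose_stabilizers in the script above is one shared list of eight Kalman filters, so in the multi-face loop every detected face pushes its values through the same filters. A toy sketch of the effect, with made-up pose values (not taken from the repo):

```python
from head_pose_estimation.stabilizer import Stabilizer

# The shared list of eight scalar Kalman filters, as created in the script above.
pose_stabilizers = [Stabilizer(state_num=2, measure_num=1,
                               cov_process=0.01, cov_measure=0.1) for _ in range(8)]

# Made-up pose values for two faces that appear in the same frame.
pose_face_a = [0.1, 0.2, 0.0, 10.0, 20.0, 500.0, 0.5, 0.5]
pose_face_b = [0.9, -0.4, 3.1, 300.0, 40.0, 800.0, 0.2, 0.8]

for pose_np in (pose_face_a, pose_face_b):  # both faces hit the same filters
    for value, ps_stb in zip(pose_np, pose_stabilizers):
        ps_stb.update([value])  # each filter's state now mixes face A and face B
```

Each filter then smooths an alternating sequence of two unrelated signals, which matches the "stable pose box is inaccurate" symptom described below.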
Hi, thank you for your reply. On my side the face detection box is fine, and the 68 facial landmarks are fine too (see the image I posted in my first comment). My problem now is that the stable pose annotation box is not drawn accurately.
In my testing, the output of pose_np is fine; the problem appears in the loop below:

```python
pose_np = np.array(pose).flatten()
for value, ps_stb in zip(pose_np, pose_stabilizers):
    ps_stb.update([value])
    steady_pose.append(ps_stb.state[0])
steady_pose = np.reshape(steady_pose, (-1, 3))
```
Do I need to modify pose_stabilizers somehow?
If there are multiple faces, you need to create a separate set of stabilizers for each face.
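A minimal sketch of that suggestion, assuming faces keep a stable index across frames (if they can swap order between frames, you would also need to match boxes to identities; the helper names here are hypothetical):

```python
import numpy as np
from collections import defaultdict

from head_pose_estimation.stabilizer import Stabilizer


def make_stabilizers():
    # Eight scalar Kalman filters, same parameters as pose_stabilizers above.
    return [Stabilizer(state_num=2, measure_num=1,
                       cov_process=0.01, cov_measure=0.1) for _ in range(8)]


# One stabilizer list per face index, created lazily the first time
# that face index is seen.
stabilizers_per_face = defaultdict(make_stabilizers)


def stabilize(face_idx, pose):
    # Run one face's eight pose values through that face's own filters only.
    steady_pose = []
    for value, ps_stb in zip(np.array(pose).flatten(), stabilizers_per_face[face_idx]):
        ps_stb.update([value])
        steady_pose.append(ps_stb.state[0])
    return steady_pose
```

In the main loop this would replace the shared update, e.g. `for i, facebox in enumerate(box): ... steady_pose = stabilize(i, pose)`; presumably prev_boxes and prev_marks need the same per-face treatment.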
Thanks, it works now.
Thank you for your excellent work. When I tried multiple face recognition, an error occurred, like this. How can I solve this problem?