Closed: jamalknight closed this issue 3 years ago.
Hi! The pattern will not work here. You can name the images one by one: ./00_03/00_03_00000001.jpg ./00_03/00_03_00000002.jpg ...
or modify the code to call cv2.VideoCapture directly with such a file-name pattern.
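For the second option, here is a minimal sketch of reading a numbered image sequence through cv2.VideoCapture with a printf-style pattern (the directory and %08d numbering are just guesses matching your file names; exact behavior can vary between OpenCV versions):

import cv2

# OpenCV can read a numbered image sequence when given a printf-style pattern
# instead of a single file name.
cap = cv2.VideoCapture('./00_03/00_03_%08d.jpg')

while True:
    ok, frame = cap.read()
    if not ok:  # ran out of images matching the pattern
        break
    cv2.imshow('frame', frame)
    if cv2.waitKey(1) == 27:  # esc to quit early
        break
cap.release()
cv2.destroyAllWindows()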
Thanks for clarifying.
Where can I get the output of keypoints in 3D space coordinates?
Thanks
Will #28 help?
Hope it helped.
Hi there
Is there a .pkl output with 3D coordinates? I am looking for the keypoints output in world space, but cannot see where it would be output to.
thanks
Hi! You can find the pose coordinates in world space in poses_3d after line 110.
So would I have to write code to output the data to a file after line 110?
Yes.
Could you please let me know what the line of code would be to output 3D keypoint positions to a file? (sorry I have limited Python knowledge)
Thanks
Here is the diff:
diff --git a/demo.py b/demo.py
index 6ccd39e..b18d40c 100644
--- a/demo.py
+++ b/demo.py
@@ -4,6 +4,7 @@ import os
 
 import cv2
 import numpy as np
+import pickle
 
 from modules.input_reader import VideoReader, ImageReader
 from modules.draw import Plotter3d, draw_poses
@@ -85,6 +86,8 @@ if __name__ == '__main__':
     p_code = 112
     space_code = 32
     mean_time = 0
+    f = open('coordinates_3d.pkl', 'wb')
+    coordinates_history = []
     for frame in frame_provider:
         current_time = cv2.getTickCount()
         if frame is None:
@@ -108,6 +111,7 @@ if __name__ == '__main__':
 
             poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
             edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))
+            coordinates_history.append(poses_3d)
         plotter.plot(canvas_3d, poses_3d, edges)
         cv2.imshow(canvas_3d_window_name, canvas_3d)
 
@@ -141,3 +145,5 @@ if __name__ == '__main__':
                 break
         else:
             delay = 1
+    pickle.dump(coordinates_history, f)
+    f.close()
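After the demo exits, the saved file can be read back with a small script, for example (a minimal sketch; each list entry is the poses_3d array for one frame that had detections):

import pickle

# Load the per-frame world-space coordinates written by the patched demo.py.
with open('coordinates_3d.pkl', 'rb') as f:
    coordinates_history = pickle.load(f)

print(len(coordinates_history), 'frames with detected poses')
first_frame_poses = coordinates_history[0]
print(first_frame_poses.shape)   # (num_persons, 19, 3): 19 keypoints with x, y, z
print(first_frame_poses[0])      # keypoints of the first person in the first frame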
I added the diff above after line 110, but it errored.
To run:
python demo.py --model human-pose-estimation-3d.pth --video 0
error:
File "demo.py", line 114
diff --git a/demo.py b/demo.py
^
SyntaxError: invalid syntax
Is there something I'm missing?
I uploaded demo.py here
from argparse import ArgumentParser
import json
import os

import cv2
import numpy as np

from modules.input_reader import VideoReader, ImageReader
from modules.draw import Plotter3d, draw_poses
from modules.parse_poses import parse_poses


def rotate_poses(poses_3d, R, t):
    R_inv = np.linalg.inv(R)
    for pose_id in range(len(poses_3d)):
        pose_3d = poses_3d[pose_id].reshape((-1, 4)).transpose()
        pose_3d[0:3, :] = np.dot(R_inv, pose_3d[0:3, :] - t)
        poses_3d[pose_id] = pose_3d.transpose().reshape(-1)
    return poses_3d


if __name__ == '__main__':
    parser = ArgumentParser(description='Lightweight 3D human pose estimation demo. '
                                        'Press esc to exit, "p" to (un)pause video or process next image.')
    parser.add_argument('-m', '--model',
                        help='Required. Path to checkpoint with a trained model '
                             '(or an .xml file in case of OpenVINO inference).',
                        type=str, required=True)
    parser.add_argument('--video', help='Optional. Path to video file or camera id.', type=str, default='')
    parser.add_argument('-d', '--device',
                        help='Optional. Specify the target device to infer on: CPU or GPU. '
                             'The demo will look for a suitable plugin for device specified '
                             '(by default, it is GPU).',
                        type=str, default='GPU')
    parser.add_argument('--use-openvino',
                        help='Optional. Run network with OpenVINO as inference engine. '
                             'CPU, GPU, FPGA, HDDL or MYRIAD devices are supported.',
                        action='store_true')
    parser.add_argument('--use-tensorrt', help='Optional. Run network with TensorRT as inference engine.',
                        action='store_true')
    parser.add_argument('--images', help='Optional. Path to input image(s).', nargs='+', default='')
    parser.add_argument('--height-size', help='Optional. Network input layer height size.', type=int, default=256)
    parser.add_argument('--extrinsics-path',
                        help='Optional. Path to file with camera extrinsics.',
                        type=str, default=None)
    parser.add_argument('--fx', type=np.float32, default=-1, help='Optional. Camera focal length.')
    args = parser.parse_args()

    if args.video == '' and args.images == '':
        raise ValueError('Either --video or --image has to be provided')

    stride = 8
    if args.use_openvino:
        from modules.inference_engine_openvino import InferenceEngineOpenVINO
        net = InferenceEngineOpenVINO(args.model, args.device)
    else:
        from modules.inference_engine_pytorch import InferenceEnginePyTorch
        net = InferenceEnginePyTorch(args.model, args.device, use_tensorrt=args.use_tensorrt)

    canvas_3d = np.zeros((720, 1280, 3), dtype=np.uint8)
    plotter = Plotter3d(canvas_3d.shape[:2])
    canvas_3d_window_name = 'Canvas 3D'
    cv2.namedWindow(canvas_3d_window_name)
    cv2.setMouseCallback(canvas_3d_window_name, Plotter3d.mouse_callback)

    file_path = args.extrinsics_path
    if file_path is None:
        file_path = os.path.join('data', 'extrinsics.json')
    with open(file_path, 'r') as f:
        extrinsics = json.load(f)
    R = np.array(extrinsics['R'], dtype=np.float32)
    t = np.array(extrinsics['t'], dtype=np.float32)

    frame_provider = ImageReader(args.images)
    is_video = False
    if args.video != '':
        frame_provider = VideoReader(args.video)
        is_video = True
    base_height = args.height_size
    fx = args.fx

    delay = 1
    esc_code = 27
    p_code = 112
    space_code = 32
    mean_time = 0
    for frame in frame_provider:
        current_time = cv2.getTickCount()
        if frame is None:
            break
        input_scale = base_height / frame.shape[0]
        scaled_img = cv2.resize(frame, dsize=None, fx=input_scale, fy=input_scale)
        scaled_img = scaled_img[:, 0:scaled_img.shape[1] - (scaled_img.shape[1] % stride)]  # better to pad, but cut out for demo
        if fx < 0:  # Focal length is unknown
            fx = np.float32(0.8 * frame.shape[1])

        inference_result = net.infer(scaled_img)
        poses_3d, poses_2d = parse_poses(inference_result, input_scale, stride, fx, is_video)
        edges = []
        if len(poses_3d):
            poses_3d = rotate_poses(poses_3d, R, t)
            poses_3d_copy = poses_3d.copy()
            x = poses_3d_copy[:, 0::4]
            y = poses_3d_copy[:, 1::4]
            z = poses_3d_copy[:, 2::4]
            poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = -z, x, -y

            poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
            edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))
            #adding code to print 3d positions
diff --git a/demo.py b/demo.py
index 6ccd39e..b18d40c 100644
--- a/demo.py
+++ b/demo.py
@@ -4,6 +4,7 @@ import os
import cv2
import numpy as np
+import pickle
from modules.input_reader import VideoReader, ImageReader
from modules.draw import Plotter3d, draw_poses
@@ -85,6 +86,8 @@ if __name__ == '__main__':
p_code = 112
space_code = 32
mean_time = 0
+ f = open('coordinates_3d.pkl', 'wb')
+ coordinates_history = []
for frame in frame_provider:
current_time = cv2.getTickCount()
if frame is None:
@@ -108,6 +111,7 @@ if __name__ == '__main__':
poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
edges = (Plotter3d.SKELETON_EDGES + 19 * np.arange(poses_3d.shape[0]).reshape((-1, 1, 1))).reshape((-1, 2))
+ coordinates_history.append(poses_3d)
plotter.plot(canvas_3d, poses_3d, edges)
cv2.imshow(canvas_3d_window_name, canvas_3d)
@@ -141,3 +145,5 @@ if __name__ == '__main__':
break
else:
delay = 1
+ pickle.dump(coordinates_history, f)
+ f.close()
            #end adding code to print 3d positions
        plotter.plot(canvas_3d, poses_3d, edges)
        cv2.imshow(canvas_3d_window_name, canvas_3d)

        draw_poses(frame, poses_2d)
        current_time = (cv2.getTickCount() - current_time) / cv2.getTickFrequency()
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        cv2.putText(frame, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
                    (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
        cv2.imshow('ICV 3D Human Pose Estimation', frame)

        key = cv2.waitKey(delay)
        if key == esc_code:
            break
        if key == p_code:
            if delay == 1:
                delay = 0
            else:
                delay = 1
        if delay == 0 or not is_video:  # allow to rotate 3D canvas while on pause
            key = 0
            while (key != p_code
                   and key != esc_code
                   and key != space_code):
                plotter.plot(canvas_3d, poses_3d, edges)
                cv2.imshow(canvas_3d_window_name, canvas_3d)
                key = cv2.waitKey(33)
            if key == esc_code:
                break
        else:
            delay = 1
Thanks
I have shared a diff file; you can read more about the format here. Basically, you need to add the lines marked with the special sign + in the diff file to the demo.py file. The surrounding lines (which show where to add them in demo.py) are also included in the diff. If you are using git with this repository, you can just run git apply <path-to-diff-file> in the repository root to apply the changes.
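For example, assuming you saved the diff from my earlier comment to a file named save_coordinates_3d.diff (the name is just an illustration) in the repository root:

git apply save_coordinates_3d.diff
python demo.py --model human-pose-estimation-3d.pth --video 0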
Got it! Thanks
Hi there
Thanks for setting up this repo, the setup instructions are clear.
I have a couple of questions. I would like to run this model on a sequence of my own images, but a window pops up with only 1 image.
Code I run:
python demo.py --model human-pose-estimation-3d.pth --image ./00_03/*.jpg
My list of images:
The other question is: where are the 3D keypoint coordinates written out?
Thanks