Open NQHuy1905 opened 9 months ago
Hi @NQHuy1905 , I'd follow this tutorial: You'd likely need to use the default NeuralNetwork node, and then do the full decoding yourself, as we don't have on-device support for YOLO pose architectures. Thanks, Erik
@Erol444 I see, but in my post-processing I use non_max_suppression, which is built into torchvision. Can the OAK use the torch library as well?
Hi @NQHuy1905 , For YOLO detection models we run NMS directly on the device. For such models, I would remove the bottom layers (NMS), compile the model, run it on the device, then perform bottom layers (NMS) on the host device itself. This would be the most straightforward path. Thoughts? Thanks, Erik
@Erol444 So the latency of preprocessing and inference will depend on the OAK, and the latency of post-processing such as NMS will depend on the host device's hardware, right?
Hi @NQHuy1905 , Yes, that is correct. I believe NMS (mostly) runs on (device's) CPU anyways, instead of vector cores ("gpus"), so running it on the host wouldn't affect performance negatively.
Hello. I tried to implement the YOLOv8-Pose with the "Spatial Tiny-yolo example" but I couldn't get the keypoints. I have converted the model to "yolov8n-pose_openvino_2022.1_6shave.blob". I tried a lot of times and didn't get the keypoints. I would appreciate it if someone could help me. The code is below:
`#!/usr/bin/env python3
from pathlib import Path import sys import cv2 import depthai as dai import numpy as np import time
arg = ""
def getPath(path):
    """Return the absolute path of *path*, resolved relative to this script's directory."""
    # Must be `__file__`: a bare `file` is an undefined name and raises NameError.
    return str((Path(__file__).parent / Path(path)).resolve().absolute())
nnBlobPath = getPath('models/yolov8n-pose_openvino_2022.1_6shave.blob')
if not Path(nnBlobPath).exists(): import sys raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable}"')
labelMapPose = [ "head", "neck", "right_shoulder", "right_elbow", "right_wrist", "left_shoulder", "left_elbow", "left_wrist", "right_hip", "right_knee", "right_ankle", "left_hip", "left_knee", "left_ankle", "right_eye", "left_eye", "right_ear", "left_ear" ]
syncNN = True pipeline = dai.Pipeline() camRgb = pipeline.create(dai.node.ColorCamera) spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork) monoLeft = pipeline.create(dai.node.MonoCamera) monoRight = pipeline.create(dai.node.MonoCamera) stereo = pipeline.create(dai.node.StereoDepth) nnNetworkOut = pipeline.create(dai.node.XLinkOut)
xoutRgb = pipeline.create(dai.node.XLinkOut) xoutNN = pipeline.create(dai.node.XLinkOut) xoutDepth = pipeline.create(dai.node.XLinkOut)
xoutRgb.setStreamName("rgb") xoutNN.setStreamName("detections") xoutDepth.setStreamName("depth") nnNetworkOut.setStreamName("nnNetwork")
camRgb.setPreviewSize(640, 640) print("640, 640") spatialDetectionNetwork.setAnchorMasks({ "side26": [1,2,3], "side13": [3,4,5], "side8400": [6,7,8] # Define anchor masks for side8400 })
camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P) camRgb.setInterleaved(False) camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)
monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) monoLeft.setCamera("left") monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P) monoRight.setCamera("right")
stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A) stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight()) stereo.setSubpixel(True)
spatialDetectionNetwork.setBlobPath(nnBlobPath) spatialDetectionNetwork.setConfidenceThreshold(0.5) spatialDetectionNetwork.input.setBlocking(False) spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5) spatialDetectionNetwork.setDepthLowerThreshold(100) spatialDetectionNetwork.setDepthUpperThreshold(5000)
spatialDetectionNetwork.setNumClasses(80) spatialDetectionNetwork.setCoordinateSize(4) spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319]) spatialDetectionNetwork.setIouThreshold(0.5) if syncNN: else:
with dai.Device(pipeline) as device: previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False) detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False) depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False) networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)
startTime = time.monotonic()
counter = 0
fps = 0
color = (255, 255, 255)
printOutputLayersOnce = True
while True:
inPreview = previewQueue.get()
inDet = detectionNNQueue.get()
depth = depthQueue.get()
inNN = networkQueue.get()
if printOutputLayersOnce:
toPrint = 'Output layer names:'
for ten in inNN.getAllLayerNames():
toPrint = f'{toPrint} {ten},'
printOutputLayersOnce = False
frame = inPreview.getCvFrame()
depthFrame = depth.getFrame() # depthFrame values are in millimeters
depth_downscaled = depthFrame[::4]
if np.all(depth_downscaled == 0):
min_depth = 0 # Set a default minimum depth value when all elements are zero
min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
max_depth = np.percentile(depth_downscaled, 99)
depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)
current_time = time.monotonic()
if (current_time - startTime) > 1 :
fps = counter / (current_time - startTime)
counter = 0
startTime = current_time
detections = inDet.detections
height = frame.shape[0]
width = frame.shape[1]
for detection in detections:
for keypoint in detection.keypoints.xy[0]:
x, y = keypoint[0].item(), keypoint[1].item(), (int(x), int(y)), 5, (0, 0, 255), -1) # Red circle for keypoints
cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
cv2.imshow("depth", depthFrameColor)
cv2.imshow("rgb", frame)
if cv2.waitKey(1) == ord('q'):
Hi, I am trying to deploy YOLOv8-pose on an OAK-D.
I have seen some tutorials before, but they are about older versions of YOLO.
How should I do it?