levan92 / deep_sort_realtime

A really more real-time adaptation of deep sort
MIT License

Failing to implement deep-sort-realtime with YOLOv8 #44

Closed: uma-oo closed this issue 1 year ago

uma-oo commented 1 year ago

Hi there, I'm trying to implement tracking for a specific use case, but first I just wanted to get tracking working with YOLOv8 on a video. Here's the whole code:

import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

object_tracker = DeepSort(max_age=5,
                          n_init=2,
                          nms_max_overlap=1.0,
                          max_cosine_distance=0.3,
                          nn_budget=None,
                          override_track_class=None,
                          embedder="mobilenet",
                          half=True,
                          bgr=True,
                          embedder_gpu=True,
                          embedder_model_name=None,
                          embedder_wts=None,
                          polygon=False,
                          today=None)

cap = cv2.VideoCapture("Videos/ASMR KITTY CRUNCH Dry Food Feast _ Extreme Cat Eating Sounds _  고양이 + 먹방.mp4")
model = YOLO("Yolo-Weights/yolov8s.pt")
classes = model.names
print(classes)
while True:
    success, image = cap.read()
    if success:

        results = model(image)

        for result in results:
            detections = []
            boxes = result.boxes
            for box in boxes:
                detection = []
                print("Only one Box:", box.boxes.cpu().numpy())
                r = box.boxes.cpu().numpy()[0]
                x1, y1, x2, y2 = r[:4]
                w, h = x2 - x1, y2 - y1
                coordinates = list((int(x1), int(x2), int(w), int(h)))
                conf = r[4]
                clsId = int(r[5])
                cls = classes[clsId]
                detection.extend((coordinates, conf, cls))
                detection = tuple(detection)
                detections.append(detection)
                print("detection: ", detection)

                print("r: ", r)
            print("detections: ", detections)
        tracks = object_tracker.update_tracks(detections, frame=image)

        cv2.imshow("Image", image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

I'm quite confused, because I'm sure of the input format I give to the tracker, which looks like this:

detections: [([495, 794, 299, 167], 0.9417956, 'bowl'), ([111, 921, 810, 597], 0.9322504, 'cat'), ([75, 1126, 1050, 92], 0.33967713, 'dining table')]

I think this is the right input format. I also tried initializing the tracker with only the max_age hyperparameter, but I still encounter the same error:
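For completeness, here's a minimal sketch of the call I'm making, with the boxes hard-coded from the printed output above (the dummy frame is just so the snippet runs standalone):

import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort

tracker = DeepSort(max_age=5)
# Each detection: ([left, top, width, height], confidence, class_name)
detections = [([495, 794, 299, 167], 0.9417956, 'bowl'),
              ([111, 921, 810, 597], 0.9322504, 'cat')]
frame = np.zeros((1920, 1080, 3), dtype=np.uint8)  # stand-in for the real video frame
tracks = tracker.update_tracks(detections, frame=frame)
print([t.track_id for t in tracks])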

[screenshot of the error]

Please, any help would be appreciated!

3i-Haseeb commented 1 year ago

@uma-oo were you able to run it with YOLOv8?

3i-Haseeb commented 1 year ago

@uma-oo I fixed your code. Had to change:

coordinates = list((int(x1), int(x2), int(w), int(h)))

to:

coordinates = list((int(x1), int(y1), int(w), int(h)))
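This matters because update_tracks expects each box as [left, top, width, height], so passing x2 where y1 belongs feeds the tracker an invalid box. A quick sketch of the conversion from YOLO's corner (xyxy) output, with made-up numbers:

x1, y1, x2, y2 = 100, 50, 300, 250                     # YOLO corner format
ltwh = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]  # what DeepSort wants
print(ltwh)  # [100, 50, 200, 200]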

Also, I removed some lines. Below is the full example:

import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

object_tracker = DeepSort()  # default parameters

cap = cv2.VideoCapture(
    "/Users/3i-a1-2021-15/Developer/projects/pivo-tracking/videos/2.mp4"
)
model = YOLO("yolov8n.pt")
classes = model.names

while cap.isOpened():
    ret, image = cap.read()
    if ret:
        results = model(image)  # run YOLOv8 detection on the frame

        for result in results:
            detections = []
            for r in result.boxes.data.tolist():
                # YOLO returns corners: x1, y1, x2, y2, conf, class_id
                x1, y1, x2, y2 = r[:4]
                w, h = x2 - x1, y2 - y1
                # DeepSort expects boxes as [left, top, width, height]
                coordinates = [int(x1), int(y1), int(w), int(h)]
                conf = r[4]
                clsId = int(r[5])
                cls = classes[clsId]
                if cls == "person":
                    detections.append((coordinates, conf, cls))

            print("detections: ", detections)
            tracks = object_tracker.update_tracks(detections, frame=image)

            for track in tracks:
                # skip tracks not yet confirmed by n_init consecutive matches
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                bbox = track.to_ltrb()  # left, top, right, bottom

                cv2.rectangle(
                    image,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    color=(0, 0, 255),
                    thickness=4,
                )
                cv2.putText(
                    image,
                    "ID: " + str(track_id),
                    (int(bbox[0]), int(bbox[1]) - 10),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    2,
                    (0, 255, 0),
                    2,
                )

        cv2.imshow("Image", image)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
    else:
        # stop when the stream ends or a frame can't be read
        break

cap.release()
cv2.destroyAllWindows()
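If you also want the label to show what class each track is, the Track object carries the class of its last matched detection (det_class in this repo, if I remember correctly), so you could extend the putText call above like this:

label = "ID: " + str(track_id) + " " + str(track.det_class)  # class of last matched detection
cv2.putText(image, label, (int(bbox[0]), int(bbox[1]) - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)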