Zhongdao / Towards-Realtime-MOT

Joint Detection and Embedding for fast multi-object tracking
MIT License

Here is a simpler demo, and a question about the detection model #24

Closed: Finniu closed this issue 5 years ago

Finniu commented 5 years ago

Hi, thanks for your great contribution! I wrote a simpler demo script, which should be helpful for newcomers. I am wondering, though: is there a Faster R-CNN detection model, or any other model, that performs better than YOLOv3?

import cv2
import numpy as np
import torch

from tracker.multitracker import JDETracker
from utils import visualization as vis

class opt_c(object):
    """Minimal stand-in for the command-line options JDETracker expects."""
    def __init__(self):
        self.img_size = (1088, 608)
        self.cfg = "cfg/yolov3.cfg"
        self.weights = "/home/apptech/Towards-Realtime-MOT/jde.1088x608.uncertainty.pt"
        self.conf_thres = 0.5
        self.track_buffer = 30
        self.nms_thres = 0.4
        self.min_box_area = 200

opt = opt_c()

def letterbox(img, height=608, width=1088, color=(127.5, 127.5, 127.5)):  # resize a rectangular image to a padded rectangle matching the network input size
    shape = img.shape[:2]  # shape = [height, width]
    ratio = min(float(height)/shape[0], float(width)/shape[1])
    new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # new_shape = [width, height]
    dw = (width - new_shape[0]) / 2  # width padding
    dh = (height - new_shape[1]) / 2  # height padding
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)
    img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # padded rectangular
    return img, ratio, dw, dh
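
# Illustrative sanity check (assuming a 1920x1080 webcam frame):
#   ratio = min(608/1080, 1088/1920) ~= 0.563
#   the frame is resized to 1081x608, then padded by 3 px on the left and
#   4 px on the right to reach the 1088x608 network input size.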

def eval_seq(opt, save_dir=None, show_image=True, frame_rate=30):
    tracker = JDETracker(opt, frame_rate=frame_rate)
    results = []
    frame_id = 0
    cam = cv2.VideoCapture(0)  # default webcam

    while True:
        ret, img0 = cam.read()
        if not ret:  # stop when no frame can be read from the camera
            break
        img, _, _, _ = letterbox(img0)
        # BGR -> RGB, HWC -> CHW
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img, dtype=np.float32)
        img /= 255.0  # scale pixel values to [0, 1]
        # run tracking
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            # Drop boxes that are wider than tall (aspect ratio > 1.6) or
            # smaller than min_box_area; these are unlikely to be pedestrians.
            vertical = tlwh[2] / tlwh[3] > 1.6
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id)

        cv2.imshow('online_im', online_im)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_id += 1

    # Release the camera and close the display window on exit.
    cam.release()
    cv2.destroyAllWindows()
    return results

def main():

    # run tracking
    eval_seq(opt)

if __name__ == '__main__':
    main()
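As a side note, the results list is filled every frame but never written out. If you want to dump it for offline evaluation, a minimal sketch along the lines of the MOTChallenge text format could look like the following (the save_results helper and the results.txt filename are my own illustration, not part of the repo):

def save_results(filename, results):
    # Each row: frame, track id, top-left x, top-left y, width, height,
    # confidence, and three -1 padding fields expected by MOT tools.
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            for tlwh, track_id in zip(tlwhs, track_ids):
                x, y, w, h = tlwh
                f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},1,-1,-1,-1\n'.format(
                    frame_id, track_id, x, y, w, h))

For example, results = eval_seq(opt) followed by save_results('results.txt', results) would produce one text file per run.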
Zhongdao commented 5 years ago

Thanks for your code! Since we focus more on the speed of the system, we only employ single-stage detectors in our experiments. In the future, we will pay more attention to improving the detector, but it will still be a single-stage one.

Finniu commented 5 years ago

> Thanks for your code! Since we focus more on the speed of the system, we only employ single-stage detectors in our experiments. In the future, we will pay more attention to improving the detector, but it will still be a single-stage one.

OK, yeah, that makes sense. Thanks for your reply!