openvinotoolkit / open_model_zoo

Pre-trained Deep Learning models and demos (high quality and extremely fast)
https://docs.openvino.ai/latest/model_zoo.html
Apache License 2.0
4.08k stars 1.37k forks source link

GT File Generation For Multi-Camera Multi-Target Tracking #3914

Closed Nas-Azzam closed 2 months ago

Nas-Azzam commented 7 months ago

Hi Intel Community, below is the evaluation script for the Multi-Camera Multi-Target Tracking demo:

`""" Copyright (c) 2019-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """

import argparse import json import logging as log

import defusedxml.ElementTree as etree import motmetrics as mm import numpy as np from tqdm import tqdm

from mc_tracker.sct import TrackedObj

def _box_attribute(box_tree, name):
    """Return the text of the first child of *box_tree* whose ``name`` attribute is *name*.

    Returns None when no such child exists. Children that carry no ``name``
    attribute at all are ignored instead of raising ``KeyError``.
    """
    for tag in box_tree:
        if tag.get('name') == name:
            return tag.text
    return None


def read_gt_tracks(gt_filenames, size_divisor=1, skip_frames=0, skip_heavy_occluded_objects=False):
    """Read ground-truth tracks from XML annotation files, one file per camera.

    Every ``<track>`` child of a file's root element becomes a dict with the
    keys ``'id'``, ``'boxes'`` and ``'timestamps'``, filled from the track's
    ``<box>`` children (attributes ``frame``, ``xtl``, ``ytl``, ``xbr``,
    ``ybr`` plus named child elements ``id`` and, optionally, ``occlusion``).

    Args:
        gt_filenames: Paths of the annotation files; the index of a file in
            this sequence is the camera index in the result.
        size_divisor: Integer the box coordinates are floor-divided by.
        skip_frames: When greater than zero, boxes whose frame index is a
            multiple of this value are dropped.
        skip_heavy_occluded_objects: When true, boxes whose ``occlusion``
            attribute equals ``'heavy_occluded'`` are dropped.

    Returns:
        A tuple ``(camera_tracks, min_last_frame_idx)``. ``camera_tracks`` is
        a list with one list of track dicts per input file; boxes are stored
        as ``[x_left, y_top, x_right, y_bottom]``. ``min_last_frame_idx`` is
        the smallest per-file last timestamp (``-1`` if nothing was read).

    Raises:
        ValueError: If a kept box has no ``id`` attribute child.
        AssertionError: If a box has non-positive width or height after
            the division by ``size_divisor``.
    """
    min_last_frame_idx = -1
    camera_tracks = [[] for _ in gt_filenames]
    for i, filename in enumerate(gt_filenames):
        last_frame_idx = -1
        root = etree.parse(filename).getroot()
        for track_xml_subtree in tqdm(root, desc='Reading ' + filename):
            if track_xml_subtree.tag != 'track':
                continue
            # 'id' stays None when every box of the track is filtered out, so a
            # track can never inherit the identifier of the previous one.
            track = {'id': None, 'boxes': [], 'timestamps': []}
            for box_tree in track_xml_subtree.findall('box'):
                frame = int(box_tree.get('frame'))
                if skip_frames > 0 and frame % skip_frames == 0:
                    continue
                # The occlusion attribute is only consulted when filtering is
                # requested, so annotations without it remain readable.
                if (skip_heavy_occluded_objects
                        and _box_attribute(box_tree, 'occlusion') == 'heavy_occluded'):
                    continue
                x_left = int(float(box_tree.get('xtl'))) // size_divisor
                x_right = int(float(box_tree.get('xbr'))) // size_divisor
                y_top = int(float(box_tree.get('ytl'))) // size_divisor
                y_bottom = int(float(box_tree.get('ybr'))) // size_divisor
                assert x_right > x_left
                assert y_bottom > y_top
                track['boxes'].append([x_left, y_top, x_right, y_bottom])
                # NOTE(review): the frame index is divided by size_divisor exactly
                # as in the original code; confirm this is intended, since
                # size_divisor is described as an image-resolution scale factor.
                track['timestamps'].append(frame // size_divisor)
                last_frame_idx = max(last_frame_idx, track['timestamps'][-1])
                id_text = _box_attribute(box_tree, 'id')
                if id_text is None:
                    raise ValueError('Box at frame {} in {} has no "id" attribute'
                                     .format(frame, filename))
                track['id'] = int(id_text)
            camera_tracks[i].append(track)
        if min_last_frame_idx < 0:
            min_last_frame_idx = last_frame_idx
        else:
            min_last_frame_idx = min(min_last_frame_idx, last_frame_idx)

    return camera_tracks, min_last_frame_idx

def get_detections_from_tracks(tracks_history, time):
    """Collect, per camera, the objects that have a box at timestamp *time*.

    Args:
        tracks_history: One list of track dicts per camera; each track dict
            provides the keys ``'timestamps'``, ``'boxes'`` and ``'id'``,
            with ``'boxes'`` aligned index-by-index with ``'timestamps'``.
        time: Timestamp to look up in every track.

    Returns:
        A list with one entry per camera, each a list of ``TrackedObj``
        built from the box and id of every track active at *time*.
    """
    active_detections = [[] for _ in tracks_history]
    for i, camera_hist in enumerate(tracks_history):
        for track in camera_hist:
            # A single index() call replaces the membership test plus lookup;
            # ValueError means the track has no box at this timestamp.
            try:
                idx = track['timestamps'].index(time)
            except ValueError:
                continue
            active_detections[i].append(TrackedObj(track['boxes'][idx], track['id']))
    return active_detections

def check_contain_duplicates(all_detections):
    """Tell whether any camera reports the same label more than once.

    Args:
        all_detections: One iterable of detected objects per camera; every
            object exposes a hashable ``label`` attribute.

    Returns:
        True if at least one camera's detections contain a repeated label,
        False otherwise (including when there are no detections at all).
    """
    def has_repeated_label(camera_detections):
        labels = [obj.label for obj in camera_detections]
        # A set drops repeats, so it is shorter exactly when a label recurs.
        return len(set(labels)) != len(labels)

    return any(has_repeated_label(detections) for detections in all_detections)

def _boxes_and_labels(detections):
    """Split tracked objects into ``[x, y, width, height]`` boxes and their labels.

    Each object's ``rect`` is read as ``[x_left, y_top, x_right, y_bottom]``.
    """
    boxes = []
    labels = []
    for obj in detections:
        boxes.append([obj.rect[0], obj.rect[1],
                      obj.rect[2] - obj.rect[0],
                      obj.rect[3] - obj.rect[1]])
        labels.append(obj.label)
    return boxes, labels


def main():
    """Computes MOT metrics for the multi camera multi person tracker

    Reads the tracker history (JSON) and one ground-truth annotation file per
    camera, feeds every frame of every camera into a motmetrics accumulator
    and prints the MOTChallenge metrics summary, per camera and overall.
    """
    # The description is assembled by implicit concatenation; a backslash
    # continuation inside the literal leaves a stray escape in the text.
    parser = argparse.ArgumentParser(description='Multi camera multi person '
                                                 'tracking visualization demo script')
    parser.add_argument('--history_file', type=str, default='', required=True,
                        help='File with tracker history')
    parser.add_argument('--gt_files', type=str, nargs='+', required=True,
                        help='Files with ground truth annotation')
    parser.add_argument('--size_divisor', type=int, default=1,
                        help='Scale factor for GT image resolution')
    parser.add_argument('--skip_frames', type=int, default=0,
                        help='Frequency of skipping frames')

    args = parser.parse_args()

    with open(args.history_file, encoding='utf-8') as hist_f:
        history = json.load(hist_f)

    # Explicit check instead of assert, which is stripped under "python -O".
    if len(args.gt_files) != len(history):
        parser.error('Got {} ground truth file(s) but the history file describes {} camera(s)'
                     .format(len(args.gt_files), len(history)))

    gt_tracks, last_frame_idx = read_gt_tracks(args.gt_files,
                                               size_divisor=args.size_divisor,
                                               skip_frames=args.skip_frames)
    accs = [mm.MOTAccumulator(auto_id=True) for _ in args.gt_files]

    for time in tqdm(range(last_frame_idx + 1), 'Processing detections'):
        active_detections = get_detections_from_tracks(history, time)
        if check_contain_duplicates(active_detections):
            log.warning('At least one IDs collision has occurred at the timestamp %s', time)
        gt_detections = get_detections_from_tracks(gt_tracks, time)

        for i, camera_gt_detections in enumerate(gt_detections):
            gt_boxes, gt_labels = _boxes_and_labels(camera_gt_detections)
            ht_boxes, ht_labels = _boxes_and_labels(active_detections[i])

            distances = mm.distances.iou_matrix(np.array(gt_boxes),
                                                np.array(ht_boxes), max_iou=0.5)
            accs[i].update(gt_labels, ht_labels, distances)

    mh = mm.metrics.create()
    summary = mh.compute_many(accs,
                              metrics=mm.metrics.motchallenge_metrics,
                              generate_overall=True,
                              names=['video ' + str(i) for i in range(len(accs))])

    strsummary = mm.io.render_summary(summary,
                                      formatters=mh.formatters,
                                      namemap=mm.io.motchallenge_metric_names)
    print(strsummary)

if __name__ == '__main__': main() ` link https://github.com/openvinotoolkit/open_model_zoo/blob/master/demos/multi_camera_multi_target_tracking_demo/python/run_evaluate.py

I'm having trouble creating the GT file for evaluation. The format mentioned in the Readme file is not supported by CVAT. I was hoping the Community could provide more information on this or update the evaluation script for MOT series. Any help would be greatly appreciated. Thank you.

Wovchena commented 7 months ago

@sovrasov, maybe you remember and can answer :)