riips / AIC24_Track1_YACHIYO_RIIPS

MIT License

The same GlobalOfflineID is assigned to more than one person at the same time (frame) #5

Open edwardnguyen1705 opened 3 days ago

edwardnguyen1705 commented 3 days ago

Dear @riips and @ryuto-yoshida ,

Thank you for sharing your great work, and congratulations!

I have successfully run your code on my use case, which involves two cameras with overlapping views. However, the results are not what I expected: the same GlobalOfflineID is assigned to more than one person at the same time (frame), even though the paper states this should not happen because it would violate the spatial and temporal constraints.

I would like to send you the output video and all the data as well. Would it be possible for you to take a look and point out what I need to fix? I really appreciate your time.

Output video: https://drive.google.com/file/d/1lOLqHQBRC9bpiNIwKibwy7UqPU9yWDkV/view?usp=drive_link
Data: https://drive.google.com/file/d/1XrI4BlQEZ3pVnJoCXPIUDheT_okO5I5W/view?usp=drive_link

Data structure:

aic24_track1
└── YACHIYO_RIIPS
    ├── Detection
    │   └── scene_001
    │       ├── camera_0001.json
    │       ├── camera_0001.txt
    │       ├── camera_0002.json
    │       └── camera_0002.txt
    ├── EmbedFeature
    │   └── scene_001
    │       ├── camera_0001
    │       └── camera_0002
    ├── Original
    │   └── scene_001
    │       ├── camera_0001
    │       └── camera_0002
    ├── Output
    │   └── scene_001
    │       └── out.mp4
    ├── Pose
    │   └── scene_001
    │       ├── camera_0001
    │       └── camera_0002
    └── Tracking
        └── scene_001
            ├── camera001_tracking_results.json
            ├── camera002_tracking_results.json
            ├── fixed_camera001_tracking_results.json
            ├── fixed_camera002_tracking_results.json
            ├── fixed_whole_tracking_results.json
            ├── representative_nodes_scene1.json
            └── whole_tracking_results.json

Here is the code I use for visualization.

from pprint import pprint
import os.path as osp
import shutil
from pathlib import Path
import glob
from tqdm import tqdm
from collections import defaultdict
import json
import random

import cv2

n_colors = 32
colors = [[random.randint(0, 255) for _ in range(3)] for i in range(n_colors)]

def draw_track(frame, pid_box):
    pid, box = pid_box
    x1, y1 = box[0], box[1]
    x2, y2 = box[2], box[3]
    color = colors[pid % n_colors]  # wrap so IDs >= n_colors don't raise IndexError
    cv2.putText(frame, str(pid), (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 1.3, color, 2)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color=color, thickness=2)

def draw_tracks(frame, pids_boxes):
    for pid_box in pids_boxes:
        draw_track(frame, pid_box)

def make_dir(new_dir, parents=True, exist_ok=True, rm=False):
    if rm and osp.isdir(new_dir):
        shutil.rmtree(new_dir)
    Path(new_dir).mkdir(parents=parents, exist_ok=exist_ok)

def read_json_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

def get_mct_rets(tracking_rets_p):
    tracking_rets = read_json_file(tracking_rets_p)
    mct_rets_l = []
    for cam_id, cam_items in tracking_rets.items():
        for seq, item in cam_items.items():
            if "GlobalOfflineID" not in item: continue
            pid = int(item["GlobalOfflineID"])
            frame_id = int(item["Frame"])
            x1 = int(item["Coordinate"]["x1"])
            y1 = int(item["Coordinate"]["y1"])
            x2 = int(item["Coordinate"]["x2"])
            y2 = int(item["Coordinate"]["y2"])
            mct_rets_l.append((cam_id, frame_id, pid, (x1, y1, x2, y2)))
    mct_rets_l = sorted(mct_rets_l, key=lambda x: (x[0], x[1], x[2]))

    mct_rets_d = defaultdict(dict)
    for cam_id, frame_id, pid, (x1, y1, x2, y2) in mct_rets_l:
        if frame_id not in mct_rets_d[cam_id]:
            mct_rets_d[cam_id][frame_id] = [(pid, (x1, y1, x2, y2))]
        else:
            mct_rets_d[cam_id][frame_id].append((pid, (x1, y1, x2, y2)))

    return mct_rets_d

def get_img_paths(img_dir, ext="/*.jpg"):
    return sorted(glob.glob(img_dir + ext))

root_dir = "path/to/aic24_track1/YACHIYO_RIIPS"
scene = "scene_001"
tracking_dir = osp.join(root_dir, "Tracking")
scene_dir = osp.join(tracking_dir, scene)
tracking_rets_p = osp.join(scene_dir, "fixed_whole_tracking_results.json")
mct_rets = get_mct_rets(tracking_rets_p)

cam1_img_dir = osp.join(root_dir, f"Original/{scene}/camera_0001/Frame")
cam2_img_dir = osp.join(root_dir, f"Original/{scene}/camera_0002/Frame")
cam1_img_paths = get_img_paths(cam1_img_dir)
cam2_img_paths = get_img_paths(cam2_img_dir)
n = min(len(cam1_img_paths), len(cam2_img_paths))
cam1_img_paths = cam1_img_paths[:n]
cam2_img_paths = cam2_img_paths[:n]

output_dir = osp.join(root_dir, "Output", scene)
make_dir(output_dir, rm=True)
out_vid_p = osp.join(output_dir, "out.mp4")

w, h = 1920, 1080
fps = 59.94
scale = 0.5
vid_writer = cv2.VideoWriter(out_vid_p, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(2*scale*w), int(scale*h)))

for frame_id, (cam1_img_p, cam2_img_p) in tqdm(enumerate(zip(cam1_img_paths, cam2_img_paths)), total=n, desc="viz mct rets"):
    f_id = frame_id + 1
    cam1_img = cv2.imread(cam1_img_p)
    cam1_rets = mct_rets["1"].get(f_id, [])
    if cam1_rets:  # draw whenever any track exists, not only when there are 2+
        draw_tracks(cam1_img, cam1_rets)

    cam2_img = cv2.imread(cam2_img_p)
    cam2_rets = mct_rets["2"].get(f_id, [])
    if cam2_rets:
        draw_tracks(cam2_img, cam2_rets)

    cam1_img_ = cv2.resize(cam1_img, (0, 0), fx=scale, fy=scale)
    cam2_img_ = cv2.resize(cam2_img, (0, 0), fx=scale, fy=scale)
    concat_img = cv2.hconcat([cam1_img_, cam2_img_]) 
    vid_writer.write(concat_img)

vid_writer.release()
riips commented 1 day ago

Thank you for reporting the issue. As you observed, the current implementation of our method can allow violations of the spatial and temporal constraints in MCPT.

We are currently working on a comprehensive update to the program, which includes addressing this issue. We might release an improved version of the program after the next AI City Challenge, so we would appreciate your patience in the meantime.

Additionally, we pushed a new version of the program to the develop branch today. If you plan to make improvements to the program, we recommend using the version in that branch. Setting the check_sc_overlap parameter in MCPT to True may resolve the issue. However, since the current implementation is not optimal, the results might not meet your expectations.
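For anyone following along, switching to the develop branch looks like this (the exact file in which check_sc_overlap lives is not stated above, so locate it in the MCPT configuration of that branch before running):

```shell
git clone https://github.com/riips/AIC24_Track1_YACHIYO_RIIPS.git
cd AIC24_Track1_YACHIYO_RIIPS
git checkout develop
# then set check_sc_overlap to True in the MCPT configuration and rerun
```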

Thank you for your understanding, and please feel free to reach out with further questions.