Open crj1998 opened 1 month ago
Reference:
import cv2
from argparse import ArgumentParser
from tqdm import tqdm
from facex.detection import Detector
facedetector = Detector()
def IoU(boxA, boxB):
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])
interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
iou = interArea / float(boxAArea + boxBArea - interArea)
return iou
def join(tube_bbox, bbox):
xA = min(tube_bbox[0], bbox[0])
yA = min(tube_bbox[1], bbox[1])
xB = max(tube_bbox[2], bbox[2])
yB = max(tube_bbox[3], bbox[3])
return (xA, yA, xB, yB)
def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.4):
left, top, right, bot = tube_bbox
width = right - left
height = bot - top
#Computing aspect preserving bbox
width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))
left = int(left - width_increase * width)
top = int(top - height_increase * height)
right = int(right + width_increase * width)
bot = int(bot + height_increase * height)
top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])
h, w = bot - top, right - left
start = start / fps
end = end / fps
time = end - start
scale = f'{image_shape[0]}:{image_shape[1]}'
return f'ffmpeg -i {inp} -ss {start} -t {time} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'
def compute_bbox_trajectories(trajectories, fps, frame_shape, args):
commands = []
for bbox, tube_bbox, start, end in trajectories:
if (end - start) > args.min_frames:
command = compute_bbox(start, end, fps, tube_bbox, frame_shape, inp=args.inp, image_shape=args.image_shape, increase_area=args.increase)
commands.append(command)
return commands
def process_video(args):
videocapture = cv2.VideoCapture(args.inp)
fps = int(videocapture.get(cv2.CAP_PROP_FPS))
resolution = (int(videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
num_frames = int(videocapture.get(cv2.CAP_PROP_FRAME_COUNT))
frame_shape = resolution[::-1] + (3, )
print("fps:", fps)
print("resolution:", resolution)
print("num frames:", num_frames)
trajectories = []
commands = []
i = 0
success =True
intersection = 0
while success:
success, frame = videocapture.read()
if not success:
break
faces, _ = facedetector(frame)
bboxes = faces[0].cpu().numpy()
## For each trajectory check the criterion
not_valid_trajectories = []
valid_trajectories = []
for trajectory in trajectories:
tube_bbox = trajectory[0]
intersection = 0
for bbox in bboxes:
intersection = max(intersection, IoU(tube_bbox, bbox))
if intersection > args.iou_with_initial:
valid_trajectories.append(trajectory)
else:
not_valid_trajectories.append(trajectory)
commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)
trajectories = valid_trajectories
## Assign bbox to trajectories, create new trajectories
for bbox in bboxes:
intersection = 0
current_trajectory = None
for trajectory in trajectories:
tube_bbox = trajectory[0]
current_intersection = IoU(tube_bbox, bbox)
if intersection < current_intersection and current_intersection > args.iou_with_initial:
intersection = IoU(tube_bbox, bbox)
current_trajectory = trajectory
## Create new trajectory
if current_trajectory is None:
trajectories.append([bbox, bbox, i, i])
else:
current_trajectory[1] = join(current_trajectory[1], bbox)
current_trajectory[3] = i
print(trajectories)
i += 1
commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
return commands
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--inp", required=True, help='Input image or video')
parser.add_argument("--image_shape", default=(512, 512), type=lambda x: tuple(map(int, x.split(','))), help="Image shape")
parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount')
parser.add_argument("--iou_with_initial", type=float, default=0.25, help="The minimal allowed iou with inital bbox")
parser.add_argument("--min_frames", type=int, default=50, help='Minimum number of frames')
args = parser.parse_args()
commands = process_video(args)
for command in commands:
print (command)
Reference