import os
import ffmpeg
from natsort import natsorted
import numpy as np
from PIL import Image

sample_video_path = "/content/Abuse001_x264.mp4"
# Single source of truth for the frame directory, so that the directory we
# create is guaranteed to be the one ffmpeg writes to and we read back from.
output_dir = "/content/outputs"

# exist_ok avoids the check-then-create race and is a no-op on re-runs.
os.makedirs(output_dir, exist_ok=True)

# Dump every frame of the video as "<index>.jpg", numbering from 0.
# NOTE(review): the frames are written as JPEG, which is lossy, so pixel-exact
# equality with frames decoded in memory should not be expected.
ffmpeg.input(sample_video_path).output(
    f"{output_dir}/%d.jpg",
    start_number=0,
).global_args("-loglevel", "quiet").run()

# Natural sort of the directory listing so the first entry is frame 0.
rgb_files = natsorted(os.listdir(output_dir))

# Load the first frame as a float array; the context manager closes the
# underlying file handle once the pixel data has been copied out.
with Image.open(os.path.join(output_dir, rgb_files[0])) as img:
    data = np.array(img).astype(float)

import av
import numpy as np

# Open the same clip with PyAV, rewind to the start, and set up decoding of
# the first video stream.
container = av.open(sample_video_path)
container.seek(0)
decoded_video = container.decode(video=0)

# Materialise every decoded frame.
frames = list(decoded_video)

# First frame converted to an "rgb24" ndarray.
data2 = frames[0].to_ndarray(format="rgb24")

# Cell output: do the ffmpeg-extracted frame and the PyAV frame match
# element-wise (within np.isclose tolerances)?
np.all(np.isclose(data, data2))

False

Code

Libraries

$ pip install datasets av ffmpeg-python decord opencv-python

import os
import datasets
from datasets import load_dataset

import numpy as np

import av
import cv2
import decord
import ffmpeg

Load sample video

# Where the dataset is cached and which hub repository it comes from.
repo_id = "jinmang2/ucf_crime"
cache_dir = "/content/drive/MyDrive/ucf_crime"

# Load the "test" configuration of the dataset.
test = load_dataset(path=repo_id, name="test", cache_dir=cache_dir)

# Path of the first sample's video, used by every benchmark below.
# Abuse001_x264.mp4 -> 2,729 frames
first_sample = test["train"][0]
video_path = first_sample["video_path"]

Decord

%%time
# Decode the whole clip with decord, requesting height=240 / width=320 frames.
vr = decord.VideoReader(uri=video_path, height=240, width=320)
decord_images = []
for frame_idx in range(len(vr)):
    frame_array = vr[frame_idx].asnumpy()
    decord_images.append(Image.fromarray(frame_array))
    # Keep the raw array of the very first frame for the comparison cell.
    if frame_idx == 0:
        res_decord = frame_array

CPU times: user 3.04 s, sys: 143 ms, total: 3.18 s
Wall time: 1.01 s

PyAV

%%time
# Decode every frame of the first video stream into PIL images.
# The context manager closes the container once decoding is finished.
with av.open(video_path) as container:
    av_images = [frame.to_image() for frame in container.decode(video=0)]

# First frame as a float array for the cross-library comparison.
# (Previously this indexed an undefined/stale `images` list instead of the
# frames that were just decoded.)
res_pyav = np.array(av_images[0]).astype(float)

WARNING:libav.mov,mp4,m4a,3gp,3g2,mj2:stream 0, timescale not set
CPU times: user 3.05 s, sys: 51.2 ms, total: 3.1 s
Wall time: 3.1 s

ffmpeg-python

%%time
# Directory that receives one JPEG per frame.
outpath = "/content/ffmpeg-python"
# exist_ok makes this safe to re-run and avoids the check-then-create race.
os.makedirs(outpath, exist_ok=True)

# Write frames as image_0.jpg, image_1.jpg, ...
# NOTE(review): JPEG is lossy, so these frames are not expected to be
# pixel-identical to frames decoded directly in memory by other libraries.
ffmpeg.input(video_path).output(
    f"{outpath}/image_%d.jpg", start_number=0,
).run()

# Load the first frame as a float array; the context manager closes the file
# handle once the pixel data has been copied into the array.
with Image.open(os.path.join(outpath, "image_0.jpg")) as img:
    res_ffmpeg = np.array(img).astype(float)

CPU times: user 14.5 ms, sys: 2.08 ms, total: 16.6 ms
Wall time: 2.35 s

OpenCV

%%time
# Decode the whole clip with OpenCV.
cap = cv2.VideoCapture(video_path)
cv_images = []
try:
    # Read until the stream is exhausted instead of assuming a fixed frame
    # count, so that no `None` frames are appended for shorter videos.
    while True:
        ok, img = cap.read()
        if not ok:
            break
        cv_images.append(img)
finally:
    cap.release()

# First frame as a float array for the cross-library comparison.
# (Previously this indexed an undefined/stale `images` list.)
# OpenCV returns frames in BGR channel order; convert to RGB so the array is
# comparable with the RGB arrays produced by the other libraries.
res_cv = cv2.cvtColor(cv_images[0], cv2.COLOR_BGR2RGB).astype(float)

CPU times: user 1.93 s, sys: 174 ms, total: 2.1 s
Wall time: 809 ms

Comparison

# Pairwise check of the first frame produced by each library; the cell output
# is a tuple of booleans in the order the pairs are listed here.
tuple(
    np.isclose(lhs, rhs).all()
    for lhs, rhs in (
        (res_decord, res_pyav),
        (res_pyav, res_ffmpeg),
        (res_decord, res_ffmpeg),
        (res_cv, res_ffmpeg),
        (res_cv, res_pyav),
        (res_cv, res_decord),
    )
)

(True, False, False, False, True, True)

Conclusion

Using the ffmpeg-python library and running the `ffmpeg -i {VIDEO_PATH}` command as a subprocess produce the same array.
The results of cv2, decord, and PyAV, which are used in recent competitions and in Hugging Face libraries, are the same as one another.
OpenCV is the fastest overall, and the fastest library targeting the video/audio domain is decord.

jinmang2 / anomaly_detection_on_video

Comparison of methods for extracting frames from video #1

Code

Libraries

Load sample video

Decord

PyAV

ffmpeg-python

OpenCV

Comparison

Conclusion