Closed jinmang2 closed 1 year ago
$ pip install datasets av ffmpeg-python decord opencv-python
import os

import av
import cv2
import datasets
import decord
import ffmpeg
import numpy as np
from datasets import load_dataset
from PIL import Image
# Where the dataset lives on the Hub and where it is cached locally.
repo_id = "jinmang2/ucf_crime"
cache_dir = "/content/drive/MyDrive/ucf_crime"
test = load_dataset(path=repo_id, name="test", cache_dir=cache_dir)
# Abuse001_x264.mp4 -> 2,729 frames
first_example = test["train"][0]
video_path = first_example["video_path"]
%%time
# Decode every frame with decord, asking the reader for 320x240 output.
vr = decord.VideoReader(uri=video_path, height=240, width=320)
decord_images = []
for frame_idx in range(len(vr)):
    frame_array = vr[frame_idx].asnumpy()
    if frame_idx == 0:
        # Keep the first frame as an array for the cross-library comparison.
        res_decord = frame_array
    decord_images.append(Image.fromarray(frame_array))
CPU times: user 3.04 s, sys: 143 ms, total: 3.18 s
Wall time: 1.01 s
%%time
# Decode every frame of the first video stream with PyAV.
# The container is opened as a context manager so the underlying file is
# closed once decoding finishes.
with av.open(video_path) as container:
    av_images = [frame.to_image() for frame in container.decode(video=0)]
# First frame as a float array, used for the cross-library comparison.
res_pyav = np.array(av_images[0]).astype(float)
WARNING:libav.mov,mp4,m4a,3gp,3g2,mj2:stream 0, timescale not set
CPU times: user 3.05 s, sys: 51.2 ms, total: 3.1 s
Wall time: 3.1 s
%%time
# Dump every frame to JPEG files with ffmpeg-python, then load the first one.
outpath = "/content/ffmpeg-python"
# exist_ok avoids the separate existence check (and the race it implies).
os.makedirs(outpath, exist_ok=True)
ffmpeg.input(video_path).output(
    f"{outpath}/image_%d.jpg", start_number=0,
).run()
# Read the first frame inside a context manager so the file handle is closed;
# np.array() copies the pixel data before the image is closed.
with Image.open(os.path.join(outpath, "image_0.jpg")) as img:
    res_ffmpeg = np.array(img).astype(float)
CPU times: user 14.5 ms, sys: 2.08 ms, total: 16.6 ms
Wall time: 2.35 s
%%time
# Decode every frame with OpenCV.
# Frames are read until read() reports failure instead of assuming a fixed
# frame count, so the cell works for any video and never stores a None frame.
cap = cv2.VideoCapture(video_path)
cv_images = []
try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        cv_images.append(frame)
finally:
    # Always free the capture handle, even if decoding raises.
    cap.release()
# First frame as a float array, used for the cross-library comparison.
# NOTE(review): OpenCV returns frames in BGR channel order; confirm whether a
# BGR->RGB conversion is wanted before comparing against the other readers.
res_cv = np.array(cv_images[0]).astype(float)
CPU times: user 1.93 s, sys: 174 ms, total: 2.1 s
Wall time: 809 ms
# Pairwise check of whether the first frame decoded by each library matches.
# The order of the pairs fixes the order of the booleans in the result tuple.
first_frame_pairs = [
    (res_decord, res_pyav),
    (res_pyav, res_ffmpeg),
    (res_decord, res_ffmpeg),
    (res_cv, res_ffmpeg),
    (res_cv, res_pyav),
    (res_cv, res_decord),
]
tuple(np.isclose(left, right).all() for left, right in first_frame_pairs)
(True, False, False, False, True, True)
The `ffmpeg-python` library and running the `ffmpeg -i {VIDEO_PATH}` command as a subprocess produce the same array.
`cv2`, `decord`, and `PyAV`, which are used in recent competitions and in Hugging Face, produce the same array as one another.
OpenCV is the fastest overall, and the fastest library that specifically targets the video/audio domain is `decord`.