gfieldGG / videohash

Near Duplicate Video Detection (Perceptual Video Hashing) - Get a 256-bit comparable hash value for any video.
MIT License

re-evaluate whash vs phash with larger hash size #8

Closed gfieldGG closed 1 year ago

gfieldGG commented 1 year ago

Maybe PDQ.

Maybe generate both, since ImageHash generation cost is negligible compared to frame extraction anyway.
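
A minimal sketch of what generating both could look like, assuming the per-frame 4x4 scheme from the benchmark below; the frames directory is a placeholder:

import imagehash
from PIL import Image
from pathlib import Path

# Placeholder path: wherever the extracted frames end up.
frames = [Image.open(p) for p in sorted(Path("./frames/").iterdir())]

# Hash the already-decoded frames with both algorithms; each extra hash is
# cheap next to the cost of extracting and decoding the frames themselves.
phash_hex = "".join(f"{imagehash.phash(f, hash_size=4)}" for f in frames)
whash_hex = "".join(f"{imagehash.whash(f, hash_size=4)}" for f in frames)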

gfieldGG commented 1 year ago

whash seems to take around 9 times longer to compute than phash, for some reason. Also, I initially switched from whash to phash because of its better precision.

# collage vs no collage (and phash vs whash)
import imagehash
from PIL import Image
from timeit import timeit
from pathlib import Path
from videohash.collage import make_collage

frames = [Image.open(x) for x in Path("./tests/gold/rocket/storagedir/frames/").iterdir()]

def collage_p():
    # One 256-bit (16x16) phash of a single collage built from all frames.
    c = make_collage(frames, 240)
    h = f"{imagehash.phash(c, hash_size=16)}"
    return h

def nocollage_p():
    # One 16-bit (4x4) phash per frame, hex strings concatenated; no collage step.
    h = "".join([f"{imagehash.phash(f, hash_size=4)}" for f in frames])
    return h

def collage_w():
    # Same as collage_p, but with whash.
    c = make_collage(frames, 240)
    h = f"{imagehash.whash(c, hash_size=16)}"
    return h

def nocollage_w():
    # Same as nocollage_p, but with whash.
    h = "".join([f"{imagehash.whash(f, hash_size=4)}" for f in frames])
    return h

runs = 1000
print(timeit(collage_p, number=runs))    # 3.256334900001093
print(timeit(nocollage_p, number=runs))  # 4.264412800002901
print(timeit(collage_w, number=runs))    # 29.879121599999053
print(timeit(nocollage_w, number=runs))  # 28.33230530000219
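
For the precision half of the comparison, a minimal sketch of how two of the resulting hex hashes could be scored against each other; hex_hamming and the example threshold are illustrative assumptions, not part of videohash or ImageHash:

# Sketch only: normalised bitwise Hamming distance between two equal-length
# hex hash strings (e.g. the 256-bit hashes above); the 0.25 threshold is an
# arbitrary example, not a value taken from the package.
def hex_hamming(a: str, b: str) -> float:
    assert len(a) == len(b)
    bits = len(a) * 4  # each hex digit encodes 4 bits
    return bin(int(a, 16) ^ int(b, 16)).count("1") / bits

# e.g. call two videos near-duplicates if fewer than 25% of the bits differ:
# is_dupe = hex_hamming(hash_a, hash_b) < 0.25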