from collections import defaultdict
from hashlib import md5
from pathlib import Path
# Bytes read per step while hashing, so a large image is never held in
# memory in one piece.
_HASH_CHUNK_SIZE = 1 << 20


def file_md5(path, chunk_size=_HASH_CHUNK_SIZE):
    """Return the hex MD5 digest of the file at *path*.

    The file is read in pieces of *chunk_size* bytes. MD5 serves only as a
    content fingerprint for spotting identical files here, not as a
    security measure.

    Raises:
        ValueError: if *chunk_size* is not positive (a zero-sized read would
            look like end-of-file and silently hash nothing).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of bytes")
    digest = md5()
    with path.open('rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def group_by_hash(paths):
    """Group *paths* by the MD5 digest of their contents.

    Returns a ``defaultdict(list)`` mapping each hex digest to the paths
    whose bytes produced it, in the order the paths were supplied. Entries
    that are not regular files (for example a directory whose name ends in
    ``.jpg``) are skipped instead of raising on open.
    """
    groups = defaultdict(list)
    for path in paths:
        if not path.is_file():
            continue
        groups[file_md5(path)].append(path)
    return groups


def find_duplicate_groups(groups):
    """Return the path lists in *groups* that contain more than one file."""
    return [paths for paths in groups.values() if len(paths) > 1]


img_dir = Path('./images')

# Sorting makes the order inside each group reproducible; glob() itself
# yields entries in arbitrary, filesystem-dependent order.
hash_dict = group_by_hash(sorted(img_dir.glob('*.jpg')))
print(f"{len(hash_dict)} distinct image contents")

# Every member of a duplicate group is recorded, not just the first two, so
# triplicates and larger groups are fully reported. No file-name comparison
# is needed: all paths come from one directory, so names are already distinct.
duplicate_img = []
for dup_group in find_duplicate_groups(hash_dict):
    duplicate_img.extend(dup_group)
    print(dup_group)
print(f"{len(duplicate_img)} files have at least one identical copy")