feat: duplicate detection via perceptual hashing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
36
analyzer.py
36
analyzer.py
@@ -1,6 +1,8 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import imagehash
|
||||
from typing import List
|
||||
|
||||
|
||||
def is_blurry(path: str, threshold: float = 100.0) -> bool:
|
||||
@@ -27,3 +29,37 @@ def is_overexposed(path: str, threshold: float = 240.0) -> bool:
|
||||
def is_underexposed(path: str, threshold: float = 30.0) -> bool:
    """Return True if the image is underexposed.

    The image counts as underexposed when the value reported by
    ``_mean_brightness`` for ``path`` is strictly below ``threshold``.
    """
    brightness = _mean_brightness(path)
    return brightness < threshold
|
||||
|
||||
|
||||
def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
    """Find groups of similar images via perceptual hashing.

    Every readable image is hashed with ``imagehash.phash``. The paths are
    then grouped greedily in input order: each image that has not yet been
    assigned to a group becomes the anchor of a candidate group and collects
    all later, still-unassigned images whose hash difference to the anchor
    is at most ``threshold``.

    Args:
        paths: Paths of the image files to compare. A path that occurs more
            than once is only considered at its first position.
        threshold: Maximum hash difference (inclusive) for two images to be
            treated as duplicates of each other.

    Returns:
        A list of groups, each containing at least two paths. The first
        element of a group is regarded as the original, the remaining
        elements as its duplicates. Images without any match and files
        that could not be opened or hashed do not appear in the result.
    """
    hashes = {}
    for path in paths:
        if path in hashes:
            # Same path listed again: no need to decode and hash it twice.
            continue
        try:
            # The context manager closes the underlying file handle as soon
            # as the hash has been computed, instead of leaking it.
            with Image.open(path) as img:
                hashes[path] = imagehash.phash(img)
        except Exception:
            # Deliberate best effort: anything that cannot be opened or
            # hashed is left out of the comparison rather than aborting
            # the whole scan.
            continue

    groups = []
    used = set()
    path_list = list(hashes)

    for i, anchor in enumerate(path_list):
        if anchor in used:
            continue
        anchor_hash = hashes[anchor]
        group = [anchor]
        # Only later paths are candidates; earlier ones were already anchors
        # or have been claimed by a previous group.
        for candidate in path_list[i + 1:]:
            if candidate in used:
                continue
            if abs(anchor_hash - hashes[candidate]) <= threshold:
                group.append(candidate)
                used.add(candidate)
        if len(group) > 1:
            used.add(anchor)
            groups.append(group)

    return groups
|
||||
|
||||
Reference in New Issue
Block a user