import logging
from typing import List

import cv2
import imagehash
import numpy as np
from PIL import Image

logger = logging.getLogger(__name__)


def is_blurry(path: str, threshold: float = 100.0) -> bool:
    """Return True if the image at *path* is considered blurry.

    Blurriness is measured as the variance of the Laplacian of the
    grayscale image; a variance below *threshold* counts as blurry.

    Args:
        path: Path of the image file to inspect.
        threshold: Laplacian variance below which the image is blurry.

    Returns:
        True if the variance is below *threshold*. False otherwise, and
        also False when OpenCV could not load the file.
    """
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # imread signals a failed load with None instead of raising;
        # an unreadable file is reported as "not blurry".
        return False
    variance = cv2.Laplacian(img, cv2.CV_64F).var()
    # Convert the NumPy boolean to a plain Python bool.
    return bool(variance < threshold)


def _mean_brightness(path: str) -> float:
    """Return the mean brightness (0-255) of the image at *path*.

    The image is converted to 8-bit grayscale before averaging. Errors
    raised while opening or decoding the file propagate to the caller.
    """
    # The context manager closes the underlying file handle, which a bare
    # Image.open() call would leave open.
    with Image.open(path) as img:
        arr = np.array(img.convert("L"), dtype=np.float32)
    return float(arr.mean())


def is_overexposed(path: str, threshold: float = 240.0) -> bool:
    """Return True if the mean brightness of the image exceeds *threshold*."""
    return _mean_brightness(path) > threshold


def is_underexposed(path: str, threshold: float = 30.0) -> bool:
    """Return True if the mean brightness of the image is below *threshold*."""
    return _mean_brightness(path) < threshold


def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
    """Find groups of visually similar images via perceptual hashing.

    Each image is hashed with ``imagehash.phash``. Going through the
    paths in input order, every not-yet-grouped image collects all later
    not-yet-grouped images whose hash distance to it is at most
    *threshold*.

    Args:
        paths: Image file paths to compare. Paths that cannot be opened
            or hashed are skipped (a warning is logged).
        threshold: Maximum hash distance for two images to count as
            duplicates of each other.

    Returns:
        A list of groups, each with at least two paths. The first element
        of every group is treated as the original, the rest as its
        duplicates. Images without any duplicate are not returned.
    """
    hashes = {}
    for path in paths:
        try:
            # Close the file handle as soon as the hash is computed.
            with Image.open(path) as img:
                hashes[path] = imagehash.phash(img)
        except Exception:
            # Best effort: one unreadable file must not abort the whole
            # scan, but it should not vanish without a trace either.
            logger.warning(
                "Skipping %s: could not compute perceptual hash",
                path,
                exc_info=True,
            )

    groups = []
    used = set()  # paths already assigned to a group as a duplicate
    path_list = list(hashes)
    for i, p1 in enumerate(path_list):
        if p1 in used:
            continue
        group = [p1]
        for p2 in path_list[i + 1:]:
            if p2 in used:
                continue
            # Subtracting two ImageHash objects yields their Hamming
            # distance, which is already non-negative.
            if hashes[p1] - hashes[p2] <= threshold:
                group.append(p2)
                used.add(p2)
        if len(group) > 1:
            groups.append(group)
    return groups