66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
import imagehash
|
|
from typing import List
|
|
|
|
|
|
def is_blurry(path: str, threshold: float = 100.0) -> bool:
    """Return True if the image at *path* is blurry.

    Blur is detected via the variance of the Laplacian: a sharp image has
    strong edges and therefore a high variance. Images whose variance falls
    below *threshold* are reported as blurry.

    Args:
        path: Path to the image file.
        threshold: Laplacian-variance cutoff; below it the image counts as blurry.

    Returns:
        True when the image is readable and its Laplacian variance is below
        *threshold*; False otherwise (including unreadable files).
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # Unreadable/missing files are deliberately not flagged as blurry.
        return False
    sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
    return bool(sharpness < threshold)
|
|
|
|
|
|
def _mean_brightness(path: str) -> float:
    """Return the average brightness of an image on a 0-255 scale.

    The image is converted to 8-bit grayscale ("L" mode) before averaging.

    Args:
        path: Path to the image file.

    Returns:
        The mean pixel value as a float in [0.0, 255.0].
    """
    # Use a context manager: Image.open is lazy and would otherwise keep the
    # underlying file handle open until garbage collection (handle leak when
    # called repeatedly over many files).
    with Image.open(path) as img:
        arr = np.asarray(img.convert("L"), dtype=np.float32)
    return float(arr.mean())
|
|
|
|
|
|
def is_overexposed(path: str, threshold: float = 240.0) -> bool:
    """Return True if the image is overexposed.

    An image counts as overexposed when its mean grayscale brightness
    (0-255) is strictly greater than *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness > threshold
|
|
|
|
|
|
def is_underexposed(path: str, threshold: float = 30.0) -> bool:
    """Return True if the image is underexposed.

    An image counts as underexposed when its mean grayscale brightness
    (0-255) is strictly less than *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness < threshold
|
|
|
|
|
|
def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
    """Find groups of visually similar images via perceptual hashing.

    Each image is hashed with pHash; two images whose hash (Hamming)
    distance is at most *threshold* are considered duplicates of each
    other. The first element of every returned group is treated as the
    original, the remaining elements as its duplicates.

    Args:
        paths: Image file paths to compare.
        threshold: Maximum pHash distance for two images to count as
            duplicates.

    Returns:
        A list of groups, each containing at least two paths. Paths that
        cannot be opened or hashed are silently skipped (best effort).
    """
    hashes = {}
    for path in paths:
        try:
            # Context manager closes the file handle promptly; Image.open
            # is lazy and would otherwise keep it open until GC — a handle
            # leak when scanning many files.
            with Image.open(path) as img:
                hashes[path] = imagehash.phash(img)
        except Exception:
            # Best effort: ignore unreadable/corrupt files.
            continue

    groups = []
    used = set()
    path_list = list(hashes.keys())

    # Greedy single-pass grouping: each unassigned image becomes the seed
    # ("original") of a group and absorbs all later images within range.
    for i, p1 in enumerate(path_list):
        if p1 in used:
            continue
        group = [p1]
        for p2 in path_list[i + 1:]:
            if p2 in used:
                continue
            if abs(hashes[p1] - hashes[p2]) <= threshold:
                group.append(p2)
                used.add(p2)
        # Only report groups that actually contain duplicates.
        if len(group) > 1:
            used.add(p1)
            groups.append(group)

    return groups
|