66 lines
1.8 KiB
Python
66 lines
1.8 KiB
Python
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
import imagehash
|
|
from typing import List
|
|
|
|
|
|
def is_blurry(path: str, threshold: float = 100.0) -> bool:
    """Return True if the image at *path* is blurry.

    Blur is detected via the variance of the Laplacian: a sharp image has
    strong edges and therefore a high variance. Images whose variance falls
    below *threshold* are reported as blurry.

    Args:
        path: Path to the image file.
        threshold: Laplacian-variance cutoff; below it the image counts as blurry.

    Returns:
        True when the image is readable and its Laplacian variance is below
        *threshold*; False otherwise (including unreadable files).
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # Unreadable/missing files are deliberately not flagged as blurry.
        return False
    sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
    return bool(sharpness < threshold)
|
|
|
|
|
|
def _mean_brightness(path: str) -> float:
    """Return the average brightness of an image on a 0-255 scale.

    The image is converted to 8-bit grayscale ("L" mode) before averaging.

    Args:
        path: Path to the image file.

    Returns:
        The mean pixel value as a float in [0.0, 255.0].
    """
    # Use a context manager: Image.open is lazy and would otherwise keep the
    # underlying file handle open until garbage collection (handle leak when
    # called repeatedly over many files).
    with Image.open(path) as img:
        arr = np.asarray(img.convert("L"), dtype=np.float32)
    return float(arr.mean())
|
|
|
|
|
|
def is_overexposed(path: str, threshold: float = 240.0) -> bool:
    """Return True if the image is overexposed.

    An image counts as overexposed when its mean grayscale brightness
    (0-255) is strictly greater than *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness > threshold
|
|
|
|
|
|
def is_underexposed(path: str, threshold: float = 30.0) -> bool:
    """Return True if the image is underexposed.

    An image counts as underexposed when its mean grayscale brightness
    (0-255) is strictly less than *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness < threshold
|
|
|
|
|
|
def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
    """Find groups of visually similar images via perceptual hashing.

    Each image is hashed with pHash; two images whose hash (Hamming)
    distance is at most *threshold* are considered duplicates of each
    other. The first element of every returned group is treated as the
    original, the remaining elements as its duplicates.

    Args:
        paths: Image file paths to compare.
        threshold: Maximum pHash distance for two images to count as
            duplicates.

    Returns:
        A list of groups, each containing at least two paths. Paths that
        cannot be opened or hashed are silently skipped (best effort).
    """
    hashes = {}
    for path in paths:
        try:
            # Context manager closes the file handle promptly; Image.open
            # is lazy and would otherwise keep it open until GC — a handle
            # leak when scanning many files.
            with Image.open(path) as img:
                hashes[path] = imagehash.phash(img)
        except Exception:
            # Best effort: ignore unreadable/corrupt files.
            continue

    groups = []
    used = set()
    path_list = list(hashes.keys())

    # Greedy single-pass grouping: each unassigned image becomes the seed
    # ("original") of a group and absorbs all later images within range.
    for i, p1 in enumerate(path_list):
        if p1 in used:
            continue
        group = [p1]
        for p2 in path_list[i + 1:]:
            if p2 in used:
                continue
            if abs(hashes[p1] - hashes[p2]) <= threshold:
                group.append(p2)
                used.add(p2)
        # Only report groups that actually contain duplicates.
        if len(group) > 1:
            used.add(p1)
            groups.append(group)

    return groups
|