# OnlyFrames/analyzer.py

import cv2
import numpy as np
from PIL import Image
import imagehash
import os
import base64
from typing import List, Optional


def is_blurry(path: str, threshold: float = 100.0) -> bool:
    """Report whether the image at *path* looks blurry.

    Sharpness is measured as the variance of the Laplacian of the grayscale
    image; the image counts as blurry when that variance is strictly below
    *threshold*. A file that OpenCV cannot read is reported as not blurry.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
        return bool(sharpness < threshold)
    # cv2.imread returned None, i.e. the file could not be loaded.
    return False


def _mean_brightness(path: str) -> float:
    """Return the mean brightness of the image at *path*.

    The image is converted to 8-bit grayscale (Pillow mode "L") before
    averaging, so the result lies in the range 0-255.

    Exceptions raised while opening or decoding the file are propagated to
    the caller.
    """
    # The context manager releases the file handle even when decoding fails;
    # the converted grayscale copy stays usable after the source is closed.
    with Image.open(path) as img:
        gray = img.convert("L")
    arr = np.array(gray, dtype=np.float32)
    return float(arr.mean())


def is_overexposed(path: str, threshold: float = 240.0) -> bool:
    """Report whether the image at *path* is overexposed.

    True when the image's mean brightness is strictly greater than
    *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness > threshold


def is_underexposed(path: str, threshold: float = 30.0) -> bool:
    """Report whether the image at *path* is underexposed.

    True when the image's mean brightness is strictly less than
    *threshold*.
    """
    brightness = _mean_brightness(path)
    return brightness < threshold


def find_exact_copies(paths: List[str]) -> List[List[str]]:
    """Group byte-identical files by their MD5 digest.

    Each returned group holds two or more paths with identical content, in
    the order they appear in *paths*; the first entry of a group is treated
    as the original and the remaining entries as its copies. Paths that
    cannot be read are skipped.
    """
    import hashlib

    block_size = 65536
    by_digest: dict = {}

    for candidate in paths:
        try:
            md5 = hashlib.md5()
            with open(candidate, "rb") as stream:
                # Hash in fixed-size blocks so large files are never read
                # into memory in one piece.
                while True:
                    block = stream.read(block_size)
                    if not block:
                        break
                    md5.update(block)
            key = md5.hexdigest()
        except Exception:
            # Best effort: an unreadable file simply takes no part.
            continue

        if key in by_digest:
            by_digest[key].append(candidate)
        else:
            by_digest[key] = [candidate]

    copies = []
    for members in by_digest.values():
        if len(members) > 1:
            copies.append(members)
    return copies


def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
    """Group visually similar images using perceptual hashing.

    A perceptual hash is computed for every path. Walking the paths in
    order, each image that is not yet part of a group collects all later,
    still ungrouped images whose hash distance to it is at most
    *threshold*.

    Args:
        paths: Image files to compare.
        threshold: Largest hash distance that still counts as a duplicate.

    Returns:
        Groups of two or more paths. The first entry of a group is treated
        as the original and the remaining entries as its duplicates. Images
        that cannot be opened or hashed are skipped.
    """
    hashes = {}
    for path in paths:
        try:
            # Close the file handle as soon as the hash has been computed.
            with Image.open(path) as img:
                hashes[path] = imagehash.phash(img)
        except Exception:
            # Best effort: an unreadable image simply takes no part.
            continue

    groups = []
    used = set()
    path_list = list(hashes)

    for i, p1 in enumerate(path_list):
        if p1 in used:
            continue
        group = [p1]
        for p2 in path_list[i + 1:]:
            if p2 in used:
                continue
            # Subtracting two image hashes yields their Hamming distance,
            # which is never negative, so no abs() is needed.
            if hashes[p1] - hashes[p2] <= threshold:
                group.append(p2)
                used.add(p2)
        if len(group) > 1:
            groups.append(group)

    return groups


# Lower-case file extensions (including the leading dot) accepted as images;
# analyze_folder compares each file name's lower-cased extension to this set.
SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png"}


def _analyze_with_ai(paths: List[str], api_key: str) -> dict:
    """Ask the Claude vision API for quality problems in each image.

    Every image is sent in its own request together with a fixed German
    prompt that asks for a comma-separated list of problems, or 'ok' when
    the picture is fine.

    Args:
        paths: Image files to analyze; a ".jpg"/".jpeg" extension is sent
            as JPEG, anything else as PNG.
        api_key: API key handed to the Anthropic client.

    Returns:
        A dict mapping every input path to a list of lower-cased problem
        strings. The list is empty when the model answered 'ok' or when
        reading the file or calling the API failed for that path.
    """
    import anthropic

    client = anthropic.Anthropic(api_key=api_key)
    ai_results: dict = {path: [] for path in paths}

    prompt = (
        "Analysiere dieses Foto auf Qualitaetsprobleme fuer einen professionellen Fotografen. "
        "Antworte NUR mit einer kommagetrennten Liste von Problemen aus diesen Kategorien: "
        "unscharf, ueberbelichtet, unterbelichtet, schlechter Bildausschnitt, stoerende Elemente, "
        "schlechter Weissabgleich. Wenn das Bild in Ordnung ist, antworte mit 'ok'."
    )

    # Characters trimmed from each answer item, so that replies such as
    # "'ok'" or "ok." are still recognized and not reported as a problem.
    trim_chars = " \t\r\n.!'\"`"

    for path in paths:
        try:
            with open(path, "rb") as f:
                img_data = base64.standard_b64encode(f.read()).decode("utf-8")
            ext = os.path.splitext(path)[1].lower()
            media_type = "image/jpeg" if ext in (".jpg", ".jpeg") else "image/png"

            response = client.messages.create(
                model="claude-opus-4-6",
                max_tokens=100,
                messages=[{
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": img_data,
                            },
                        },
                        {"type": "text", "text": prompt},
                    ],
                }],
            )
            answer = response.content[0].text.strip().lower()
        except Exception:
            # Best effort: on any failure the path keeps its empty list.
            continue

        items = (item.strip(trim_chars) for item in answer.split(","))
        ai_results[path].extend(item for item in items if item and item != "ok")

    return ai_results


def analyze_folder(
    folder: str,
    blur_threshold: float = 100.0,
    over_threshold: float = 240.0,
    under_threshold: float = 30.0,
    dup_threshold: int = 8,
    use_ai: bool = False,
    api_key: Optional[str] = None,
) -> List[dict]:
    """Analyze every supported image directly inside *folder*.

    Runs the blur and exposure checks, flags byte-identical copies and
    perceptual duplicates, and optionally adds the findings of the AI
    analysis.

    Args:
        folder: Directory to scan (not recursive).
        blur_threshold: Threshold passed to is_blurry.
        over_threshold: Threshold passed to is_overexposed.
        under_threshold: Threshold passed to is_underexposed.
        dup_threshold: Threshold passed to find_duplicates.
        use_ai: Run the AI analysis as well; only takes effect when
            *api_key* is also set.
        api_key: API key for the AI analysis.

    Returns:
        A list such as [{"path": "/foo/bar.jpg", "reasons": ["unscharf"]}, ...]
        that contains only images with at least one reason.
    """
    # Sort the listing: os.listdir returns entries in arbitrary order, and
    # the first file of a copy/duplicate group is reported as the original.
    # Entries that are not regular files are ignored.
    paths = [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if os.path.splitext(name)[1].lower() in SUPPORTED_EXTENSIONS
        and os.path.isfile(os.path.join(folder, name))
    ]

    results: dict = {path: [] for path in paths}

    for path in paths:
        try:
            if is_blurry(path, blur_threshold):
                results[path].append("unscharf")
            if is_overexposed(path, over_threshold):
                results[path].append("ueberbelichtet")
            if is_underexposed(path, under_threshold):
                results[path].append("unterbelichtet")
        except Exception:
            # Best effort: an image that cannot be decoded keeps whatever
            # reasons were collected before the failure.
            continue

    exact_copy_paths: set = set()
    for group in find_exact_copies(paths):
        original = os.path.basename(group[0])
        for copy_path in group[1:]:
            results[copy_path].append(f"exakte Kopie von {original}")
            exact_copy_paths.add(copy_path)

    # Exact copies are already flagged, so keep them out of the perceptual
    # comparison to avoid reporting them a second time as duplicates.
    dup_paths = [p for p in paths if p not in exact_copy_paths]
    for group in find_duplicates(dup_paths, dup_threshold):
        original = os.path.basename(group[0])
        for dup_path in group[1:]:
            results[dup_path].append(f"Duplikat von {original}")

    if use_ai and api_key:
        ai_results = _analyze_with_ai(paths, api_key)
        for path, ai_reasons in ai_results.items():
            for reason in ai_reasons:
                # The AI uses the same labels as the local checks (e.g.
                # "unscharf"); do not list the same reason twice.
                if reason not in results[path]:
                    results[path].append(reason)

    return [
        {"path": path, "reasons": reasons}
        for path, reasons in results.items()
        if reasons
    ]