From 5ea2863b701ac3aa8f193dd496dc0d7c833f2343 Mon Sep 17 00:00:00 2001 From: Ferdinand Urban Date: Thu, 23 Apr 2026 12:58:16 +0000 Subject: [PATCH] feat: progress callbacks in analyzer, WebP support, HEIC dependency - analyzer.py: progress_callback parameter with phase reporting - analyzer.py: add .webp to SUPPORTED_EXTENSIONS - requirements.txt: add pillow-heif==1.3.0 for HEIC/HEIF support Co-Authored-By: Claude Sonnet 4.6 --- analyzer.py | 21 ++++++++++++++++++--- requirements.txt | 1 + 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/analyzer.py b/analyzer.py index eb4df75..b9e3f4e 100644 --- a/analyzer.py +++ b/analyzer.py @@ -89,7 +89,7 @@ def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]: return groups -SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png"} +SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"} def _analyze_with_ai(paths: List[str], api_key: str) -> dict: @@ -152,21 +152,28 @@ def analyze_folder( dup_threshold: int = 8, use_ai: bool = False, api_key: Optional[str] = None, + progress_callback=None, # callable(done: int, total: int, phase: str) ) -> List[dict]: """ Analysiert alle Bilder im Ordner. Gibt Liste zurueck: [{"path": "/foo/bar.jpg", "reasons": ["unscharf"]}, ...] Nur Bilder mit mindestens einem Grund werden zurueckgegeben. """ + def report(done, total, phase): + if progress_callback: + progress_callback(done, total, phase) + paths = [ os.path.join(folder, f) for f in os.listdir(folder) if os.path.splitext(f)[1].lower() in SUPPORTED_EXTENSIONS ] + total = len(paths) results: dict = {path: [] for path in paths} - for path in paths: + # Phase 1: Qualitätsanalyse pro Foto (macht ~70% der Arbeit aus) + for i, path in enumerate(paths): try: if is_blurry(path, blur_threshold): results[path].append("unscharf") @@ -175,8 +182,11 @@ def analyze_folder( if is_underexposed(path, under_threshold): results[path].append("unterbelichtet") except Exception: - continue + pass + report(i + 1, total, "quality") + # Phase 2: Exakte Kopien (MD5) + report(total, total, "exact_copies") exact_copy_paths: set = set() exact_groups = find_exact_copies(paths) for group in exact_groups: @@ -185,6 +195,8 @@ def analyze_folder( results[copy_path].append(f"exakte Kopie von {original}") exact_copy_paths.add(copy_path) + # Phase 3: Duplikate (pHash) + report(total, total, "duplicates") dup_paths = [p for p in paths if p not in exact_copy_paths] dup_groups = find_duplicates(dup_paths, dup_threshold) for group in dup_groups: @@ -192,11 +204,14 @@ def analyze_folder( for dup_path in group[1:]: results[dup_path].append(f"Duplikat von {original}") + # Phase 4: KI-Analyse (optional) if use_ai and api_key: + report(total, total, "ai") ai_results = _analyze_with_ai(paths, api_key) for path, ai_reasons in ai_results.items(): results[path].extend(ai_reasons) + report(total, total, "done") return [ {"path": path, "reasons": reasons} for path, reasons in results.items() diff --git a/requirements.txt b/requirements.txt index 7f599b1..0c3e03d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,6 @@ opencv-python-headless==4.13.0.92 imagehash==4.3.1 python-dotenv==1.0.1 anthropic==0.89.0 +pillow-heif==1.3.0 pytest==8.1.1 httpx==0.27.0