feat: analyze_folder orchestrates all checks

This commit is contained in:
Ferdinand
2026-04-07 13:34:46 +02:00
parent 76b77046f9
commit 2ed2ae3d16
2 changed files with 77 additions and 1 deletions

View File

@@ -2,7 +2,8 @@ import cv2
import numpy as np import numpy as np
from PIL import Image from PIL import Image
import imagehash import imagehash
from typing import List import os
from typing import List, Optional
def is_blurry(path: str, threshold: float = 100.0) -> bool: def is_blurry(path: str, threshold: float = 100.0) -> bool:
@@ -63,3 +64,57 @@ def find_duplicates(paths: List[str], threshold: int = 8) -> List[List[str]]:
groups.append(group) groups.append(group)
return groups return groups
# File extensions (lower-case, with leading dot) that analyze_folder accepts as
# images; the extension of each directory entry is lower-cased before the lookup.
SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png"}
def analyze_folder(
    folder: str,
    blur_threshold: float = 100.0,
    over_threshold: float = 240.0,
    under_threshold: float = 30.0,
    dup_threshold: int = 8,
    use_ai: bool = False,
    api_key: Optional[str] = None,
) -> List[dict]:
    """Analyze every supported image in ``folder`` and report the flagged ones.

    Each regular file whose extension is in ``SUPPORTED_EXTENSIONS`` is run
    through ``is_blurry``, ``is_overexposed`` and ``is_underexposed``; the whole
    file list is then passed to ``find_duplicates``.

    Args:
        folder: Directory to scan (not recursive).
        blur_threshold: Threshold forwarded to ``is_blurry``.
        over_threshold: Threshold forwarded to ``is_overexposed``.
        under_threshold: Threshold forwarded to ``is_underexposed``.
        dup_threshold: Threshold forwarded to ``find_duplicates``.
        use_ai: Run the additional AI analysis; only takes effect when
            ``api_key`` is also non-empty.
        api_key: Key forwarded to ``_analyze_with_ai``.

    Returns:
        A list such as ``[{"path": "/foo/bar.jpg", "reasons": ["unscharf"]}]``.
        Only images with at least one reason are included. Entries follow the
        sorted order of the file paths.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be read.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    # os.listdir returns entries in arbitrary order. Sorting makes the result
    # order, and the choice of "original" within a duplicate group, stable.
    # The isfile check drops directories that merely carry an image extension.
    candidates = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if os.path.splitext(name)[1].lower() in SUPPORTED_EXTENSIONS
    )
    paths = [path for path in candidates if os.path.isfile(path)]

    results: dict = {path: [] for path in paths}

    checks = (
        (is_blurry, blur_threshold, "unscharf"),
        (is_overexposed, over_threshold, "ueberbelichtet"),
        (is_underexposed, under_threshold, "unterbelichtet"),
    )
    for path in paths:
        try:
            for check, threshold, reason in checks:
                if check(path, threshold):
                    results[path].append(reason)
        except Exception:
            # Best effort: one unreadable image must not abort the whole scan,
            # but the failure is logged instead of being silently dropped.
            # Reasons collected before the failure are kept.
            logger.warning(
                "Quality checks failed for %s; skipping remaining checks",
                path,
                exc_info=True,
            )

    # The first path of each group is treated as the original; every other
    # member is flagged as its duplicate.
    for group in find_duplicates(paths, dup_threshold):
        original = os.path.basename(group[0])
        for dup_path in group[1:]:
            results[dup_path].append(f"Duplikat von {original}")

    if use_ai and api_key:
        # NOTE(review): _analyze_with_ai is not visible here; it is assumed to
        # return a mapping of path -> list of reason strings. setdefault avoids
        # a KeyError should it report a path that is not in the listing.
        ai_results = _analyze_with_ai(paths, api_key)
        for path, ai_reasons in ai_results.items():
            results.setdefault(path, []).extend(ai_reasons)

    return [
        {"path": path, "reasons": reasons}
        for path, reasons in results.items()
        if reasons
    ]

View File

@@ -88,3 +88,24 @@ def test_different_images_are_not_duplicates(tmp_path):
p2 = make_diagonal_image(tmp_path, "diagonal.jpg") p2 = make_diagonal_image(tmp_path, "diagonal.jpg")
groups = find_duplicates([p1, p2], threshold=8) groups = find_duplicates([p1, p2], threshold=8)
assert len(groups) == 0 assert len(groups) == 0
from analyzer import analyze_folder
def test_analyze_folder_returns_results(tmp_path):
    """analyze_folder reports "ueberbelichtet" for a folder holding an all-white image."""
    from PIL import Image

    # Helper defined elsewhere in this test module; presumably writes a grey
    # image into tmp_path (TODO confirm).
    make_test_image(tmp_path, color=(128, 128, 128))

    # Solid white 100x100 image saved next to it.
    white_path = tmp_path / "white.jpg"
    white_image = Image.new("RGB", (100, 100), color=(255, 255, 255))
    white_image.save(white_path)

    flagged = analyze_folder(
        folder=str(tmp_path),
        blur_threshold=100,
        over_threshold=240,
        under_threshold=30,
        dup_threshold=8,
        use_ai=False,
    )

    # At least one returned entry must carry the overexposure reason.
    assert any("ueberbelichtet" in entry["reasons"] for entry in flagged)