"""Create preview videos that draw detected face boxes per frame and
highlight the most likely active speaker (largest mouth openness) in green.

Reads one ``*_faces.json`` file per clip, overlays the detections onto the
matching raw clip, and writes a ``*_preview_faces.mp4`` next to the others.
"""

import cv2
import json
from pathlib import Path

from tqdm import tqdm

SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_DIR = SCRIPT_DIR.parents[1]  # goes from /src/reformat up to /BachlorArbeit
FACES_DIR = PROJECT_DIR / "data" / "face_data_combined"
INPUT_VIDEO_DIR = PROJECT_DIR / "data" / "output" / "raw_clips"
OUTPUT_DIR = PROJECT_DIR / "output" / "output_preview_faces"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# === Walk through all *_faces.json files ===
face_files = sorted(FACES_DIR.glob("*_faces.json"))

for face_file in tqdm(face_files, desc="🔍 Erzeuge Vorschau mit Sprechererkennung"):
    # "<clip>_faces.json" -> "<clip>.mp4"
    clip_name = face_file.stem.replace("_faces", "") + ".mp4"
    input_path = INPUT_VIDEO_DIR / clip_name
    output_path = OUTPUT_DIR / clip_name.replace(".mp4", "_preview_faces.mp4")

    if not input_path.exists():
        print(f"❌ Clip nicht gefunden: {clip_name}")
        continue

    # Video setup: mirror the source clip's FPS and resolution in the output.
    cap = cv2.VideoCapture(str(input_path))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = fps if fps > 1 else 25  # fallback in case the container reports FPS = 0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"avc1")  # more compatible than mp4v
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    # Load face data and index it by frame number; frames whose "faces"
    # list is empty are dropped here and fall back to [] via .get() below.
    data = json.loads(face_file.read_text())
    data_by_frame = {d["frame"]: d["faces"] for d in data if d["faces"]}
    print(f"🔢 Frames mit Gesichtern: {len(data_by_frame)}")

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        faces = data_by_frame.get(frame_idx, [])
        speaker_idx = None

        # Speaker selection via mouth openness: only attempted when every
        # face in the frame carries the metric, and only if at least one
        # face clears a simple activity threshold.
        if faces and all("mouth_openness" in f for f in faces):
            mouth_vals = [f["mouth_openness"] for f in faces]
            if any(v > 0.01 for v in mouth_vals):  # simple activity threshold
                speaker_idx = mouth_vals.index(max(mouth_vals))

        for i, face in enumerate(faces):
            x, y, w, h = face["bbox"]
            # Green box for the detected speaker, white for everyone else.
            color = (0, 255, 0) if i == speaker_idx else (255, 255, 255)
            label = f"Mouth: {face.get('mouth_openness', 0):.2f}"

            # Debug output (optional)
            print(f"Frame {frame_idx} | Face {i} | BBox: ({x},{y},{w},{h}) | Speaker: {speaker_idx}")

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        out.write(frame)
        frame_idx += 1

    cap.release()
    out.release()
    print(f"✅ Vorschau exportiert: {output_path.name}")

print("🏁 Alle Vorschauvideos mit Sprecherkennung erstellt.")