"""Create preview videos that draw detected face boxes per frame and
highlight the most likely active speaker (largest mouth openness) in green.

Reads one ``*_faces.json`` file per clip, overlays the detections onto the
matching raw clip, and writes a ``*_preview_faces.mp4`` next to the others.
"""

import cv2
import json
from pathlib import Path

from tqdm import tqdm

SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_DIR = SCRIPT_DIR.parents[1]  # goes from /src/reformat up to /BachlorArbeit
FACES_DIR = PROJECT_DIR / "data" / "face_data_combined"
INPUT_VIDEO_DIR = PROJECT_DIR / "data" / "output" / "raw_clips"
OUTPUT_DIR = PROJECT_DIR / "output" / "output_preview_faces"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# === Walk through all *_faces.json files ===
face_files = sorted(FACES_DIR.glob("*_faces.json"))

for face_file in tqdm(face_files, desc="🔍 Erzeuge Vorschau mit Sprechererkennung"):
    # "<clip>_faces.json" -> "<clip>.mp4"
    clip_name = face_file.stem.replace("_faces", "") + ".mp4"
    input_path = INPUT_VIDEO_DIR / clip_name
    output_path = OUTPUT_DIR / clip_name.replace(".mp4", "_preview_faces.mp4")

    if not input_path.exists():
        print(f"❌ Clip nicht gefunden: {clip_name}")
        continue

    # Video setup: mirror the source clip's FPS and resolution in the output.
    cap = cv2.VideoCapture(str(input_path))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = fps if fps > 1 else 25  # fallback in case the container reports FPS = 0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"avc1")  # more compatible than mp4v
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))

    # Load face data and index it by frame number; frames whose "faces"
    # list is empty are dropped here and fall back to [] via .get() below.
    data = json.loads(face_file.read_text())
    data_by_frame = {d["frame"]: d["faces"] for d in data if d["faces"]}
    print(f"🔢 Frames mit Gesichtern: {len(data_by_frame)}")

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        faces = data_by_frame.get(frame_idx, [])
        speaker_idx = None

        # Speaker selection via mouth openness: only attempted when every
        # face in the frame carries the metric, and only if at least one
        # face clears a simple activity threshold.
        if faces and all("mouth_openness" in f for f in faces):
            mouth_vals = [f["mouth_openness"] for f in faces]
            if any(v > 0.01 for v in mouth_vals):  # simple activity threshold
                speaker_idx = mouth_vals.index(max(mouth_vals))

        for i, face in enumerate(faces):
            x, y, w, h = face["bbox"]
            # Green box for the detected speaker, white for everyone else.
            color = (0, 255, 0) if i == speaker_idx else (255, 255, 255)
            label = f"Mouth: {face.get('mouth_openness', 0):.2f}"

            # Debug output (optional)
            print(f"Frame {frame_idx} | Face {i} | BBox: ({x},{y},{w},{h}) | Speaker: {speaker_idx}")

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        out.write(frame)
        frame_idx += 1

    cap.release()
    out.release()
    print(f"✅ Vorschau exportiert: {output_path.name}")

print("🏁 Alle Vorschauvideos mit Sprecherkennung erstellt.")