# bachlorarbeit/src/reformat/old/preview_faces.py
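# Docstring added for orientation; the JSON shape below is inferred from how
# this script reads the files, not from a separate spec.
"""Render annotated preview clips with a naive speaker highlight.

For every <clip>_faces.json in data/face_data_combined, the matching clip in
data/output/raw_clips is re-encoded with per-face bounding boxes drawn in; the
face with the widest mouth opening (above a small threshold) is marked as the
active speaker.

Assumed per-frame record shape:
    {"frame": 0, "faces": [{"bbox": [x, y, w, h], "mouth_openness": 0.03}, ...]}
"""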

import cv2
import json
from pathlib import Path
from tqdm import tqdm

SCRIPT_DIR = Path(__file__).resolve().parent
# Three levels up (old -> reformat -> src) to the project root /BachlorArbeit.
# The original comment assumed the script still lived in /src/reformat, where
# parents[1] would have been correct; from this file's location it is parents[2].
PROJECT_DIR = SCRIPT_DIR.parents[2]
FACES_DIR = PROJECT_DIR / "data" / "face_data_combined"
INPUT_VIDEO_DIR = PROJECT_DIR / "data" / "output" / "raw_clips"
OUTPUT_DIR = PROJECT_DIR / "output" / "output_preview_faces"
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
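
# Assumed directory layout, reconstructed from the constants above:
#   BachlorArbeit/
#   ├── data/face_data_combined/        <clip>_faces.json
#   ├── data/output/raw_clips/          <clip>.mp4
#   └── output/output_preview_faces/    (created here)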

# === Process every *_faces.json file ===
face_files = sorted(FACES_DIR.glob("*_faces.json"))
for face_file in tqdm(face_files, desc="🔍 Creating previews with speaker detection"):
    clip_name = face_file.stem.replace("_faces", "") + ".mp4"
    input_path = INPUT_VIDEO_DIR / clip_name
    output_path = OUTPUT_DIR / clip_name.replace(".mp4", "_preview_faces.mp4")

    if not input_path.exists():
        print(f"❌ Clip not found: {clip_name}")
        continue

    # Video setup
    cap = cv2.VideoCapture(str(input_path))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = fps if fps > 1 else 25  # fallback in case OpenCV reports FPS as 0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"avc1")  # more widely playable than mp4v
    out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
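    # Sketch of a fallback, not in the original: "avc1" requires an OpenCV build
    # with an H.264 encoder, so retry with "mp4v" if the writer failed to open.
    if not out.isOpened():
        out = cv2.VideoWriter(str(output_path), cv2.VideoWriter_fourcc(*"mp4v"),
                              fps, (width, height))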

    # Load face data
    data = json.loads(face_file.read_text())
    data_by_frame = {d["frame"]: d["faces"] for d in data if d["faces"]}
    print(f"🔢 Frames with faces: {len(data_by_frame)}")

    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        faces = data_by_frame.get(frame_idx, [])
        speaker_idx = None

        # Pick the speaker by mouth openness: the most open mouth wins, but
        # only if at least one face clears a simple activity threshold.
        if faces and all("mouth_openness" in f for f in faces):
            mouth_vals = [f["mouth_openness"] for f in faces]
            if any(v > 0.01 for v in mouth_vals):
                speaker_idx = mouth_vals.index(max(mouth_vals))
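        # Worked example (illustrative values): mouth_vals = [0.005, 0.08, 0.02]
        # -> any(v > 0.01) holds, so speaker_idx = mouth_vals.index(0.08) = 1.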

        for i, face in enumerate(faces):
            x, y, w, h = face["bbox"]
            color = (0, 255, 0) if i == speaker_idx else (255, 255, 255)  # speaker in green
            label = f"Mouth: {face.get('mouth_openness', 0):.2f}"
            # Debug output (optional; one line per face per frame)
            print(f"Frame {frame_idx} | Face {i} | BBox: ({x},{y},{w},{h}) | Speaker: {speaker_idx}")
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        out.write(frame)
        frame_idx += 1

    cap.release()
    out.release()
    print(f"✅ Preview exported: {output_path.name}")
print("🏁 Alle Vorschauvideos mit Sprecherkennung erstellt.")