2025-10-19 16:22:26 +02:00

233 lines
9.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Run the full Bachelor pipeline end-to-end with timing, errors, and flexible flags.
Steps:
1) transcription.py → Whisper transcripts (segments + timed words)
2) segment_transcript.py → LLM selects highlight candidates → SQLite
3) cutClips.py → export highlight_*.mp4 (raw clips)
4) main_detect_faces.py → YOLO + MediaPipe → faces.json per clip
5) make_segments.py → *_target_by_frame.json (center+side per frame)
6) main_apply_crop.py → 9:16 crop with smoothing & optional audio mux
7) rateCluster.py → (optional) LLM scoring (virality, emotion, ...)
8) add_subtitles.py → (optional) word-cap subtitles burned in
Usage examples:
python main.py --input data/input/meinvideo.mp4 --limit 10 --openai-model gpt-4o
python main.py --no-rate --no-subs
"""
from __future__ import annotations
import argparse
import os
import sys
import subprocess
import time
from datetime import datetime
from pathlib import Path
# --- Import project config ---
# First try a plain import. If `config` is not importable from the current
# sys.path, add this file's directory to sys.path and try again (the fallback
# assumes config.py sits next to this file).
try:
    from config import (
        PROJECT_ROOT, INPUT_DIR, RAW_CLIPS_DIR, CROPPED_DIR, SUBTITLED_DIR,
        WHISPER_CACHE_DIR,
    )
except ImportError:
    # Only an import failure justifies the retry. Any other exception raised
    # while executing config.py is a genuine configuration error and should
    # surface immediately instead of being retried.
    PROJECT_ROOT = Path(__file__).resolve().parent
    sys.path.insert(0, str(PROJECT_ROOT))
    from config import (
        PROJECT_ROOT, INPUT_DIR, RAW_CLIPS_DIR, CROPPED_DIR, SUBTITLED_DIR,
        WHISPER_CACHE_DIR,
    )

# Directory for per-run log files; created at import time if missing.
LOGS_DIR = PROJECT_ROOT / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)
# --- Paths to the individual pipeline step scripts ---
# Maps each step key to the path (as a string) of its script below
# PROJECT_ROOT/src/<package>/<filename>.
SCRIPTS = {
    step: str(PROJECT_ROOT.joinpath("src", package, filename))
    for step, package, filename in (
        ("transcription", "text", "transcription.py"),
        ("segment_transcript", "text", "segment_transcript.py"),
        ("cutClips", "text", "cutClips.py"),
        ("detect_faces", "reformat", "main_detect_faces.py"),
        ("make_segments", "reformat", "make_segments.py"),
        ("apply_crop", "reformat", "main_apply_crop.py"),
        ("rateCluster", "text", "rateCluster.py"),
        ("add_subtitles", "subtitles", "add_subtitles.py"),
    )
}
def shlex_join(cmd):
    """Return the command *cmd* as one shell-quoted string for display.

    Each element is converted with ``str()`` and quoted with
    ``shlex.quote``, so arguments containing spaces or shell metacharacters
    (e.g. file paths) stay unambiguous and the printed line can be pasted
    into a POSIX shell. Arguments without special characters are emitted
    unchanged.

    Args:
        cmd: Iterable of command parts (strings, paths, numbers, ...).

    Returns:
        The parts joined by single spaces; ``""`` for an empty command.
    """
    # Function-scope import keeps this block self-contained.
    from shlex import quote

    return " ".join(quote(str(part)) for part in cmd)
def run_step(cmd: list[str], name: str, env: dict[str, str] | None = None) -> float:
    """Run one pipeline step as a subprocess and return its duration.

    Prints a banner with the step name and the command line, runs the
    command, and prints the elapsed time on success.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list.
        name: Human-readable step name used in all messages.
        env: Environment for the child process; ``None`` inherits the
            current environment.

    Returns:
        Elapsed wall-clock time in seconds.

    Raises:
        SystemExit: With the child's exit code when it is non-zero.
    """
    t0 = time.perf_counter()
    print(f"\n===== {name} =====")
    print(" ", shlex_join(cmd))
    # The child writes to the inherited file descriptors directly. Flush our
    # own buffered output first so the banner cannot end up after the child's
    # output when stdout/stderr are redirected to a file or pipe.
    sys.stdout.flush()
    sys.stderr.flush()
    cp = subprocess.run(cmd, env=env)
    dt = time.perf_counter() - t0
    if cp.returncode != 0:
        print(f"❌ Fehler in {name} (Exit {cp.returncode}) nach {dt:.2f}s")
        print(" → Prüfe das Logfile oben für Details und stelle sicher, dass Abhängigkeiten installiert sind:")
        print(" - ffmpeg/ffprobe im PATH")
        print(" - Python-Pakete: openai-whisper, torch, ffmpeg-python, ultralytics, opencv-python, mediapipe, moviepy, tqdm, numpy")
        print(" - OPENAI_API_KEY gesetzt (für LLM-Schritte)")
        # Propagate the child's exit code so callers/shells see the failure.
        raise SystemExit(cp.returncode)
    print(f"{name} in {dt:.2f}s")
    return dt
def infer_base_from_input(input_path: Path | str) -> str:
    """Return the base name of *input_path*: its file name without the last suffix.

    Accepts a ``Path`` as well as a plain string or other path-like object.

    Args:
        input_path: Path of the input media file.

    Returns:
        The final path component with its last suffix removed,
        e.g. ``"clip.final"`` for ``"dir/clip.final.mov"``.
    """
    return Path(input_path).stem
def default_input(input_dir: Path | None = None) -> Path | None:
    """Return the first media file in the input directory, or ``None``.

    Entries are visited in sorted order. An entry qualifies when it is a
    regular file whose suffix (case-insensitive) is one of the known
    audio/video extensions; directories that merely carry such a suffix are
    skipped.

    Args:
        input_dir: Directory to scan. Defaults to the module-level
            ``INPUT_DIR`` when omitted.

    Returns:
        Path of the first matching file, or ``None`` when the directory is
        missing, is not a directory, or contains no matching file.
    """
    media_suffixes = {".mp4", ".mov", ".mkv", ".m4v", ".mp3", ".wav"}
    search_dir = INPUT_DIR if input_dir is None else Path(input_dir)
    # is_dir() also covers the "path exists but is a file" case, which would
    # otherwise make iterdir() raise.
    if not search_dir.is_dir():
        return None
    for candidate in sorted(search_dir.iterdir()):
        if candidate.suffix.lower() in media_suffixes and candidate.is_file():
            return candidate
    return None
class _Tee:
    """Write-only stream that forwards every write to several streams."""

    def __init__(self, *streams):
        self.streams = streams

    def write(self, data):
        for stream in self.streams:
            try:
                stream.write(data)
                stream.flush()
            except Exception:
                # Deliberately best-effort: a failing sink (closed file,
                # console that cannot encode a character) must not abort
                # the pipeline.
                pass

    def flush(self):
        for stream in self.streams:
            try:
                stream.flush()
            except Exception:
                pass  # best-effort, see write()


def _resolve_input(raw: str | None, invocation_cwd: Path) -> Path:
    """Resolve the ``--input`` value to an existing file or exit.

    Must be called after the working directory was changed to PROJECT_ROOT.
    Lookup order: the path as given (i.e. relative to the current working
    directory), then inside ``INPUT_DIR``, then relative to
    *invocation_cwd* (the directory the user started the script from).
    Without a value, the first media file found by ``default_input()`` is used.

    Raises:
        SystemExit: When no matching file exists.
    """
    if not raw:
        picked = default_input()
        if not picked:
            raise SystemExit(f"Kein Input in {INPUT_DIR} gefunden. Bitte --input setzen.")
        return picked
    given = Path(raw)
    for candidate in (given, INPUT_DIR / raw, invocation_cwd / given):
        if candidate.is_file():
            return candidate
    raise SystemExit(f"Input nicht gefunden: {raw}")


def main():
    """Parse CLI flags and run all pipeline steps in order.

    Steps 1-3 (transcription, LLM segmentation, clip cutting) always run;
    steps 4-8 can be skipped individually with the ``--no-*`` flags. Output
    printed by this process is mirrored into a log file. NOTE(review): the
    child processes write to the inherited stdout/stderr directly, so their
    output does not appear to reach the log file.

    A summary with per-step durations is always printed, even when a step
    fails or the run is interrupted.

    Raises:
        SystemExit: With a step's exit code when it fails, with a message
            when the input cannot be found, or with 130 after Ctrl+C.
    """
    ap = argparse.ArgumentParser(description="Bachelor Pipeline Runner")
    ap.add_argument("--input", type=str, default=None, help="Pfad zu Eingabedatei (Default: erstes File in data/input)")
    ap.add_argument("--limit", type=int, default=10, help="Anzahl Highlights (cutClips)")
    ap.add_argument("--whisper-model", type=str, default=os.getenv("WHISPER_MODEL", "small"))
    ap.add_argument("--lang", type=str, default=None, help="Sprachcode (z. B. de)")
    ap.add_argument("--openai-model", type=str, default=os.getenv("OPENAI_MODEL", "gpt-4o"))
    ap.add_argument("--pattern", type=str, default="highlight_*.mp4")
    ap.add_argument("--overwrite", action="store_true")
    ap.add_argument("--no-rate", action="store_true")
    ap.add_argument("--no-subs", action="store_true")
    ap.add_argument("--no-detect", action="store_true")
    ap.add_argument("--no-make", action="store_true")
    ap.add_argument("--no-apply", action="store_true")
    ap.add_argument("--logfile", type=str, default=None)
    args = ap.parse_args()

    # Remember where the user started us so a relative --input can still be
    # found after switching to the project root.
    invocation_cwd = Path.cwd()
    os.chdir(PROJECT_ROOT)

    env = os.environ.copy()
    # The argparse default already falls back to $OPENAI_MODEL, so assigning
    # lets an explicit --openai-model win over the environment variable
    # (setdefault would silently ignore the flag).
    env["OPENAI_MODEL"] = args.openai_model
    env.setdefault("XDG_CACHE_HOME", str(WHISPER_CACHE_DIR))
    if not env.get("OPENAI_API_KEY"):
        print("⚠️ OPENAI_API_KEY ist nicht gesetzt LLM-Schritte könnten fehlschlagen.")

    # Determine the input file.
    input_path = _resolve_input(args.input, invocation_cwd)
    base = infer_base_from_input(input_path)
    print(f"📥 Input: {input_path}")
    print(f"🔤 Whisper: {args.whisper_model} | 🌐 LLM: {env.get('OPENAI_MODEL')}")
    print(f"🧩 Base: {base}")

    # Log file: explicit path or a timestamped file in LOGS_DIR.
    if args.logfile:
        log_path = Path(args.logfile)
    else:
        log_path = LOGS_DIR / f"run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

    # Tee: write to the file AND the console. The previous streams are kept
    # so they can be restored once the run is over.
    prev_stdout, prev_stderr = sys.stdout, sys.stderr
    log_fh = None
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        log_fh = open(log_path, "w", encoding="utf-8")
    except OSError:
        print(f"⚠️ Konnte Logfile nicht initialisieren: {log_path}")
    else:
        sys.stdout = _Tee(sys.__stdout__, log_fh)
        sys.stderr = _Tee(sys.__stderr__, log_fh)
        print(f"📝 Logfile: {log_path}")

    # Step table: (skip?, label, script arguments). The label doubles as the
    # name shown in the banner, the summary and the skip message.
    transcription_args = [SCRIPTS["transcription"], "--input", str(input_path), "--model", args.whisper_model]
    if args.lang:
        transcription_args += ["--lang", args.lang]
    apply_crop_args = [SCRIPTS["apply_crop"], "--pattern", args.pattern, "--mux_audio"]
    if args.overwrite:
        apply_crop_args.append("--overwrite")
    steps = (
        (False, "Transcription", transcription_args),
        (False, "Segment Transcript", [SCRIPTS["segment_transcript"], "--base", base]),
        # cutClips receives only the file name, not the full path.
        (False, "Cut Clips", [SCRIPTS["cutClips"], "--file", input_path.name, "--limit", str(args.limit)]),
        (args.no_detect, "Detect Faces", [SCRIPTS["detect_faces"]]),
        (args.no_make, "Make Targets", [SCRIPTS["make_segments"], "--pattern", args.pattern]),
        (args.no_apply, "Apply Crop", apply_crop_args),
        (args.no_rate, "Rate Clusters", [SCRIPTS["rateCluster"]]),
        (args.no_subs, "Subtitles", [SCRIPTS["add_subtitles"]]),
    )

    durations: list[tuple[str, float]] = []
    interrupted = False
    started = datetime.now()
    print(f"🚀 Start: {started:%Y-%m-%d %H:%M:%S}")
    try:
        for skipped, label, script_args in steps:
            if skipped:
                print(f"⏭️ {label} übersprungen.")
                continue
            elapsed = run_step([sys.executable, *script_args], label, env=env)
            durations.append((label, elapsed))
    except KeyboardInterrupt:
        interrupted = True
        print("\n⛔ Abgebrochen (Ctrl+C).")
    finally:
        try:
            finished = datetime.now()
            total = sum(dt for _, dt in durations)
            print("\n======================== ZUSAMMENFASSUNG ============================")
            for name, dt in durations:
                print(f"{name:<24} {dt:7.2f}s")
            print("---------------------------------------------------------------------")
            print(f"⏱️ Gesamtdauer: {total:.2f}s")
            print(f"🕒 Start : {started:%Y-%m-%d %H:%M:%S}")
            print(f"🕒 Ende : {finished:%Y-%m-%d %H:%M:%S}")
            print(f"📂 Output:")
            print(f" Raw Clips : {RAW_CLIPS_DIR}")
            print(f" 9:16 : {CROPPED_DIR}")
            print(f" Subbed : {SUBTITLED_DIR}")
            print("=====================================================================")
        finally:
            # Undo the tee only after the summary was written, then release
            # the log file handle.
            sys.stdout, sys.stderr = prev_stdout, prev_stderr
            if log_fh is not None:
                log_fh.close()
    if interrupted:
        # 128 + SIGINT: report the abort instead of exiting with success.
        raise SystemExit(130)
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    raise SystemExit(main())