bachlorarbeit/rateCluster.py
2025-06-16 12:29:08 +02:00

136 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import sqlite3
from time import sleep

from openai import OpenAI
# === Settings ===
DB_PATH = "clips_openai.db"
VIDEO_ID = "testVideoShort"
MAX_CLIPS = 5  # number of top-rated clips to print, or "all"

# SECURITY: the API key must never be stored in the source file. It is read
# from the environment instead. Any key that was previously committed to
# version control has to be treated as leaked and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise SystemExit("❌ Umgebungsvariable OPENAI_API_KEY ist nicht gesetzt.")
client = OpenAI(api_key=OPENAI_API_KEY)

# === DB connection
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

# The highlights table is dropped and recreated on every run, so ratings
# written by earlier runs are discarded.
cursor.execute("DROP TABLE IF EXISTS highlights")
cursor.execute("""
CREATE TABLE highlights (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file TEXT,
start REAL,
end REAL,
text TEXT,
viralitaet INTEGER,
emotionalitaet INTEGER,
witz INTEGER,
provokation INTEGER,
score_total INTEGER
)
""")
conn.commit()
print(f"🧹 Tabelle 'highlights' neu erstellt für: {VIDEO_ID}")

# === Load segments
# NOTE(review): the query is not filtered by VIDEO_ID; every row of
# `segments` is rated and stored under VIDEO_ID. Confirm that the table only
# ever holds the segments of a single video.
cursor.execute("SELECT start, end, text FROM segments ORDER BY start")
segments = cursor.fetchall()
print(f"📥 {len(segments)} Segmente (Originaltext) geladen.")
# === Scoring function (GPT-4o)
# Rating keys, spelled like the corresponding `highlights` columns.
_SCORE_KEYS = ("viralitaet", "emotionalitaet", "witz", "provokation")

# One rating line: optional non-letter decoration (bullets, numbering,
# markdown markers), the label, then the first integer that follows it.
_SCORE_LINE = re.compile(
    r"[\W\d_]*(viralitaet|emotionalitaet|witz|provokation)\D*(\d+)"
)


def _parse_scores(output):
    """Extract the four ratings from the model reply.

    Each line is lower-cased and "ä" (precomposed or as "a" + combining
    diaeresis) is rewritten to "ae" before matching. Values outside 1-10 are
    ignored. If a label occurs more than once, the last valid value wins.

    Returns:
        dict mapping every key in ``_SCORE_KEYS`` to an int, or to ``None``
        when no valid value was found for it.
    """
    values = dict.fromkeys(_SCORE_KEYS)
    for line in output.splitlines():
        normalised = line.lower().replace("a\u0308", "ä").replace("ä", "ae")
        match = _SCORE_LINE.match(normalised)
        if match is None:
            continue
        score = int(match.group(2))
        # The prompt asks for numbers between 1 and 10; anything else is
        # treated as a missing rating.
        if 1 <= score <= 10:
            values[match.group(1)] = score
    return values


def analyse_segment(text, start, end):
    """Rate one transcript segment with GPT-4o and store the result.

    Sends the segment text to the chat completions API, parses the four
    ratings from the reply, inserts a row into ``highlights`` and commits.
    Uses the module-level ``client``, ``cursor``, ``conn`` and ``VIDEO_ID``.

    Args:
        text: Transcript text of the segment.
        start: Segment start in seconds.
        end: Segment end in seconds.

    Returns:
        dict with the keys ``start``, ``end``, ``text``, ``score`` (the four
        individual ratings) and ``total`` (their sum), or ``None`` if the
        request, the parsing or the database insert failed.
    """
    print(f"\n🔎 Bewerte Clip: {start:.2f}s {end:.2f}s")
    prompt = f"""
Bewerte folgenden Podcast-Ausschnitt mit genau vier Zahlen zwischen 1 und 10. Achte darauf das es abgeschlossene Clips sind und als eigenstaendiger Clip funktionieren kann.
\"\"\"{text}\"\"\"
Dauer: {start:.2f} bis {end:.2f} Sekunden.
Antwortformat (bitte exakt einhalten, keine weiteren Kommentare):
Viralität: [Zahl]
Emotionalität: [Zahl]
Witz: [Zahl]
Provokation: [Zahl]
"""
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.4
        )
        # The message content can be None; treat that as an empty reply so it
        # is reported as an incomplete rating instead of an AttributeError.
        output = (response.choices[0].message.content or "").strip()
        print(f"📤 GPT-Antwort:\n{output}")

        values = _parse_scores(output)
        if any(v is None for v in values.values()):
            raise ValueError("Unvollständige Bewertung")

        total_score = sum(values.values())
        clean_text = text.strip()
        cursor.execute("""
INSERT INTO highlights (
file, start, end, text,
viralitaet, emotionalitaet, witz, provokation, score_total
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
            VIDEO_ID, start, end, clean_text,
            values["viralitaet"], values["emotionalitaet"],
            values["witz"], values["provokation"],
            total_score
        ))
        conn.commit()
        return {
            "start": start,
            "end": end,
            "text": clean_text,
            "score": values,
            "total": total_score
        }
    except Exception as e:
        # Deliberate per-clip boundary: one failed clip (API error, unusable
        # reply, DB error) is reported and skipped so the remaining clips
        # are still rated.
        print(f"⚠️ Fehler bei GPT-Auswertung: {e}")
        return None
# === Rate clips
rated = []
try:
    for start, end, text in segments:
        result = analyse_segment(text, float(start), float(end))
        if result:
            rated.append(result)
        # Pause between requests to stay below the API rate limit.
        sleep(1.2)

    # === Show best clips
    # Highest total score first; MAX_CLIPS limits the output unless "all".
    rated.sort(key=lambda x: x["total"], reverse=True)
    selected = rated if MAX_CLIPS == "all" else rated[:int(MAX_CLIPS)]
    print(f"\n🎬 Beste {len(selected)} Highlights für: {VIDEO_ID}")
    for clip in selected:
        print(f"\n🚀 {clip['start']:.2f}s {clip['end']:.2f}s")
        print(f"🎙️ {clip['text'][:200]}...")
        print("📊 Bewertung:")
        for k, v in clip["score"].items():
            print(f" {k.capitalize()}: {v}")
        print(f" 👉 Gesamt: {clip['total']}")
finally:
    # Close the connection even if rating or output is interrupted
    # (e.g. Ctrl-C or an unexpected error).
    conn.close()