fix: robust text extraction for chat messages + debug output

This commit is contained in:
Oliver Hofmann 2026-05-17 13:35:31 +02:00
parent ad0ed7528c
commit ed3bff76e5

View File

@@ -53,12 +53,21 @@ _POLL_JS = """() => {
const senderEl = msg.querySelector("[data-tid='message-author-name']"); const senderEl = msg.querySelector("[data-tid='message-author-name']");
const sender = senderEl ? senderEl.innerText.trim() : ''; const sender = senderEl ? senderEl.innerText.trim() : '';
if (!sender) return; if (!sender) return;
// Extract only the message text (p elements), not the full innerText // Try <p> elements first (plain text messages)
// which includes sender name and timestamp
const pEls = msg.querySelectorAll('p'); const pEls = msg.querySelectorAll('p');
const text = pEls.length > 0 let text;
? Array.from(pEls).map(p => p.innerText.trim()).filter(t => t).join('\\n') if (pEls.length > 0) {
: msg.innerText.trim(); text = Array.from(pEls).map(p => p.innerText.trim()).filter(t => t).join('\\n');
} else {
// Fallback: strip sender name + timestamp from innerText
let raw = msg.innerText.trim();
if (sender && raw.startsWith(sender)) {
raw = raw.slice(sender.length);
}
// Remove leading timestamp like "13:30" or "1:30 PM"
raw = raw.replace(/^\s*\d{1,2}:\d{2}(?:\s*(?:AM|PM))?\s*/i, '').trim();
text = raw;
}
const id = msg.getAttribute('id') || (sender + '_' + text.substring(0, 40)); const id = msg.getAttribute('id') || (sender + '_' + text.substring(0, 40));
msgs.push({ sender, text, id }); msgs.push({ sender, text, id });
}); });
@@ -149,6 +158,8 @@ class Monitor:
while True: while True:
try: try:
new_msgs = self.poll_new_messages() new_msgs = self.poll_new_messages()
for m in new_msgs:
print(f" [MSG] von='{m.sender}' text={m.text[:60]!r}")
except Exception as e: except Exception as e:
print(f"Verbindung verloren ({type(e).__name__}: {e!s:.120}), warte auf Chat...") print(f"Verbindung verloren ({type(e).__name__}: {e!s:.120}), warte auf Chat...")
time.sleep(2) time.sleep(2)