fix: robust text extraction for chat messages + debug output
This commit is contained in:
parent
ad0ed7528c
commit
ed3bff76e5
@ -53,12 +53,21 @@ _POLL_JS = """() => {
|
||||
const senderEl = msg.querySelector("[data-tid='message-author-name']");
|
||||
const sender = senderEl ? senderEl.innerText.trim() : '';
|
||||
if (!sender) return;
|
||||
// Extract only the message text (p elements), not the full innerText
|
||||
// which includes sender name and timestamp
|
||||
// Try <p> elements first (plain text messages)
|
||||
const pEls = msg.querySelectorAll('p');
|
||||
const text = pEls.length > 0
|
||||
? Array.from(pEls).map(p => p.innerText.trim()).filter(t => t).join('\\n')
|
||||
: msg.innerText.trim();
|
||||
let text;
|
||||
if (pEls.length > 0) {
|
||||
text = Array.from(pEls).map(p => p.innerText.trim()).filter(t => t).join('\\n');
|
||||
} else {
|
||||
// Fallback: strip sender name + timestamp from innerText
|
||||
let raw = msg.innerText.trim();
|
||||
if (sender && raw.startsWith(sender)) {
|
||||
raw = raw.slice(sender.length);
|
||||
}
|
||||
// Remove leading timestamp like "13:30" or "1:30 PM"
|
||||
raw = raw.replace(/^\s*\d{1,2}:\d{2}(?:\s*(?:AM|PM))?\s*/i, '').trim();
|
||||
text = raw;
|
||||
}
|
||||
const id = msg.getAttribute('id') || (sender + '_' + text.substring(0, 40));
|
||||
msgs.push({ sender, text, id });
|
||||
});
|
||||
@ -149,6 +158,8 @@ class Monitor:
|
||||
while True:
|
||||
try:
|
||||
new_msgs = self.poll_new_messages()
|
||||
for m in new_msgs:
|
||||
print(f" [MSG] von='{m.sender}' text={m.text[:60]!r}")
|
||||
except Exception as e:
|
||||
print(f"Verbindung verloren ({type(e).__name__}: {e!s:.120}), warte auf Chat...")
|
||||
time.sleep(2)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user