feat: browser-based email extraction from Teams profile cards

This commit is contained in:
Oliver Hofmann 2026-05-16 17:28:52 +02:00
parent 3332592bef
commit 290bb553a4
2 changed files with 84 additions and 2 deletions

58
scripts/discover_dom.py Normal file
View File

@ -0,0 +1,58 @@
"""Run this script once with a Teams meeting chat open.
It prints DOM info to identify selectors for messages and profile cards.
Usage:
.venv/bin/python scripts/discover_dom.py [teams-chat-url]
"""
import sys
from pathlib import Path
from playwright.sync_api import sync_playwright
# Page to open first: the first command-line argument when one is given,
# otherwise the Teams landing page.
URL = sys.argv[1] if sys.argv[1:] else "https://teams.microsoft.com"
# Directory under the user's home that main() hands to Playwright as the
# persistent browser profile.
SESSION_DIR = str(Path.home().joinpath(".teampulse", "session"))
def _dump_elements(elements, label):
    """Print tag, data-tid, class and text for at most five elements.

    Args:
        elements: Element handles as returned by ``page.query_selector_all``.
        label: Word printed in the heading line before each element's index.
    """
    for i, el in enumerate(elements[:5]):
        print(f"\n--- {label} {i} ---")
        print(f" tag: {el.evaluate('e => e.tagName')}")
        print(f" data-tid: {el.get_attribute('data-tid')}")
        # class may be absent; class and text are cut to 100 characters and
        # newlines in the text are flattened so each element stays on one line.
        print(f" class (100 Zch): {(el.get_attribute('class') or '')[:100]}")
        print(f" text (100 Zch): {el.inner_text()[:100].replace(chr(10), ' ')}")


def main():
    """Open URL in a persistent Chromium profile and print selector hints.

    The run pauses on stdin three times: before scanning for message-like
    elements, before scanning for profile-card-like elements, and before
    closing. For each scan the match count and the first five matches are
    printed.
    """
    with sync_playwright() as p:
        # launch_persistent_context returns a browser context, not a browser.
        # NOTE(review): --no-sandbox turns off Chromium's sandbox; confirm it
        # is really required on the machines this script runs on.
        context = p.chromium.launch_persistent_context(
            SESSION_DIR,
            headless=False,
            args=["--no-sandbox"],
        )
        try:
            page = context.pages[0] if context.pages else context.new_page()
            page.goto(URL)

            print("Navigiere im Browser zu einem Meeting-Chat, dann hier Enter drücken...")
            input()
            candidates = page.query_selector_all(
                "[data-tid*='message'], [class*='message'], [role='listitem']"
            )
            print(f"\n{len(candidates)} potenzielle Nachrichten-Elemente gefunden")
            _dump_elements(candidates, "Element")

            print("\nKlicke im Browser auf einen Sender-Namen, dann hier Enter drücken...")
            input()
            cards = page.query_selector_all(
                "[class*='persona'], [class*='Persona'], [class*='profile'], [data-tid*='persona']"
            )
            print(f"\n{len(cards)} potenzielle Profilkarten-Elemente gefunden")
            _dump_elements(cards, "Karte")

            print("\nEnter zum Schließen...")
            input()
        finally:
            # Close the context even when a step above raises (for example
            # Ctrl+C at a prompt), so the browser is shut down explicitly.
            context.close()
# Start the interactive session only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

View File

@ -1,8 +1,11 @@
import json import json
import time
from pathlib import Path from pathlib import Path
from playwright.sync_api import Page from playwright.sync_api import Page
from teampulse.monitor import _PROFILE_EMAIL_SELECTOR, _SENDER_SELECTOR
class Resolver: class Resolver:
def __init__(self, cache_path: Path, page: Page): def __init__(self, cache_path: Path, page: Page):
@ -34,5 +37,26 @@ class Resolver:
) )
def _extract_email_from_profile(self, display_name: str) -> str | None:
    """Read a sender's email address from their profile card on the page.

    Clicks the first element matching ``_SENDER_SELECTOR`` whose trimmed
    text equals ``display_name``, waits up to 5 seconds for an element
    matching ``_PROFILE_EMAIL_SELECTOR`` and returns its trimmed text.
    Escape is pressed afterwards (presumably to close the card), followed
    by a 500 ms pause, on both the success and the failure path.

    Args:
        display_name: Sender name to look for among the sender elements.

    Returns:
        The email text, or ``None`` when no sender element matches, the
        email element does not appear in time, or its text is empty.
    """
    target = next(
        (
            el
            for el in self._page.query_selector_all(_SENDER_SELECTOR)
            if el.inner_text().strip() == display_name
        ),
        None,
    )
    if target is None:
        print(f" Sender '{display_name}' nicht im Chat gefunden.")
        return None

    try:
        target.click()
        # wait_for_selector hands back the matched element, so a second
        # query for the same selector is unnecessary.
        email_el = self._page.wait_for_selector(
            _PROFILE_EMAIL_SELECTOR, timeout=5000
        )
        email = email_el.inner_text().strip() if email_el else None
        # Treat an empty text as "no email" rather than returning "".
        return email or None
    except Exception as e:
        # Best-effort lookup: any failure while opening or reading the card
        # is reported and turned into None instead of aborting the caller.
        print(f" Profilkarte für '{display_name}' nicht ladbar: {e}")
        return None
    finally:
        # Runs once on every path after the click. wait_for_timeout is used
        # instead of time.sleep, which Playwright's sync API advises against.
        self._page.keyboard.press("Escape")
        self._page.wait_for_timeout(500)