feat: browser-based email extraction from Teams profile cards
This commit is contained in:
parent
3332592bef
commit
290bb553a4
58
scripts/discover_dom.py
Normal file
58
scripts/discover_dom.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""Run this script once with a Teams meeting chat open.
|
||||
It prints DOM info to identify selectors for messages and profile cards.
|
||||
|
||||
Usage:
|
||||
.venv/bin/python scripts/discover_dom.py [teams-chat-url]
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Start page: the first command-line argument when one is given, otherwise
# the Teams landing page.
URL = sys.argv[1] if sys.argv[1:] else "https://teams.microsoft.com"
# Directory passed to Playwright as the persistent browser profile.
SESSION_DIR = str(Path.home().joinpath(".teampulse", "session"))
|
||||
|
||||
|
||||
def _dump_elements(label, elements, limit=5):
    """Print tag, data-tid, class and text previews for the first *limit* elements."""
    for i, el in enumerate(elements[:limit]):
        print(f"\n--- {label} {i} ---")
        print(f" tag: {el.evaluate('e => e.tagName')}")
        print(f" data-tid: {el.get_attribute('data-tid')}")
        # class may be absent; previews are capped at 100 characters.
        print(f" class (100 Zch): {(el.get_attribute('class') or '')[:100]}")
        # Newlines are flattened so each preview stays on one output line.
        print(f" text (100 Zch): {el.inner_text()[:100].replace(chr(10), ' ')}")


def main():
    """Interactively dump DOM details that help pick Teams selectors.

    Opens a headed Chromium window on the persistent profile in
    ``SESSION_DIR``, navigates to ``URL`` and then pauses three times for
    the operator to press Enter: once after opening a meeting chat (message
    candidates are printed), once after clicking a sender name (profile-card
    candidates are printed), and once before shutting down.
    """
    with sync_playwright() as p:
        # launch_persistent_context returns a BrowserContext, not a Browser.
        # NOTE(review): --no-sandbox turns off Chromium's sandbox; confirm it
        # is really needed on the machines this script runs on.
        context = p.chromium.launch_persistent_context(
            SESSION_DIR,
            headless=False,
            args=["--no-sandbox"],
        )
        try:
            page = context.pages[0] if context.pages else context.new_page()
            page.goto(URL)
            print("Navigiere im Browser zu einem Meeting-Chat, dann hier Enter drücken...")
            input()

            candidates = page.query_selector_all(
                "[data-tid*='message'], [class*='message'], [role='listitem']"
            )
            print(f"\n{len(candidates)} potenzielle Nachrichten-Elemente gefunden")
            _dump_elements("Element", candidates)

            print("\nKlicke im Browser auf einen Sender-Namen, dann hier Enter drücken...")
            input()

            cards = page.query_selector_all(
                "[class*='persona'], [class*='Persona'], [class*='profile'], [data-tid*='persona']"
            )
            print(f"\n{len(cards)} potenzielle Profilkarten-Elemente gefunden")
            _dump_elements("Karte", cards)

            print("\nEnter zum Schließen...")
            input()
        finally:
            # Close the context even when a prompt is interrupted (Ctrl+C/EOF)
            # or a DOM call raises, so the profile is shut down cleanly.
            context.close()


if __name__ == "__main__":
    main()
|
||||
@ -1,8 +1,11 @@
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.sync_api import Page
|
||||
|
||||
from teampulse.monitor import _PROFILE_EMAIL_SELECTOR, _SENDER_SELECTOR
|
||||
|
||||
|
||||
class Resolver:
|
||||
def __init__(self, cache_path: Path, page: Page):
|
||||
@ -34,5 +37,26 @@ class Resolver:
|
||||
)
|
||||
|
||||
def _extract_email_from_profile(self, display_name: str) -> str | None:
    """Read a sender's e-mail address from their Teams profile card.

    Finds the first element matching ``_SENDER_SELECTOR`` whose stripped
    text equals *display_name*, clicks it, waits up to five seconds for
    ``_PROFILE_EMAIL_SELECTOR`` and reads that element's text. The card is
    dismissed with Escape whether or not the lookup succeeded.

    Args:
        display_name: Sender name to match against the rendered chat text.

    Returns:
        The stripped e-mail text, or ``None`` when the sender is not found
        on the page, the profile card could not be read, or the e-mail
        element was empty.
    """
    page = self._page
    target = next(
        (
            el
            for el in page.query_selector_all(_SENDER_SELECTOR)
            if el.inner_text().strip() == display_name
        ),
        None,
    )
    if target is None:
        print(f" Sender '{display_name}' nicht im Chat gefunden.")
        return None

    try:
        target.click()
        # wait_for_selector already returns the matched element, so a second
        # query (which could race with the card re-rendering) is unnecessary.
        email_el = page.wait_for_selector(_PROFILE_EMAIL_SELECTOR, timeout=5000)
        email = email_el.inner_text().strip() if email_el else None
    except Exception as e:
        # Best effort: a card that fails to load must not abort the caller.
        print(f" Profilkarte für '{display_name}' nicht ladbar: {e}")
        return None
    finally:
        # Always dismiss the card so it cannot cover the next sender.
        page.keyboard.press("Escape")

    # Short pause after closing the card; wait_for_timeout is used instead of
    # time.sleep, as Playwright recommends for its sync API.
    page.wait_for_timeout(500)
    # Treat an empty field as "no e-mail found" rather than returning "".
    return email or None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user