feat: browser-based email extraction from Teams profile cards
This commit is contained in:
parent
3332592bef
commit
290bb553a4
58
scripts/discover_dom.py
Normal file
58
scripts/discover_dom.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
"""Run this script once with a Teams meeting chat open.
|
||||||
|
It prints DOM info to identify selectors for messages and profile cards.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
.venv/bin/python scripts/discover_dom.py [teams-chat-url]
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
# Page to open first: the first command-line argument if one was given,
# otherwise the Teams web app root.
URL = sys.argv[1] if len(sys.argv) > 1 else "https://teams.microsoft.com"
|
||||||
|
# Directory under the user's home that is handed to
# launch_persistent_context() in main() as the browser profile location.
SESSION_DIR = str(Path.home() / ".teampulse" / "session")
|
||||||
|
|
||||||
|
|
||||||
|
def _print_element_summaries(elements, heading, limit=5):
    """Print tag, data-tid, class and text for the first *limit* elements.

    Args:
        elements: Playwright element handles to describe.
        heading: Word used in the per-element separator line
            (e.g. "Element" or "Karte").
        limit: Maximum number of elements to print.
    """
    for i, el in enumerate(elements[:limit]):
        print(f"\n--- {heading} {i} ---")
        print(f" tag: {el.evaluate('e => e.tagName')}")
        print(f" data-tid: {el.get_attribute('data-tid')}")
        # The class attribute may be absent, hence the `or ''` fallback.
        print(f" class (100 Zch): {(el.get_attribute('class') or '')[:100]}")
        # chr(10) is "\n"; newlines are flattened so each element stays on one line.
        print(f" text (100 Zch): {el.inner_text()[:100].replace(chr(10), ' ')}")


def main():
    """Interactively dump DOM details to help pick Teams selectors.

    Opens a visible Chromium window on the persistent profile in
    SESSION_DIR, navigates to URL, and then, prompted by Enter presses on
    stdin, prints a summary of the first few elements that look like chat
    messages and, after the user clicked a sender name, of the first few
    elements that look like profile cards.

    The browser context is always closed on exit, including when a
    Playwright call fails or the user interrupts one of the prompts.
    """
    with sync_playwright() as p:
        context = p.chromium.launch_persistent_context(
            SESSION_DIR,
            headless=False,
            args=["--no-sandbox"],
        )
        try:
            # Reuse the tab the persistent context opened, if there is one.
            page = context.pages[0] if context.pages else context.new_page()
            page.goto(URL)
            print("Navigiere im Browser zu einem Meeting-Chat, dann hier Enter drücken...")
            input()

            candidates = page.query_selector_all(
                "[data-tid*='message'], [class*='message'], [role='listitem']"
            )
            print(f"\n{len(candidates)} potenzielle Nachrichten-Elemente gefunden")
            _print_element_summaries(candidates, "Element")

            print("\nKlicke im Browser auf einen Sender-Namen, dann hier Enter drücken...")
            input()

            cards = page.query_selector_all(
                "[class*='persona'], [class*='Persona'], [class*='profile'], [data-tid*='persona']"
            )
            print(f"\n{len(cards)} potenzielle Profilkarten-Elemente gefunden")
            _print_element_summaries(cards, "Karte")

            print("\nEnter zum Schließen...")
            input()
        finally:
            # Close even on errors or Ctrl-C so the browser is shut down
            # through the context rather than torn down with the driver.
            context.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Start the interactive discovery only when run as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
@ -1,8 +1,11 @@
|
|||||||
import json
|
import json
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from playwright.sync_api import Page
|
from playwright.sync_api import Page
|
||||||
|
|
||||||
|
from teampulse.monitor import _PROFILE_EMAIL_SELECTOR, _SENDER_SELECTOR
|
||||||
|
|
||||||
|
|
||||||
class Resolver:
|
class Resolver:
|
||||||
def __init__(self, cache_path: Path, page: Page):
|
def __init__(self, cache_path: Path, page: Page):
|
||||||
@ -34,5 +37,26 @@ class Resolver:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _extract_email_from_profile(self, display_name: str) -> str | None:
    """Read a sender's email address from their Teams profile card.

    Finds the first element matching _SENDER_SELECTOR whose stripped text
    equals *display_name* exactly, clicks it, waits up to five seconds for
    an element matching _PROFILE_EMAIL_SELECTOR and returns its stripped
    text. Escape is pressed afterwards in every case to dismiss whatever
    the click opened.

    Args:
        display_name: Sender name exactly as it is rendered in the chat.

    Returns:
        The email text, or None when the sender is not on the page, the
        profile card could not be read, or the email element is empty.
    """
    target = next(
        (
            el
            for el in self._page.query_selector_all(_SENDER_SELECTOR)
            if el.inner_text().strip() == display_name
        ),
        None,
    )
    if target is None:
        print(f" Sender '{display_name}' nicht im Chat gefunden.")
        return None

    email = None
    try:
        target.click()
        # wait_for_selector returns the matched handle itself, so there is
        # no need to query the page a second time (which could race with a
        # re-render of the card).
        email_el = self._page.wait_for_selector(_PROFILE_EMAIL_SELECTOR, timeout=5000)
        if email_el is not None:
            # Treat an empty field like a missing one instead of handing
            # callers an empty string as an address.
            email = email_el.inner_text().strip() or None
    except Exception as e:
        # Best-effort lookup: any failure while opening or reading the card
        # is reported and turned into "no email found".
        print(f" Profilkarte für '{display_name}' nicht ladbar: {e}")
    finally:
        self._page.keyboard.press("Escape")
        # Short pause after Escape before the caller continues.
        # wait_for_timeout is used instead of time.sleep so Playwright
        # keeps processing browser events while waiting.
        self._page.wait_for_timeout(500)
    return email
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user