AlgoDatSoSe26/praktika/06_hashtable/aufgabe5_vergleich.py

import sys, os, math
_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
_l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
_p06  = os.path.dirname(__file__)
if _l07  not in sys.path: sys.path.insert(0, _l07)
if _root not in sys.path: sys.path.insert(0, _root)
if _p06  not in sys.path: sys.path.insert(0, _p06)

import matplotlib.pyplot as plt
from utils.algo_context import AlgoContext
from utils.algo_array import Array
from hashtable import HashTableOpenAddressing
from analyze_hashtable import h, f
from aufgabe1_chaining import HashTableChaining


def measure(m: int, target_alpha: float, seed: int = 42):
    """Befüllt beide Tabellentypen bis Ziel-α, misst Vergleiche beim Suchen."""
    n = int(m * target_alpha)

    import random
    random.seed(seed)

    ctx_oa = AlgoContext()
    vals_oa = Array.random(n, -10000, 10000, ctx_oa)
    ht_oa = HashTableOpenAddressing(m, f, ctx_oa)
    inserted = []
    for cell in vals_oa:
        if ht_oa.insert(cell):
            inserted.append(cell)

    ctx_oa.reset()
    for cell in inserted:
        ht_oa.search(cell)
    cmp_oa = ctx_oa.comparisons

    ctx_ch = AlgoContext()
    vals_ch = Array.random(n, -10000, 10000, ctx_ch)
    ht_ch = HashTableChaining(m, h, ctx_ch)
    for cell in vals_ch:
        ht_ch.insert(cell)

    ctx_ch.reset()
    for i, chain in enumerate(ht_ch.table):
        for elem in chain:
            ht_ch.search(elem)
    cmp_ch = ctx_ch.comparisons

    return cmp_oa, cmp_ch


sizes = [50, 100, 200, 500, 1000]

for target_alpha, label in [(0.7, "α ≈ 0,7"), (0.9, "α ≈ 0,9")]:
    oa_vals, ch_vals = [], []
    for m in sizes:
        coa, cch = measure(m, target_alpha)
        oa_vals.append(coa)
        ch_vals.append(cch)
    print(f"\n{'m':>6}  {'OA Vergl.':>12}  {'Chaining Vergl.':>16}  ({label})")
    for m, co, cc in zip(sizes, oa_vals, ch_vals):
        print(f"{m:>6}  {co:>12}  {cc:>16}")

# ── Plot ──────────────────────────────────────────────────────────────────────
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
for ax, target_alpha, label in zip(axes, [0.7, 0.9], ["α ≈ 0,7", "α ≈ 0,9"]):
    oa_vals, ch_vals = [], []
    for m in sizes:
        coa, cch = measure(m, target_alpha)
        oa_vals.append(coa)
        ch_vals.append(cch)
    ax.plot(sizes, oa_vals, marker='o', label='Offene Adressierung')
    ax.plot(sizes, ch_vals, marker='s', label='Verkettung')
    ax.set_xlabel('Tabellengröße m')
    ax.set_ylabel('Vergleiche (Suchen aller Werte)')
    ax.set_title(label)
    ax.legend()
plt.tight_layout()
plt.savefig('vergleich_strategien.pdf')
plt.show()

# ── Antworten ─────────────────────────────────────────────────────────────────
print("\n=== a) Welche Strategie benötigt mehr Vergleiche? ===")
print(
    "Bei gleichem α erzeugt die offene Adressierung mehr Vergleiche:\n"
    "Kollisionen führen zu langen Sondierungssequenzen, bei denen jeder\n"
    "besuchte Slot einen Vergleich kostet. Die Verkettung sucht nur in der\n"
    "jeweiligen Kette (im Schnitt α/2 Vergleiche pro Suche)."
)

print("\n=== b) α = 0,9 ===")
print(
    "Die offene Adressierung verschlechtert sich drastisch: 1/(1−0,9) = 10\n"
    "Sondierungen im Schnitt – das ist im Plot klar erkennbar. Bei der\n"
    "Verkettung wächst die mittlere Kettenlänge auf α = 0,9, also ca. 1 Extra-\n"
    "Vergleich. Der Nachteil der offenen Adressierung wird bei hohem α dominant."
)

print("\n=== c) Systemvorteile ===")
print(
    "Verkettung:        Keine Kapazitätsgrenze; einfacheres Löschen (kein\n"
    "                   Tombstone); gut parallelisierbar (eine Kette pro Slot).\n"
    "Offene Adressierung: Besseres Cache-Verhalten (alle Einträge im\n"
    "                   zusammenhängenden Array, keine Pointer-Indirektion);\n"
    "                   geringerer Speicherverbrauch (keine Listenknoten)."
)