Musterlösung Praktikum 6

2026-06-01 11:51:33 +02:00 · 2026-06-01 11:51:33 +02:00 · 779fbab7c8
commit 779fbab7c8
parent ed329d5d85
5 changed files with 380 additions and 0 deletions
--- a/praktika/06_hashtable/aufgabe1_chaining.py
+++ b/praktika/06_hashtable/aufgabe1_chaining.py
@ -0,0 +1,58 @@
 import sys, os
 _root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 _l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
 if _l07  not in sys.path: sys.path.insert(0, _l07)
 if _root not in sys.path: sys.path.insert(0, _root)
 from collections.abc import Callable
 from utils.algo_context import AlgoContext
 from utils.algo_int import Int
 class HashTableChaining:
    """Hash table that resolves collisions by chaining.

    Each entry of ``table`` is a Python list (the chain) holding every
    element whose hash value selects that slot.  Elements are compared
    one by one with ``elem == x``; per the original notes these are the
    comparisons that get counted.
    """

    def __init__(self, m: int, h_func: Callable, ctx: AlgoContext):
        """Create an empty table with ``m`` chains.

        ``h_func`` is later called as ``h_func(x, self.m)``; its result is
        converted with ``int`` to pick the chain.
        """
        self.ctx   = ctx
        self.h     = h_func
        self.m     = Int(m, ctx)
        self._n    = 0          # number of stored elements
        self.table = [[] for _ in range(m)]

    def _slot(self, x: Int) -> int:
        """Return the chain index of ``x`` as a plain ``int``."""
        hashed = self.h(x, self.m)
        return int(hashed)

    def insert(self, x: Int) -> bool:
        """Append ``x`` to its chain; return False if it is already stored."""
        chain = self.table[self._slot(x)]
        # Explicit ``==`` instead of ``x in chain``: ``in`` short-circuits on
        # object identity and would skip the comparison for identical objects.
        if any(elem == x for elem in chain):
            return False
        chain.append(x)
        self._n += 1
        return True

    def search(self, x: Int) -> bool:
        """Return True if an element equal to ``x`` is in its chain."""
        chain = self.table[self._slot(x)]
        # Stops at the first match, exactly like an early ``return True``.
        return any(elem == x for elem in chain)

    def delete(self, x: Int) -> bool:
        """Remove the first element equal to ``x``; return whether one existed."""
        chain = self.table[self._slot(x)]
        hit = next((pos for pos, elem in enumerate(chain) if elem == x), None)
        if hit is None:
            return False
        del chain[hit]
        self._n -= 1
        return True

    def __str__(self):
        """Return one line per slot: ``[ i] a -> b``, or ``[ i] —`` if empty."""
        def render(bucket):
            if not bucket:
                return '—'
            return ' -> '.join(str(item.value) for item in bucket)

        return '\n'.join(
            f'[{idx:2d}] {render(bucket)}'
            for idx, bucket in enumerate(self.table)
        )

    def alpha(self) -> float:
        """Return the load factor: stored elements divided by table size."""
        size = int(self.m)
        return self._n / size
--- a/praktika/06_hashtable/aufgabe2_chaining_test.py
+++ b/praktika/06_hashtable/aufgabe2_chaining_test.py
@ -0,0 +1,49 @@
 import sys, os
 _root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 _l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
 _p06  = os.path.dirname(__file__)
 if _l07  not in sys.path: sys.path.insert(0, _l07)
 if _root not in sys.path: sys.path.insert(0, _root)
 if _p06  not in sys.path: sys.path.insert(0, _p06)
 from utils.algo_context import AlgoContext
 from utils.algo_array import Array
 from utils.algo_int import Int
 from analyze_hashtable import h
 from aufgabe1_chaining import HashTableChaining
 # Build a chaining table with m = 20 slots and insert every value read
 # from seq0.txt.
 ctx = AlgoContext()
 values = Array.from_file('data/seq0.txt', ctx)
 ht = HashTableChaining(20, h, ctx)
 for cell in values:
    ht.insert(cell)
 # ── a) Load factor ───────────────────────────────────────────────────────────
 print("=== a) Nach Einfügen von seq0.txt (14 Werte, m=20) ===")
 print(ht)
 print(f"\nBelegungsfaktor α = {ht._n}/{int(ht.m)} = {ht.alpha():.2f}")
 # ── b) Delete 52 ─────────────────────────────────────────────────────────────
 print("\n=== b) Nach delete(52) ===")
 ht.delete(Int(52, ctx))
 print(ht)
 print(f"Belegungsfaktor α = {ht._n}/{int(ht.m)} = {ht.alpha():.2f}")
 # ── c) Insert seq0.txt a second time ─────────────────────────────────────────
 print("\n=== c) Erneut seq0.txt einfügen ===")
 for cell in values:
    ht.insert(cell)
 print(f"Belegungsfaktor α = {ht._n}/{int(ht.m)} = {ht.alpha():.2f}  (kann > 1 sein!)")
 # HashTableChaining.insert returns False when the value is already stored,
 # which is what happens for most values in this second pass.  The printed
 # explanation therefore must not claim that insert can never return False;
 # what never happens is a rejection because the table is full.
 print("insert gibt bei Verkettung nur für bereits vorhandene Werte False zurück –")
 print("nie wegen Platzmangel: die Kette wächst unbegrenzt, es gibt kein 'Tabelle voll'.")
 # ── d) Why no DELETED_MARK is needed ─────────────────────────────────────────
 print("\n=== d) Warum kein DELETED_MARK? ===")
 print(
    "Bei offener Adressierung wird DELETED_MARK benötigt, damit die\n"
    "Sondierungssequenz bei search nicht vorzeitig abbricht. Bei Verkettung\n"
    "wird die vollständige Liste eines Slots linear durchsucht – ein gelöschtes\n"
    "Element wird einfach aus der Liste entfernt. Es gibt keine Sequenz, die\n"
    "'unterbrochen' werden könnte, daher ist kein Tombstone notwendig."
 )
--- a/praktika/06_hashtable/aufgabe3_tombstones.py
+++ b/praktika/06_hashtable/aufgabe3_tombstones.py
@ -0,0 +1,97 @@
 import sys, os, math
 _root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 _l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
 if _l07  not in sys.path: sys.path.insert(0, _l07)
 if _root not in sys.path: sys.path.insert(0, _root)
 from utils.algo_context import AlgoContext
 from utils.algo_array import Array
 from utils.algo_int import Int
 from hashtable import HashTableOpenAddressing, DELETED_MARK, UNUSED_MARK
 from analyze_hashtable import h, f
 # Fill an open-addressing table (m = 20, probe function f) with seq0.txt.
 ctx = AlgoContext()
 values = Array.from_file('data/seq0.txt', ctx)
 ht = HashTableOpenAddressing(20, f, ctx)
 for cell in values:
    ht.insert(cell)
 # ── a) delete(52) ────────────────────────────────────────────────────────────
 print("=== a) Nach delete(52) ===")
 ht.delete(Int(52, ctx))
 print(ht)
 slot_52 = int(h(Int(52, ctx), ht.m))
 # Read the slot content first so the print line stays short.
 slot_52_content = ht.table[Int(slot_52, ctx)].value
 print(f"\nSlot h(52) = {slot_52}: Inhalt = {slot_52_content!r}  ← DELETED_MARK")
 # ── b) Why search continues past DELETED_MARK ────────────────────────────────
 print("\n=== b) Warum search bei DELETED_MARK fortgesetzt wird ===")
 print("\n".join((
    "search prüft in jeder Iteration: Ist der aktuelle Slot UNUSED? → Abbruch",
    "(der gesuchte Wert wurde nie an eine spätere Position sondiert, also ist",
    "er nicht vorhanden). Ist der Slot DELETED_MARK? → weitersuchen, denn der",
    "Wert könnte durch einen früheren Eintrag über diesen Slot hinaus sondiert",
    "worden sein. DELETED_MARK erhält die Sondierungssequenz aufrecht.",
 )))
 # ── c) Concrete counterexample: UNUSED_MARK instead of DELETED_MARK ──────────
 print("\n=== c) Konkretes Gegenbeispiel ===")
 # Build a second table of the same size from the same values, using its
 # own context.
 ctx2 = AlgoContext()
 ht2  = HashTableOpenAddressing(20, f, ctx2)
 for cell in values:
    ht2.insert(cell)
 # Home slot of 52 in the second table; the goal is to find another value
 # whose probe sequence passes through that same slot.
 slot_52_v = int(h(Int(52, ctx2), ht2.m))
 print("h(52) =", slot_52_v)
 # Next step: list the probe sequence of every inserted value to see which
 # of them hit the slot of 52.
 from utils.algo_int import Int as I
 # Multiplier used by the hash below: (sqrt(5) - 1) / 2.
 _A = (math.sqrt(5) - 1) / 2


 def h_raw(v, m=20):
    """Return the home slot of ``v`` for table size ``m`` as a plain int.

    Takes the absolute fractional part of ``v * _A``, scales it by ``m``
    and truncates the result.
    """
    product = v * _A
    whole_part = int(product)
    fraction = abs(product - whole_part)
    return int(fraction * m)


 def f_raw(v, i, m=20):
    """Return the ``i``-th probe slot of ``v``: ``(h_raw(v) + i + 5*i*i) mod m``."""
    home = h_raw(v, m)
    offset = i + 5 * i * i
    return (home + offset) % m
 # Show the first three probe positions of every value from seq0.txt.
 print("\nSondierungssequenzen (erste 3 Schritte) für seq0.txt-Werte:")
 raw_vals = [int(str(c)) for c in values]
 for v in raw_vals:
    p0, p1, p2 = (f_raw(v, i) for i in range(3))
    print(f"  h({v:4d}) = {h_raw(v):2d}, f(·,0)={p0:2d}, f(·,1)={p1:2d}, f(·,2)={p2:2d}")
 # General argument: if a value collided with 52 and was stored further
 # along the probe sequence, resetting the slot of 52 to UNUSED would make
 # the search for that value stop too early.
 print("\n".join((
    "\nAngenommen, zwei Werte v1 und v2 sondieren über denselben Slot:",
    "  v1 landet in Slot s (erste Sondierung)",
    "  v2 kollidiert bei Slot s und landet bei s' (zweite Sondierung)",
    "Wird v1 gelöscht und Slot s auf UNUSED gesetzt, bricht search(v2)",
    "bei Slot s ab – obwohl v2 noch in s' liegt. Das Ergebnis ist 'nicht",
    "gefunden', obwohl v2 vorhanden ist. DELETED_MARK verhindert das.",
 )))
 # ── d) Where does 24 end up after delete(52)? ────────────────────────────────
 print("\n=== d) Wo landet 24 nach delete(52)? ===")
 h24 = h_raw(24)
 print(f"h(24)       = int({{24·A}} mod 1 · 20) = {h24}")
 free_marks = (DELETED_MARK, UNUSED_MARK)
 # Walk the first five probe positions of 24 and stop at the first slot
 # that holds one of the two "free" markers.
 for step in range(5):
    probe = f_raw(24, step)
    content = ht.table[Int(probe, ctx)].value
    is_free = content in free_marks
    if is_free:
        state = "frei (DELETED/UNUSED)"
    else:
        state = f"belegt ({content})"
    print(f"f(24, {step}, 20) = {probe:2d}  → {state}")
    if is_free:
        print(f"  ⇒ 24 landet in Slot {probe}")
        break
 # Perform the real insert and print a few slots to check the prediction.
 ht.insert(Int(24, ctx))
 print("\nVerifikation __str__ (relevante Slots):")
 for idx in (0, 1, 2, 3, 16, 17):
    stored = ht.table[Int(idx, ctx)].value
    note = " ← 24 eingefügt" if str(stored) == "24" else ""
    print(f"  Slot {idx:2d}: {stored}{note}")
--- a/praktika/06_hashtable/aufgabe4_kapazitaet.py
+++ b/praktika/06_hashtable/aufgabe4_kapazitaet.py
@ -0,0 +1,70 @@
 import sys, os
 _root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 _l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
 _p06  = os.path.dirname(__file__)
 if _l07  not in sys.path: sys.path.insert(0, _l07)
 if _root not in sys.path: sys.path.insert(0, _root)
 if _p06  not in sys.path: sys.path.insert(0, _p06)
 from utils.algo_context import AlgoContext
 from utils.algo_array import Array
 from hashtable import HashTableOpenAddressing
 from analyze_hashtable import h, f
 from aufgabe1_chaining import HashTableChaining
 def _count_inserted(table, cells):
    """Insert every cell into ``table``; return how many inserts reported success."""
    accepted = 0
    for cell in cells:
        if table.insert(cell):
            accepted += 1
    return accepted


 ctx = AlgoContext()
 values = Array.from_file('data/seq1.txt', ctx)
 n = len(values)
 # ── a) Size 90 ────────────────────────────────────────────────────────────────
 ht90 = HashTableOpenAddressing(90, f, ctx)
 inserted90 = _count_inserted(ht90, values)
 rejected90 = n - inserted90
 free90 = 90 - inserted90
 print("=== a) HashTableOpenAddressing m=90, seq1.txt (100 Werte) ===")
 print(f"Eingefügt: {inserted90}, Abgewiesen: {rejected90}, Freie Slots: {free90}")
 print("\n".join((
    "Ursache: Die quadratische Sondierungsfunktion f(x,i) = (h(x)+i+5i²) mod m",
    "erzeugt für m=90 (keine Primzahl) zyklische Sondierungssequenzen, die nicht",
    "alle 90 Slots abdecken. Sobald alle erreichbaren Slots belegt sind, schlägt",
    "insert fehl – auch wenn noch andere Slots frei sind.",
 )))
 # ── b) Size 89 ────────────────────────────────────────────────────────────────
 # Fresh context and a fresh copy of the data for the second table.
 ctx2 = AlgoContext()
 values2 = Array.from_file('data/seq1.txt', ctx2)
 ht89 = HashTableOpenAddressing(89, f, ctx2)
 inserted89 = _count_inserted(ht89, values2)
 rejected89 = n - inserted89
 print("\n=== b) HashTableOpenAddressing m=89 (Primzahl), seq1.txt ===")
 print(f"Eingefügt: {inserted89}, Abgewiesen: {rejected89}")
 print("\n".join((
    "Bei Primzahlgröße garantiert die quadratische Sondierung, dass mindestens",
    "m/2 verschiedene Slots erreicht werden (bei geeigneten Konstanten c1, c2",
    "sogar alle m). Für m=89 ist die Sondierungssequenz deutlich länger, bevor",
    "sie sich wiederholt – dadurch können mehr Werte platziert werden.",
 )))
 # ── c) HashTableChaining m=20, seq1.txt ───────────────────────────────────────
 ctx3 = AlgoContext()
 values3 = Array.from_file('data/seq1.txt', ctx3)
 htc = HashTableChaining(20, h, ctx3)
 for cell in values3:
    htc.insert(cell)
 print("\n=== c) HashTableChaining m=20, seq1.txt ===")
 # Number of distinct values in the file, computed on plain ints.
 unique = len({int(str(c)) for c in values3})
 print(f"Eingefügt: {htc._n} (seq1.txt hat {n} Werte, davon {unique} eindeutig; "
       f"Duplikate werden nicht doppelt gespeichert)")
 print(f"Belegungsfaktor α = {htc._n}/{int(htc.m)} = {htc.alpha():.2f}")
 print("Verkettung hat keine Kapazitätsgrenze – die Ketten wachsen unbegrenzt.")
 # ── d) Theoretical number of probes ───────────────────────────────────────────
 print("\n=== d) Mittlere Sondierungsanzahl 1/(1-α) ===")
 for alpha in (0.5, 0.9):
    probes = 1 / (1 - alpha)
    print(f"  α = {alpha}: 1/(1−{alpha}) = {probes:.1f} Sondierungen")
 print("\n".join((
    "Bis α ≈ 0,7 bleibt die offene Adressierung praktikabel (≈ 3,3 Sondierungen).",
    "Für α > 0,8 steigen die Kosten rapide; α = 0,9 bedeutet bereits 10 Sondierungen",
    "im Schnitt. Als Faustregel gilt: Tabelle vergrößern (Rehashing), wenn α > 0,75.",
 )))
--- a/praktika/06_hashtable/aufgabe5_vergleich.py
+++ b/praktika/06_hashtable/aufgabe5_vergleich.py
@ -0,0 +1,106 @@
 import sys, os, math
 _root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 _l07  = os.path.join(_root, 'vorlesung', 'L07_hashtable')
 _p06  = os.path.dirname(__file__)
 if _l07  not in sys.path: sys.path.insert(0, _l07)
 if _root not in sys.path: sys.path.insert(0, _root)
 if _p06  not in sys.path: sys.path.insert(0, _p06)
 import matplotlib.pyplot as plt
 from utils.algo_context import AlgoContext
 from utils.algo_array import Array
 from hashtable import HashTableOpenAddressing
 from analyze_hashtable import h, f
 from aufgabe1_chaining import HashTableChaining
 def measure(m: int, target_alpha: float, seed: int = 42):
    """Fill both table types up to a target load factor and count search comparisons.

    Args:
        m: Table size used for both tables.
        target_alpha: Desired load factor; ``int(m * target_alpha)`` random
            values are generated for each table.
        seed: Seed passed to ``random.seed`` before each data set is drawn.

    Returns:
        A pair ``(cmp_oa, cmp_ch)``: the ``comparisons`` counter of the
        open-addressing context and of the chaining context, each read
        after searching every stored value once.
    """
    import random
    n = int(m * target_alpha)

    # --- open addressing -----------------------------------------------------
    random.seed(seed)
    ctx_oa = AlgoContext()
    vals_oa = Array.random(n, -10000, 10000, ctx_oa)
    ht_oa = HashTableOpenAddressing(m, f, ctx_oa)
    # Only values whose insert succeeded are searched afterwards.
    inserted = [cell for cell in vals_oa if ht_oa.insert(cell)]
    ctx_oa.reset()          # discard the counts caused by the inserts
    for cell in inserted:
        ht_oa.search(cell)
    cmp_oa = ctx_oa.comparisons

    # --- chaining --------------------------------------------------------------
    # Re-seed before the second draw so both strategies are measured on the
    # same values.  Seeding only once left the generator in an advanced state
    # here, so the two tables were filled with different data.
    # NOTE(review): presumes Array.random draws from the global ``random``
    # generator, as the original seeding suggests — confirm.
    random.seed(seed)
    ctx_ch = AlgoContext()
    vals_ch = Array.random(n, -10000, 10000, ctx_ch)
    ht_ch = HashTableChaining(m, h, ctx_ch)
    for cell in vals_ch:
        ht_ch.insert(cell)
    ctx_ch.reset()          # discard the counts caused by the inserts
    # Search every element that is actually stored in a chain.
    for chain in ht_ch.table:
        for elem in chain:
            ht_ch.search(elem)
    cmp_ch = ctx_ch.comparisons
    return cmp_oa, cmp_ch
 sizes = [50, 100, 200, 500, 1000]
 scenarios = [(0.7, "α ≈ 0,7"), (0.9, "α ≈ 0,9")]
 # Run every (α, m) measurement exactly once and keep the results, so the
 # printed table and the plot show the same numbers without measuring twice.
 results = {}
 for target_alpha, label in scenarios:
    oa_vals, ch_vals = [], []
    for m in sizes:
        coa, cch = measure(m, target_alpha)
        oa_vals.append(coa)
        ch_vals.append(cch)
    results[target_alpha] = (oa_vals, ch_vals)
    print(f"\n{'m':>6}  {'OA Vergl.':>12}  {'Chaining Vergl.':>16}  ({label})")
    for m, co, cc in zip(sizes, oa_vals, ch_vals):
        print(f"{m:>6}  {co:>12}  {cc:>16}")
 # ── Plot ──────────────────────────────────────────────────────────────────────
 # One subplot per load factor, drawn from the stored measurements.
 fig, axes = plt.subplots(1, 2, figsize=(11, 4))
 for ax, (target_alpha, label) in zip(axes, scenarios):
    oa_vals, ch_vals = results[target_alpha]
    ax.plot(sizes, oa_vals, marker='o', label='Offene Adressierung')
    ax.plot(sizes, ch_vals, marker='s', label='Verkettung')
    ax.set_xlabel('Tabellengröße m')
    ax.set_ylabel('Vergleiche (Suchen aller Werte)')
    ax.set_title(label)
    ax.legend()
 plt.tight_layout()
 plt.savefig('vergleich_strategien.pdf')
 plt.show()
 # ── Answers ───────────────────────────────────────────────────────────────────
 print("\n=== a) Welche Strategie benötigt mehr Vergleiche? ===")
 print(
    "Bei gleichem α erzeugt die offene Adressierung mehr Vergleiche:\n"
    "Kollisionen führen zu langen Sondierungssequenzen, bei denen jeder\n"
    "besuchte Slot einen Vergleich kostet. Die Verkettung sucht nur in der\n"
    "jeweiligen Kette (im Schnitt α/2 Vergleiche pro Suche)."
 )
 print("\n=== b) α = 0,9 ===")
 print(
    "Die offene Adressierung verschlechtert sich drastisch: 1/(1−0,9) = 10\n"
    "Sondierungen im Schnitt – das ist im Plot klar erkennbar. Bei der\n"
    "Verkettung wächst die mittlere Kettenlänge auf α = 0,9, also ca. 1 Extra-\n"
    "Vergleich. Der Nachteil der offenen Adressierung wird bei hohem α dominant."
 )
 print("\n=== c) Systemvorteile ===")
 print(
    "Verkettung:        Keine Kapazitätsgrenze; einfacheres Löschen (kein\n"
    "                   Tombstone); gut parallelisierbar (eine Kette pro Slot).\n"
    "Offene Adressierung: Besseres Cache-Verhalten (alle Einträge im\n"
    "                   zusammenhängenden Array, keine Pointer-Indirektion);\n"
    "                   geringerer Speicherverbrauch (keine Listenknoten)."
 )