diff --git a/schoeffelbe/pr07.py b/schoeffelbe/pr07.py new file mode 100644 index 0000000..8b43aa0 --- /dev/null +++ b/schoeffelbe/pr07.py @@ -0,0 +1,264 @@ +import logging + +logger = logging.getLogger(__name__) +# logging.basicConfig(level=logging.DEBUG) + +import time + +def timeMS(func, *args, **kwargs): + startTime = time.perf_counter() + result = func(*args, **kwargs) + endTime = time.perf_counter() + elapsedMS = (endTime - startTime) * 1000 # Convert to milliseconds + print(f"{func.__name__} took {elapsedMS:.2f} ms") + return result + +from utils.memory_array import MemoryArray +from utils.memory_cell import MemoryCell +from utils.literal import Literal +from utils.memory_range import mrange +from utils.memory_manager import MemoryManager + +def getHashFunction(m : Literal): + # Hash function using golden ratio + # Golden ratio (sqrt(5) - 1) / 2; Allowed to to this way as it is a one-time calc ^^ + A : Literal = Literal((5 ** 0.5 - 1) / 2) + def hashFunctionGoldenRatio(key : Literal|MemoryCell) -> Literal: + if not isinstance(key, MemoryCell): + key = MemoryCell(key) + + assert(isinstance(key, MemoryCell)) + + product : Literal = key * A + + # discard decimal part + intPart = Literal(int(product.value)) + fracPart = MemoryCell(product) - intPart + + # Scale by m + hashVal = Literal(int(fracPart.value * m.value)) + + return hashVal + + return hashFunctionGoldenRatio + +# Probing function h + i + 5i^2 +def getProbingFunction(hashFunc, m: Literal, probingMethod="quadratic"): + def quadraticProbe(key, i: Literal|MemoryCell): + if not isinstance(i, MemoryCell): + i = MemoryCell(i) + + # TODO Can save operation/allocation of MemCEll here, but for better flow leave this way + hPrime_x = MemoryCell(hashFunc(key)) + iSquared = MemoryCell(i * i) + + result = MemoryCell(MemoryCell(hPrime_x + i) + iSquared * Literal(5)) % m + return result + + + def symmetricProbe(key, i: Literal|MemoryCell): + if not isinstance(i, MemoryCell): + i = MemoryCell(i) + + hPrime_x = MemoryCell(hashFunc(key)) + iSquared = MemoryCell(i * i) + + if i % Literal(2) == Literal(0): + result = MemoryCell(hPrime_x + iSquared) % m + else: + result = MemoryCell(hPrime_x - iSquared) % m + return result + + if probingMethod == "symmetric": + return symmetricProbe + else: + return quadraticProbe + +class HashTable: + # marker for deleted entries + DELETED = MemoryCell("DELETED") + + def __init__(self, size, probingMethod="quadratic"): + if not isinstance (size, Literal|MemoryCell): + size = Literal(size) + self.size = size + + self.table = MemoryArray(self.size) + for i in mrange(self.size): + self.table[i].set(None) + + self.count = Literal(0) # Count of actual elements + self.deletedCount = Literal(0) # Count of deleted slots + + # Initialize hash and probe function + self.hashFunc = getHashFunction(self.size) + self.probe = getProbingFunction(self.hashFunc, self.size, probingMethod) + + def insert(self, value): + if not isinstance(value, MemoryCell): + value = MemoryCell(value) + + # Check if table is full (considering deleted entries asd well as normal count) + if (MemoryCell(self.count) + self.deletedCount) == self.size: + logger.info(f"Failed to insert {value} - table is full") + return False + + i = Literal(0) + while i < self.size: + # Get our Index via the ProbingFkt + index = self.probe(value, i) + + # Empty slot or deleted marker found + # Need the get here as we want to compare the value (none) to None. + # For the rest normal should be fine + if self.table[index].get() is None or self.table[index] == self.DELETED: + if self.table[index] == self.DELETED: + # Were on a cell that was deleted. Now we write in it, but decrease delCount + self.deletedCount = self.deletedCount.pred() + + self.table[index] = value + self.count = self.count.succ() # Inc fillcounter + logger.debug(f"value {value} was inserted successfully at {index}, count is now at \ + {self.count}|{self.deletedCount}"); + return True + + # Value already exists + # TODO DISCUSSION Wouldnt it defcato be a successfull insert if we are already in our hashmap? Maybe then return true here? + if self.table[index] == value: + logger.info(f"value {value} already exits in the table"); + return False + + i = i.succ() + + # Couldn't find a slot even though count < size + logger.info(f"Failed to insert {value} - table is full") + return False + + def search(self, value): + if not isinstance(value, MemoryCell): + value = MemoryCell(value) + + i = Literal(0) + while i < self.size: + index = self.probe(value, i) + + # Empty slot found - value doesn't exist + if self.table[index].get() is None: + return False + + # Value found + if self.table[index] == value: + return True + + # If deleted marker or different value, continue + i = i.succ() + + return False + + def delete(self, value): + if not isinstance(value, MemoryCell): + value = MemoryCell(value) + + i = Literal(0) + while i < self.size: + index = self.probe(value, i) + + # Empty slot found - value doesn't exist + if self.table[index].get() is None: + return False + + # Value found - mark as deleted + if self.table[index] == value: + # TODO Check wheter it is .set() or = here + self.table[index].set(self.DELETED) + self.count = self.count.pred() + self.deletedCount = self.deletedCount.succ() + return True + + i = i.succ() + + return False + + def __str__(self): + result = "[" + for i in mrange(self.size): + if self.table[i].get() is None: + result += "None" + else: + result += str(self.table[i]) + + if i < self.size.pred(): + result += ", " + + result += "]" + return result + + def alpha(self): + # load factor (count / size) + return Literal(self.count.value / self.size.value) + +if __name__ == "__main__": + table = HashTable(20) + seq0Data = MemoryArray.create_array_from_file("data/seq0.txt") + + print(f"Hash table before inserting seq0.txt: {table}") + + print("Inserting values from seq0.txt...") + for value in seq0Data: + table.insert(value) + + strFirst = str(table.table[Literal(0)]) + + # delete first 5 entries and then try to reinsert them + for i in mrange(5): + table.delete(seq0Data[i]) + for i in mrange(5): + table.insert(seq0Data[i]) + + assert(strFirst == str(table.table[Literal(0)])), "First entry should be the same as before" + print(f"Hashtable after inserting seq0.txt: {table}") + print(f"Load factor after inserting: {table.alpha()}") + + print("Deleting 52...") + table.delete(Literal(52)) + print(f"Hash table after deletion: {table}") + print(f"New load factr: {table.alpha()}") + + print("\nInserting value 24...") + table.insert(Literal(24)) + print(f"Hash table after inserting 24: {table}") + + table90 = HashTable(90) + seq1Data = MemoryArray.create_array_from_file("data/seq1.txt") + + print(f"Inserting values from seq1.txt into table with size 90...") + insertedCount = Literal(0) + for value in seq1Data: + if table90.insert(value): + insertedCount = insertedCount.succ() + + print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values") + print(f"Load factor: {table90.alpha()}") + + print("\n--- Task 5 ---") + table89 = HashTable(89) + + print(f"Inserting values from seq1.txt into table with 89...") + insertedCount = Literal(0) + for value in seq1Data: + if table89.insert(value): + insertedCount = insertedCount.succ() + + print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values") + print(f"Load factor: {table89.alpha()}") + + tableSymmetric = HashTable(90, probingMethod="symmetric") + print(f"Inserting values from seq1.txt into table with size 90 and symmetric probing...") + insertedCount = Literal(0) + for value in seq1Data: + if tableSymmetric.insert(value): + insertedCount = insertedCount.succ() + + print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values") + print(f"Load factor: {tableSymmetric.alpha()}") +