267 lines
9.1 KiB
Python

import logging
logger = logging.getLogger(__name__)
# logging.basicConfig(level=logging.DEBUG)
import time
def timeMS(func, *args, **kwargs):
startTime = time.perf_counter()
result = func(*args, **kwargs)
endTime = time.perf_counter()
elapsedMS = (endTime - startTime) * 1000 # Convert to milliseconds
print(f"{func.__name__} took {elapsedMS:.2f} ms")
return result
from utils.memory_array import MemoryArray
from utils.memory_cell import MemoryCell
from utils.literal import Literal
from utils.memory_range import mrange
from utils.memory_manager import MemoryManager
def getHashFunction(m : Literal):
# Hash function using golden ratio
# Golden ratio (sqrt(5) - 1) / 2; Allowed to to this way as it is a one-time calc ^^
A : Literal = Literal((5 ** 0.5 - 1) / 2)
def hashFunctionGoldenRatio(key : Literal|MemoryCell) -> Literal:
if not isinstance(key, MemoryCell):
key = MemoryCell(key)
assert(isinstance(key, MemoryCell))
product : Literal = key * A
# discard decimal part
intPart = Literal(int(product.value))
fracPart = MemoryCell(product) - intPart
# Scale by m
hashVal = Literal(int(fracPart.value * m.value))
return hashVal
return hashFunctionGoldenRatio
# Probing function h + i + 5i^2
def getProbingFunction(hashFunc, m: Literal, probingMethod="quadratic"):
def quadraticProbe(key, i: Literal|MemoryCell):
if not isinstance(i, MemoryCell):
i = MemoryCell(i)
# TODO Can save operation/allocation of MemCEll here, but for better flow leave this way
hPrime_x = MemoryCell(hashFunc(key))
iSquared = MemoryCell(i * i)
result = MemoryCell(MemoryCell(hPrime_x + i) + iSquared * Literal(5)) % m
return result
def symmetricProbe(key, i: Literal|MemoryCell):
if not isinstance(i, MemoryCell):
i = MemoryCell(i)
hPrime_x = MemoryCell(hashFunc(key))
iSquared = MemoryCell(i * i)
if i % Literal(2) == Literal(0):
result = MemoryCell(hPrime_x + iSquared) % m
else:
result = MemoryCell(hPrime_x - iSquared) % m
return result
if probingMethod == "symmetric":
return symmetricProbe
else:
return quadraticProbe
class HashTable:
# marker for deleted entries
DELETED = MemoryCell("DELETED")
def __init__(self, size, probingMethod="quadratic"):
if not isinstance (size, Literal|MemoryCell):
size = Literal(size)
self.size = size
self.table = MemoryArray(self.size)
for i in mrange(self.size):
self.table[i].set(None)
self.count = Literal(0) # Count of actual elements
self.deletedCount = Literal(0) # Count of deleted slots
# Initialize hash and probe function
self.hashFunc = getHashFunction(self.size)
self.probe = getProbingFunction(self.hashFunc, self.size, probingMethod)
def insert(self, value):
if not isinstance(value, MemoryCell):
value = MemoryCell(value)
# Check if table is full (considering deleted entries asd well as normal count)
if (MemoryCell(self.count) + self.deletedCount) == self.size:
logger.info(f"Failed to insert {value} - table is full")
return False
i = Literal(0)
while i < self.size:
# Get our Index via the ProbingFkt
index = self.probe(value, i)
# Empty slot or deleted marker found
# Need the get here as we want to compare the value (none) to None.
# For the rest normal should be fine
if self.table[index].get() is None or self.table[index] == self.DELETED:
if self.table[index] == self.DELETED:
# Were on a cell that was deleted. Now we write in it, but decrease delCount
self.deletedCount = self.deletedCount.pred()
self.table[index] = value
self.count = self.count.succ() # Inc fillcounter
logger.debug(f"value {value} was inserted successfully at {index}, count is now at \
{self.count}|{self.deletedCount}");
return True
# Value already exists
# TODO DISCUSSION Wouldnt it defcato be a successfull insert if we are already in our hashmap? Maybe then return true here?
if self.table[index] == value:
logger.info(f"value {value} already exits in the table");
return False
i = i.succ()
# Couldn't find a slot even though count < size
logger.info(f"Failed to insert {value} - table is full")
return False
def search(self, value):
if not isinstance(value, MemoryCell):
value = MemoryCell(value)
i = Literal(0)
while i < self.size:
index = self.probe(value, i)
# Empty slot found - value doesn't exist
if self.table[index].get() is None:
return False
# Value found
if self.table[index] == value:
return True
# If deleted marker or different value, continue
i = i.succ()
return False
def delete(self, value):
if not isinstance(value, MemoryCell):
value = MemoryCell(value)
i = Literal(0)
while i < self.size:
index = self.probe(value, i)
# Empty slot found - value doesn't exist
if self.table[index].get() is None:
return False
# Value found - mark as deleted
if self.table[index] == value:
# TODO Check wheter it is .set() or = here
self.table[index].set(self.DELETED)
self.count = self.count.pred()
self.deletedCount = self.deletedCount.succ()
return True
i = i.succ()
return False
def __str__(self):
result = "["
for i in mrange(self.size):
if self.table[i].get() is None:
result += "None"
else:
result += str(self.table[i])
if i < self.size.pred():
result += ", "
result += "]"
return result
def alpha(self):
# load factor (count / size)
return Literal(self.count.value / self.size.value)
if __name__ == "__main__":
table = HashTable(20)
seq0Data = MemoryArray.create_array_from_file("data/seq0.txt")
print(f"Hash table before inserting seq0.txt: {table}")
print("Inserting values from seq0.txt...")
for value in seq0Data:
table.insert(value)
strFirst = str(table.table[Literal(0)])
# delete first 5 entries and then try to reinsert them
for i in mrange(5):
table.delete(seq0Data[i])
assert(table.deletedCount == Literal(5)), "Deleted count should be 5"
for i in mrange(5):
table.insert(seq0Data[i])
assert(table.deletedCount == Literal(0)), "Deleted count should be 0 after reinserting"
assert(strFirst == str(table.table[Literal(0)])), "First entry should be the same as before"
print(f"Hashtable after inserting seq0.txt: {table}")
print(f"Load factor after inserting: {table.alpha()}")
print("Deleting 52...")
table.delete(Literal(52))
print(f"Hash table after deletion: {table}")
print(f"New load factr: {table.alpha()}")
print("\nInserting value 24...")
table.insert(Literal(24))
print(f"Hash table after inserting 24: {table}")
table90 = HashTable(90)
seq1Data = MemoryArray.create_array_from_file("data/seq1.txt")
print(f"Inserting values from seq1.txt into table with size 90...")
insertedCount = Literal(0)
for value in seq1Data:
if table90.insert(value):
insertedCount = insertedCount.succ()
print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values")
print(f"Load factor: {table90.alpha()}")
table89 = HashTable(89)
print(f"Inserting values from seq1.txt into table with 89...")
insertedCount = Literal(0)
for value in seq1Data:
if table89.insert(value):
insertedCount = insertedCount.succ()
print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values")
print(f"Load factor: {table89.alpha()}")
tableSymmetric = HashTable(90, probingMethod="symmetric")
print(f"Inserting values from seq1.txt into table with size 90 and symmetric probing...")
insertedCount = Literal(0)
for value in seq1Data:
if tableSymmetric.insert(value):
insertedCount = insertedCount.succ()
print(f"Successfully inserted {insertedCount} out of {Literal(len(seq1Data))} values")
print(f"Load factor: {tableSymmetric.alpha()}")