import mmh3
from random_things import string_generator
class HashMap:
def __init__(self, num_of_buckets=4, load_factor=0.75):
assert (num_of_buckets >= 1)
self.num_of_buckets = num_of_buckets
self.mp = [[] for i in range(num_of_buckets)]
self.LOAD_FACTOR = load_factor
self.num_of_elements_in_ht = 0
self.longest_chain = [-1, -1]
def bucket(self, key, num_of_buckets):
k = mmh3.hash(key)
return k % num_of_buckets
# return HashMap.my_own_hash_function(key) % 3
def _does_it_need_resizing(self):
return self.num_of_elements_in_ht / self.num_of_buckets >= self.LOAD_FACTOR
def resize(self):
if self._does_it_need_resizing():
new_hash_table_size = self.num_of_buckets * 2
new_hash_table = [[] for i in range(new_hash_table_size)]
for linkedlist in self.mp:
for element in linkedlist:
bucket_index = self.bucket(element[0], new_hash_table_size)
new_hash_table[bucket_index].append(element)
if len(new_hash_table[bucket_index]) > self.longest_chain[0]:
self.longest_chain = [len(new_hash_table[bucket_index]), new_hash_table[bucket_index]]
self.num_of_buckets = new_hash_table_size
self.mp = new_hash_table
def set(self, key, value):
bucket_index = self.bucket(key, self.num_of_buckets)
element = self._get(self.mp[bucket_index], key)
if element:
element[1] = value
return element
self.mp[bucket_index].append([key, value])
if len(self.mp[bucket_index]) > self.longest_chain[0]:
self.longest_chain = [len(self.mp[bucket_index]), self.mp[bucket_index]]
self.num_of_elements_in_ht += 1
self.resize()
return [key, value]
def _get(self, bucket, key):
for ele in bucket:
if ele[0] == key:
return ele
return None
def get(self, key):
bucket_index = self.bucket(key, self.num_of_buckets)
get_resp = self._get(self.mp[bucket_index], key)
return get_resp[1] if get_resp else None
def remove(self, key):
bucket_index = self.bucket(key, self.num_of_buckets)
for ele in self.mp[bucket_index]:
if ele[0] == key:
self.mp[bucket_index].remove(ele)
self.resize()
self.num_of_elements_in_ht -= 1
if len(self.mp[bucket_index]) < self.longest_chain[0]:
self.longest_chain = [len(self.mp[bucket_index]), self.mp[bucket_index]]
return
def get_the_entire_map(self):
return self.mp.copy()
@staticmethod
def my_own_hash_function(key):
return 0
def get_longest_chain(self):
return self.longest_chain
def num_of_elements(self):
return self.num_of_elements_in_ht
def keys(self):
k = []
for linkedlist in self.mp:
for element in linkedlist:
k.append(element[0])
return k
def main():
mp = HashMap(2, load_factor=1)
# mp.set("my_string", "my_string")
# mp.set("my_string2", "my_string")
# mp.set("my_string3", "my_string")
# mp.set("my_string4", "my_string")
# mp.set("my_string5", "my_string")
# mp.set("my_string6", "my_string")
# mp.set("my_string7", "my_string")
# mp.set("my_string8", "my_string")
# print(f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}")
import time
import random
start = time.time()
while True:
for my_count, my_string in enumerate(string_generator(20, random.randint(10, 50000))):
mp.set(my_string, my_string)
mp.set(my_string, my_string)
if time.time() - start >= 1:
print(
f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}. Longest chain {mp.get_longest_chain()[0]}")
time.sleep(0.200)
# start = time.time()
for key in mp.keys():
mp.remove(key)
if time.time() - start >= 1:
print(
f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}. Longest chain {mp.get_longest_chain()[0]}")
time.sleep(0.200)
start = time.time()
mp = HashMap(2)
# for idx, val in enumerate(mp.get_the_entire_map()):
# print(idx, len(val))
# print(mp.set("my_string", "my_string1"))
# print(mp.get("my_string"))
if __name__ == "__main__":
main()