Skip to main content
added "wheel" tag
Link
J_H
  • 42.3k
  • 3
  • 38
  • 157
Source Link
piepi
  • 833
  • 7
  • 18

Implemented a basic dictionary in python using lists as linked list and murmur hash as a hash function

import mmh3
from random_things import string_generator


class HashMap:
    def __init__(self, num_of_buckets=4, load_factor=0.75):
        assert (num_of_buckets >= 1)
        self.num_of_buckets = num_of_buckets
        self.mp = [[] for i in range(num_of_buckets)]
        self.LOAD_FACTOR = load_factor
        self.num_of_elements_in_ht = 0
        self.longest_chain = [-1, -1]

    def bucket(self, key, num_of_buckets):
        k = mmh3.hash(key)
        return k % num_of_buckets
        # return HashMap.my_own_hash_function(key) % 3

    def _does_it_need_resizing(self):
        return self.num_of_elements_in_ht / self.num_of_buckets >= self.LOAD_FACTOR

    def resize(self):
        if self._does_it_need_resizing():
            new_hash_table_size = self.num_of_buckets * 2
            new_hash_table = [[] for i in range(new_hash_table_size)]
            for linkedlist in self.mp:
                for element in linkedlist:
                    bucket_index = self.bucket(element[0], new_hash_table_size)
                    new_hash_table[bucket_index].append(element)
                    if len(new_hash_table[bucket_index]) > self.longest_chain[0]:
                        self.longest_chain = [len(new_hash_table[bucket_index]), new_hash_table[bucket_index]]
            self.num_of_buckets = new_hash_table_size
            self.mp = new_hash_table

    def set(self, key, value):
        bucket_index = self.bucket(key, self.num_of_buckets)
        element = self._get(self.mp[bucket_index], key)
        if element:
            element[1] = value
            return element
        self.mp[bucket_index].append([key, value])
        if len(self.mp[bucket_index]) > self.longest_chain[0]:
            self.longest_chain = [len(self.mp[bucket_index]), self.mp[bucket_index]]
        self.num_of_elements_in_ht += 1
        self.resize()
        return [key, value]

    def _get(self, bucket, key):
        for ele in bucket:
            if ele[0] == key:
                return ele
        return None

    def get(self, key):
        bucket_index = self.bucket(key, self.num_of_buckets)
        get_resp = self._get(self.mp[bucket_index], key)
        return get_resp[1] if get_resp else None

    def remove(self, key):
        bucket_index = self.bucket(key, self.num_of_buckets)
        for ele in self.mp[bucket_index]:
            if ele[0] == key:
                self.mp[bucket_index].remove(ele)
                self.resize()
                self.num_of_elements_in_ht -= 1
                if len(self.mp[bucket_index]) < self.longest_chain[0]:
                    self.longest_chain = [len(self.mp[bucket_index]), self.mp[bucket_index]]
                return

    def get_the_entire_map(self):
        return self.mp.copy()

    @staticmethod
    def my_own_hash_function(key):
        return 0

    def get_longest_chain(self):
        return self.longest_chain

    def num_of_elements(self):
        return self.num_of_elements_in_ht

    def keys(self):
        k = []
        for linkedlist in self.mp:
            for element in linkedlist:
                k.append(element[0])
        return k


def main():
    mp = HashMap(2, load_factor=1)
    # mp.set("my_string", "my_string")
    # mp.set("my_string2", "my_string")
    # mp.set("my_string3", "my_string")
    # mp.set("my_string4", "my_string")
    # mp.set("my_string5", "my_string")
    # mp.set("my_string6", "my_string")
    # mp.set("my_string7", "my_string")
    # mp.set("my_string8", "my_string")
    # print(f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}")

    import time
    import random
    start = time.time()
    while True:
        for my_count, my_string in enumerate(string_generator(20, random.randint(10, 50000))):
            mp.set(my_string, my_string)
            mp.set(my_string, my_string)
        if time.time() - start >= 1:
            print(
                f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}. Longest chain {mp.get_longest_chain()[0]}")
            time.sleep(0.200)
            # start = time.time()
        for key in mp.keys():
            mp.remove(key)
        if time.time() - start >= 1:
            print(
                f"Size of map {len(mp.get_the_entire_map())} and number of elements is : {mp.num_of_elements()}. Longest chain {mp.get_longest_chain()[0]}")
            time.sleep(0.200)
            start = time.time()
        mp = HashMap(2)

    # for idx, val in enumerate(mp.get_the_entire_map()):
    #     print(idx, len(val))

    # print(mp.set("my_string", "my_string1"))
    # print(mp.get("my_string"))


if __name__ == "__main__":
    main()