Wavelet tree (#4267)

* Added the matrix_exponentiation.py file in maths directory * Implemented the requested changes * Update matrix_exponentiation.py * resolve merge conflict with upstream branch * add new line at end of file * add wavelet_tree * fix isort issue * updating DIRECTORY.md * fix variable names in wavelet_tree and correct typo * Add type hints and variable renaming * Update data_structures/binary_tree/wavelet_tree.py Add doctests to placate the algorithm-bot, thanks to @cclauss. Co-authored-by: Christian Clauss <[email protected]> * Move doctest to individual functions and reformat code * Move common test array to the global scope and reuse in tests * Move test array to global scope and minor linting changes * Correct the failing pytest tests * Use built-in list for type annotation * Update wavelet_tree.py * types-requests * updating DIRECTORY.md * Update wavelet_tree.py * # type: ignore * # type: ignore * Update decrypt_caesar_with_chi_squared.py * , * Update decrypt_caesar_with_chi_squared.py Co-authored-by: Christian Clauss <[email protected]> Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: Aniruddha Bhattacharjee <[email protected]>
TheAlgorithms · Jun 8, 2021 · b743e442599a5bf7e1cb14d9dc41bd17bde1504c · b743e44
1 parent f37d415
commit b743e442599a5bf7e1cb14d9dc41bd17bde1504c
Showing with 214 additions and 4 deletions.

+2 −0 DIRECTORY.md

+4 −3 ciphers/decrypt_caesar_with_chi_squared.py

+206 −0 data_structures/binary_tree/wavelet_tree.py

+1 −0 requirements.txt

+1 −1 scripts/validate_solutions.py
diff --git a/DIRECTORY.md b/DIRECTORY.md
@@ -136,6 +136,7 @@
     * [Segment Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree.py)
     * [Segment Tree Other](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree_other.py)
     * [Treap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/treap.py)
+    * [Wavelet Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/wavelet_tree.py)
   * Disjoint Set
     * [Alternate Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/alternate_disjoint_set.py)
     * [Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/disjoint_set.py)
@@ -232,6 +233,7 @@
 ## Dynamic Programming
   * [Abbreviation](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/abbreviation.py)
   * [Bitmask](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/bitmask.py)
+  * [Catalan Numbers](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/catalan_numbers.py)
   * [Climbing Stairs](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/climbing_stairs.py)
   * [Edit Distance](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/edit_distance.py)
   * [Factorial](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/factorial.py)

diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py
@@ -222,9 +222,10 @@ def decrypt_caesar_with_chi_squared(
 
     # Get the most likely cipher by finding the cipher with the smallest chi squared
     # statistic
-    most_likely_cipher: int = min(
-        chi_squared_statistic_values, key=chi_squared_statistic_values.get
-    )  # type: ignore # First argument to `min` is not optional
+    most_likely_cipher: int = min(  # type: ignore
+        chi_squared_statistic_values,  # type: ignore
+        key=chi_squared_statistic_values.get,  # type: ignore
+    )  # type: ignore
 
     # Get all the data from the most likely cipher (key, decoded message)
     (

diff --git a/data_structures/binary_tree/wavelet_tree.py b/data_structures/binary_tree/wavelet_tree.py
@@ -0,0 +1,206 @@
+"""
+Wavelet tree is a data-structure designed to efficiently answer various range queries
+for arrays. Wavelet trees are different from other binary trees in the sense that
+the nodes are split based on the actual values of the elements and not on indices,
+as is the case with segment trees or Fenwick trees. You can read more about them here:
+1. https://users.dcc.uchile.cl/~jperez/papers/ioiconf16.pdf
+2. https://www.youtube.com/watch?v=4aSv9PcecDw&t=811s
+3. https://www.youtube.com/watch?v=CybAgVF-MMc&t=1178s
+"""
+
+from typing import Optional
+
+test_array = [2, 1, 4, 5, 6, 0, 8, 9, 1, 2, 0, 6, 4, 2, 0, 6, 5, 3, 2, 7]
+
+
+class Node:
+    def __init__(self, length: int) -> None:
+        self.minn: int = -1
+        self.maxx: int = -1
+        self.map_left: list[int] = [-1] * length
+        self.left: Optional[Node] = None
+        self.right: Optional[Node] = None
+
+    def __repr__(self) -> str:
+        """
+        >>> node = Node(length=27)
+        >>> repr(node)
+        'min_value: -1, max_value: -1'
+        >>> repr(node) == str(node)
+        True
+        """
+        return f"min_value: {self.minn}, max_value: {self.maxx}"
+
+
+def build_tree(arr: list[int]) -> Node:
+    """
+    Builds the tree for arr and returns the root
+    of the constructed tree
+
+    >>> build_tree(test_array)
+    min_value: 0, max_value: 9
+    """
+    root = Node(len(arr))
+    root.minn, root.maxx = min(arr), max(arr)
+    # Leaf node case where the node contains only one unique value
+    if root.minn == root.maxx:
+        return root
+    """
+    Take the mean of min and max element of arr as the pivot and
+    partition arr into left_arr and right_arr with all elements <= pivot in the
+    left_arr and the rest in right_arr, maintaining the order of the elements,
+    then recursively build trees for left_arr and right_arr
+    """
+    pivot = (root.minn + root.maxx) // 2
+    left_arr, right_arr = [], []
+    for index, num in enumerate(arr):
+        if num <= pivot:
+            left_arr.append(num)
+        else:
+            right_arr.append(num)
+        root.map_left[index] = len(left_arr)
+    root.left = build_tree(left_arr)
+    root.right = build_tree(right_arr)
+    return root
+
+
+def rank_till_index(node: Node, num: int, index: int) -> int:
+    """
+    Returns the number of occurrences of num in interval [0, index] in the list
+
+    >>> root = build_tree(test_array)
+    >>> rank_till_index(root, 6, 6)
+    1
+    >>> rank_till_index(root, 2, 0)
+    1
+    >>> rank_till_index(root, 1, 10)
+    2
+    >>> rank_till_index(root, 17, 7)
+    0
+    >>> rank_till_index(root, 0, 9)
+    1
+    """
+    if index < 0:
+        return 0
+    # Leaf node cases
+    if node.minn == node.maxx:
+        return index + 1 if node.minn == num else 0
+    pivot = (node.minn + node.maxx) // 2
+    if num <= pivot:
+        # go to the left subtree and map index to the left subtree
+        return rank_till_index(node.left, num, node.map_left[index] - 1)
+    else:
+        # go to the right subtree and map index to the right subtree
+        return rank_till_index(node.right, num, index - node.map_left[index])
+
+
+def rank(node: Node, num: int, start: int, end: int) -> int:
+    """
+    Returns the number of occurrences of num in interval [start, end] in the list
+
+    >>> root = build_tree(test_array)
+    >>> rank(root, 6, 3, 13)
+    2
+    >>> rank(root, 2, 0, 19)
+    4
+    >>> rank(root, 9, 2 ,2)
+    0
+    >>> rank(root, 0, 5, 10)
+    2
+    """
+    if start > end:
+        return 0
+    rank_till_end = rank_till_index(node, num, end)
+    rank_before_start = rank_till_index(node, num, start - 1)
+    return rank_till_end - rank_before_start
+
+
+def quantile(node: Node, index: int, start: int, end: int) -> int:
+    """
+    Returns the index'th smallest element in interval [start, end] in the list
+    index is 0-indexed
+
+    >>> root = build_tree(test_array)
+    >>> quantile(root, 2, 2, 5)
+    5
+    >>> quantile(root, 5, 2, 13)
+    4
+    >>> quantile(root, 0, 6, 6)
+    8
+    >>> quantile(root, 4, 2, 5)
+    -1
+    """
+    if index > (end - start) or start > end:
+        return -1
+    # Leaf node case
+    if node.minn == node.maxx:
+        return node.minn
+    # Number of elements in the left subtree in interval [start, end]
+    num_elements_in_left_tree = node.map_left[end] - (
+        node.map_left[start - 1] if start else 0
+    )
+    if num_elements_in_left_tree > index:
+        return quantile(
+            node.left,
+            index,
+            (node.map_left[start - 1] if start else 0),
+            node.map_left[end] - 1,
+        )
+    else:
+        return quantile(
+            node.right,
+            index - num_elements_in_left_tree,
+            start - (node.map_left[start - 1] if start else 0),
+            end - node.map_left[end],
+        )
+
+
+def range_counting(
+    node: Node, start: int, end: int, start_num: int, end_num: int
+) -> int:
+    """
+    Returns the number of elements in range [start_num, end_num]
+    in interval [start, end] in the list
+
+    >>> root = build_tree(test_array)
+    >>> range_counting(root, 1, 10, 3, 7)
+    3
+    >>> range_counting(root, 2, 2, 1, 4)
+    1
+    >>> range_counting(root, 0, 19, 0, 100)
+    20
+    >>> range_counting(root, 1, 0, 1, 100)
+    0
+    >>> range_counting(root, 0, 17, 100, 1)
+    0
+    """
+    if (
+        start > end
+        or start_num > end_num
+        or node.minn > end_num
+        or node.maxx < start_num
+    ):
+        return 0
+    if start_num <= node.minn and node.maxx <= end_num:
+        return end - start + 1
+    left = range_counting(
+        node.left,
+        (node.map_left[start - 1] if start else 0),
+        node.map_left[end] - 1,
+        start_num,
+        end_num,
+    )
+    right = range_counting(
+        node.right,
+        start - (node.map_left[start - 1] if start else 0),
+        end - node.map_left[end],
+        start_num,
+        end_num,
+    )
+    return left + right
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
diff --git a/requirements.txt b/requirements.txt
@@ -14,4 +14,5 @@ sklearn
 statsmodels
 sympy
 tensorflow
+types-requests
 xgboost
diff --git a/scripts/validate_solutions.py b/scripts/validate_solutions.py
@@ -21,7 +21,7 @@
 def convert_path_to_module(file_path: pathlib.Path) -> ModuleType:
     """Converts a file path to a Python module"""
     spec = importlib.util.spec_from_file_location(file_path.name, str(file_path))
-    module = importlib.util.module_from_spec(spec)
+    module = importlib.util.module_from_spec(spec)  # type: ignore
     spec.loader.exec_module(module)  # type: ignore
     return module