From b743e442599a5bf7e1cb14d9dc41bd17bde1504c Mon Sep 17 00:00:00 2001 From: Aniruddha Bhattacharjee Date: Wed, 9 Jun 2021 02:19:33 +0530 Subject: [PATCH] Wavelet tree (#4267) * Added the matrix_exponentiation.py file in maths directory * Implemented the requested changes * Update matrix_exponentiation.py * resolve merge conflict with upstream branch * add new line at end of file * add wavelet_tree * fix isort issue * updating DIRECTORY.md * fix variable names in wavelet_tree and correct typo * Add type hints and variable renaming * Update data_structures/binary_tree/wavelet_tree.py Add doctests to placate the algorithm-bot, thanks to @cclauss. Co-authored-by: Christian Clauss * Move doctest to individual functions and reformat code * Move common test array to the global scope and reuse in tests * MMove test array to global scope and minor linting changes * Correct the failing pytest tests * MUse built-in list for type annotation * Update wavelet_tree.py * types-requests * updating DIRECTORY.md * Update wavelet_tree.py * # type: ignore * # type: ignore * Update decrypt_caesar_with_chi_squared.py * , * Update decrypt_caesar_with_chi_squared.py Co-authored-by: Christian Clauss Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: Aniruddha Bhattacharjee --- DIRECTORY.md | 2 + ciphers/decrypt_caesar_with_chi_squared.py | 7 +- data_structures/binary_tree/wavelet_tree.py | 206 ++++++++++++++++++++ requirements.txt | 1 + scripts/validate_solutions.py | 2 +- 5 files changed, 214 insertions(+), 4 deletions(-) create mode 100644 data_structures/binary_tree/wavelet_tree.py diff --git a/DIRECTORY.md b/DIRECTORY.md index 9905753b2..e5ca6d62f 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -136,6 +136,7 @@ * [Segment Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree.py) * [Segment Tree Other](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree_other.py) 
* [Treap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/treap.py) + * [Wavelet Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/wavelet_tree.py) * Disjoint Set * [Alternate Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/alternate_disjoint_set.py) * [Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/disjoint_set.py) @@ -232,6 +233,7 @@ ## Dynamic Programming * [Abbreviation](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/abbreviation.py) * [Bitmask](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/bitmask.py) + * [Catalan Numbers](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/catalan_numbers.py) * [Climbing Stairs](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/climbing_stairs.py) * [Edit Distance](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/edit_distance.py) * [Factorial](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/factorial.py) diff --git a/ciphers/decrypt_caesar_with_chi_squared.py b/ciphers/decrypt_caesar_with_chi_squared.py index e7faeae73..7e3705b8f 100644 --- a/ciphers/decrypt_caesar_with_chi_squared.py +++ b/ciphers/decrypt_caesar_with_chi_squared.py @@ -222,9 +222,10 @@ def decrypt_caesar_with_chi_squared( # Get the most likely cipher by finding the cipher with the smallest chi squared # statistic - most_likely_cipher: int = min( - chi_squared_statistic_values, key=chi_squared_statistic_values.get - ) # type: ignore # First argument to `min` is not optional + most_likely_cipher: int = min( # type: ignore + chi_squared_statistic_values, # type: ignore + key=chi_squared_statistic_values.get, # type: ignore + ) # type: ignore # Get all the data from the most likely cipher (key, decoded message) ( diff --git 
a/data_structures/binary_tree/wavelet_tree.py b/data_structures/binary_tree/wavelet_tree.py new file mode 100644 index 000000000..1607244f7 --- /dev/null +++ b/data_structures/binary_tree/wavelet_tree.py @@ -0,0 +1,206 @@ +""" +Wavelet tree is a data-structure designed to efficiently answer various range queries +for arrays. Wavelet trees are different from other binary trees in the sense that +the nodes are split based on the actual values of the elements and not on indices, +as is the case with segment trees or Fenwick trees. You can read more about them here: +1. https://users.dcc.uchile.cl/~jperez/papers/ioiconf16.pdf +2. https://www.youtube.com/watch?v=4aSv9PcecDw&t=811s +3. https://www.youtube.com/watch?v=CybAgVF-MMc&t=1178s +""" + +from typing import Optional + +test_array = [2, 1, 4, 5, 6, 0, 8, 9, 1, 2, 0, 6, 4, 2, 0, 6, 5, 3, 2, 7] + + +class Node: + def __init__(self, length: int) -> None: + self.minn: int = -1 + self.maxx: int = -1 + self.map_left: list[int] = [-1] * length + self.left: Optional[Node] = None + self.right: Optional[Node] = None + + def __repr__(self) -> str: + """ + >>> node = Node(length=27) + >>> repr(node) + 'min_value: -1, max_value: -1' + >>> repr(node) == str(node) + True + """ + return f"min_value: {self.minn}, max_value: {self.maxx}" + + +def build_tree(arr: list[int]) -> Node: + """ + Builds the tree for arr and returns the root + of the constructed tree + + >>> build_tree(test_array) + min_value: 0, max_value: 9 + """ + root = Node(len(arr)) + root.minn, root.maxx = min(arr), max(arr) + # Leaf node case where the node contains only one unique value + if root.minn == root.maxx: + return root + """ + Take the mean of min and max element of arr as the pivot and + partition arr into left_arr and right_arr with all elements <= pivot in the + left_arr and the rest in right_arr, maintaining the order of the elements, + then recursively build trees for left_arr and right_arr + """ + pivot = (root.minn + root.maxx) // 2 + left_arr, 
right_arr = [], [] + for index, num in enumerate(arr): + if num <= pivot: + left_arr.append(num) + else: + right_arr.append(num) + root.map_left[index] = len(left_arr) + root.left = build_tree(left_arr) + root.right = build_tree(right_arr) + return root + + +def rank_till_index(node: Node, num: int, index: int) -> int: + """ + Returns the number of occurrences of num in interval [0, index] in the list + + >>> root = build_tree(test_array) + >>> rank_till_index(root, 6, 6) + 1 + >>> rank_till_index(root, 2, 0) + 1 + >>> rank_till_index(root, 1, 10) + 2 + >>> rank_till_index(root, 17, 7) + 0 + >>> rank_till_index(root, 0, 9) + 1 + """ + if index < 0: + return 0 + # Leaf node cases + if node.minn == node.maxx: + return index + 1 if node.minn == num else 0 + pivot = (node.minn + node.maxx) // 2 + if num <= pivot: + # go to the left subtree and map index to the left subtree + return rank_till_index(node.left, num, node.map_left[index] - 1) + else: + # go to the right subtree and map index to the right subtree + return rank_till_index(node.right, num, index - node.map_left[index]) + + +def rank(node: Node, num: int, start: int, end: int) -> int: + """ + Returns the number of occurrences of num in interval [start, end] in the list + + >>> root = build_tree(test_array) + >>> rank(root, 6, 3, 13) + 2 + >>> rank(root, 2, 0, 19) + 4 + >>> rank(root, 9, 2 ,2) + 0 + >>> rank(root, 0, 5, 10) + 2 + """ + if start > end: + return 0 + rank_till_end = rank_till_index(node, num, end) + rank_before_start = rank_till_index(node, num, start - 1) + return rank_till_end - rank_before_start + + +def quantile(node: Node, index: int, start: int, end: int) -> int: + """ + Returns the index'th smallest element in interval [start, end] in the list + index is 0-indexed + + >>> root = build_tree(test_array) + >>> quantile(root, 2, 2, 5) + 5 + >>> quantile(root, 5, 2, 13) + 4 + >>> quantile(root, 0, 6, 6) + 8 + >>> quantile(root, 4, 2, 5) + -1 + """ + if index > (end - start) or start > end: + return 
-1 + # Leaf node case + if node.minn == node.maxx: + return node.minn + # Number of elements in the left subtree in interval [start, end] + num_elements_in_left_tree = node.map_left[end] - ( + node.map_left[start - 1] if start else 0 + ) + if num_elements_in_left_tree > index: + return quantile( + node.left, + index, + (node.map_left[start - 1] if start else 0), + node.map_left[end] - 1, + ) + else: + return quantile( + node.right, + index - num_elements_in_left_tree, + start - (node.map_left[start - 1] if start else 0), + end - node.map_left[end], + ) + + +def range_counting( + node: Node, start: int, end: int, start_num: int, end_num: int +) -> int: + """ + Returns the number of elements in range [start_num, end_num] + in interval [start, end] in the list + + >>> root = build_tree(test_array) + >>> range_counting(root, 1, 10, 3, 7) + 3 + >>> range_counting(root, 2, 2, 1, 4) + 1 + >>> range_counting(root, 0, 19, 0, 100) + 20 + >>> range_counting(root, 1, 0, 1, 100) + 0 + >>> range_counting(root, 0, 17, 100, 1) + 0 + """ + if ( + start > end + or start_num > end_num + or node.minn > end_num + or node.maxx < start_num + ): + return 0 + if start_num <= node.minn and node.maxx <= end_num: + return end - start + 1 + left = range_counting( + node.left, + (node.map_left[start - 1] if start else 0), + node.map_left[end] - 1, + start_num, + end_num, + ) + right = range_counting( + node.right, + start - (node.map_left[start - 1] if start else 0), + end - node.map_left[end], + start_num, + end_num, + ) + return left + right + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/requirements.txt b/requirements.txt index 8bbb8d524..4867de26f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,5 @@ sklearn statsmodels sympy tensorflow +types-requests xgboost diff --git a/scripts/validate_solutions.py b/scripts/validate_solutions.py index 68461dca6..ca4af5261 100755 --- a/scripts/validate_solutions.py +++ b/scripts/validate_solutions.py 
@@ -21,7 +21,7 @@ with open(PROJECT_EULER_ANSWERS_PATH) as file_handle: def convert_path_to_module(file_path: pathlib.Path) -> ModuleType: """Converts a file path to a Python module""" spec = importlib.util.spec_from_file_location(file_path.name, str(file_path)) - module = importlib.util.module_from_spec(spec) + module = importlib.util.module_from_spec(spec) # type: ignore spec.loader.exec_module(module) # type: ignore return module