mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-17 14:58:10 +00:00
Wavelet tree (#4267)
* Added the matrix_exponentiation.py file in maths directory * Implemented the requested changes * Update matrix_exponentiation.py * resolve merge conflict with upstream branch * add new line at end of file * add wavelet_tree * fix isort issue * updating DIRECTORY.md * fix variable names in wavelet_tree and correct typo * Add type hints and variable renaming * Update data_structures/binary_tree/wavelet_tree.py Add doctests to placate the algorithm-bot, thanks to @cclauss. Co-authored-by: Christian Clauss <cclauss@me.com> * Move doctest to individual functions and reformat code * Move common test array to the global scope and reuse in tests * Move test array to global scope and minor linting changes * Correct the failing pytest tests * Use built-in list for type annotation * Update wavelet_tree.py * types-requests * updating DIRECTORY.md * Update wavelet_tree.py * # type: ignore * # type: ignore * Update decrypt_caesar_with_chi_squared.py * , * Update decrypt_caesar_with_chi_squared.py Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: Aniruddha Bhattacharjee <aniruddha@Aniruddhas-MacBook-Air.local>
This commit is contained in:
parent
f37d415227
commit
b743e44259
|
@ -136,6 +136,7 @@
|
||||||
* [Segment Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree.py)
|
* [Segment Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree.py)
|
||||||
* [Segment Tree Other](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree_other.py)
|
* [Segment Tree Other](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/segment_tree_other.py)
|
||||||
* [Treap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/treap.py)
|
* [Treap](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/treap.py)
|
||||||
|
* [Wavelet Tree](https://github.com/TheAlgorithms/Python/blob/master/data_structures/binary_tree/wavelet_tree.py)
|
||||||
* Disjoint Set
|
* Disjoint Set
|
||||||
* [Alternate Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/alternate_disjoint_set.py)
|
* [Alternate Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/alternate_disjoint_set.py)
|
||||||
* [Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/disjoint_set.py)
|
* [Disjoint Set](https://github.com/TheAlgorithms/Python/blob/master/data_structures/disjoint_set/disjoint_set.py)
|
||||||
|
@ -232,6 +233,7 @@
|
||||||
## Dynamic Programming
|
## Dynamic Programming
|
||||||
* [Abbreviation](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/abbreviation.py)
|
* [Abbreviation](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/abbreviation.py)
|
||||||
* [Bitmask](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/bitmask.py)
|
* [Bitmask](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/bitmask.py)
|
||||||
|
* [Catalan Numbers](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/catalan_numbers.py)
|
||||||
* [Climbing Stairs](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/climbing_stairs.py)
|
* [Climbing Stairs](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/climbing_stairs.py)
|
||||||
* [Edit Distance](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/edit_distance.py)
|
* [Edit Distance](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/edit_distance.py)
|
||||||
* [Factorial](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/factorial.py)
|
* [Factorial](https://github.com/TheAlgorithms/Python/blob/master/dynamic_programming/factorial.py)
|
||||||
|
|
|
@ -222,9 +222,10 @@ def decrypt_caesar_with_chi_squared(
|
||||||
|
|
||||||
# Get the most likely cipher by finding the cipher with the smallest chi squared
|
# Get the most likely cipher by finding the cipher with the smallest chi squared
|
||||||
# statistic
|
# statistic
|
||||||
most_likely_cipher: int = min(
|
most_likely_cipher: int = min( # type: ignore
|
||||||
chi_squared_statistic_values, key=chi_squared_statistic_values.get
|
chi_squared_statistic_values, # type: ignore
|
||||||
) # type: ignore # First argument to `min` is not optional
|
key=chi_squared_statistic_values.get, # type: ignore
|
||||||
|
) # type: ignore
|
||||||
|
|
||||||
# Get all the data from the most likely cipher (key, decoded message)
|
# Get all the data from the most likely cipher (key, decoded message)
|
||||||
(
|
(
|
||||||
|
|
206
data_structures/binary_tree/wavelet_tree.py
Normal file
206
data_structures/binary_tree/wavelet_tree.py
Normal file
|
@ -0,0 +1,206 @@
|
||||||
|
"""
|
||||||
|
Wavelet tree is a data-structure designed to efficiently answer various range queries
|
||||||
|
for arrays. Wavelet trees are different from other binary trees in the sense that
|
||||||
|
the nodes are split based on the actual values of the elements and not on indices,
|
||||||
|
as is the case with segment trees or Fenwick trees. You can read more about them here:
|
||||||
|
1. https://users.dcc.uchile.cl/~jperez/papers/ioiconf16.pdf
|
||||||
|
2. https://www.youtube.com/watch?v=4aSv9PcecDw&t=811s
|
||||||
|
3. https://www.youtube.com/watch?v=CybAgVF-MMc&t=1178s
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# Sample input shared by the doctests of the functions in this module.
test_array = [2, 1, 4, 5, 6, 0, 8, 9, 1, 2, 0, 6, 4, 2, 0, 6, 5, 3, 2, 7]
|
||||||
|
|
||||||
|
|
||||||
|
class Node:
    """
    One vertex of a wavelet tree.

    A freshly created node carries placeholder values only: minn and maxx are -1,
    map_left is a list of ``length`` entries all set to -1, and both children are
    None. build_tree() fills these in.
    """

    def __init__(self, length: int) -> None:
        # Children stay None until build_tree() attaches subtrees.
        self.left: Optional[Node] = None
        self.right: Optional[Node] = None
        # One slot per position of the list this node is built from.
        self.map_left: list[int] = [-1] * length
        # Value range covered by the node; -1 means "not set yet".
        self.minn: int = -1
        self.maxx: int = -1

    def __repr__(self) -> str:
        """
        Show the value range stored in the node.

        >>> node = Node(length=27)
        >>> repr(node)
        'min_value: -1, max_value: -1'
        >>> repr(node) == str(node)
        True
        """
        return f"min_value: {self.minn}, max_value: {self.maxx}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_tree(arr: list[int]) -> Node:
    """
    Build the wavelet tree for arr and return the root of the constructed tree.

    Every node stores the smallest and largest value of the list it was built
    from. A node whose values are all equal is a leaf. Otherwise the list is
    split around pivot = (smallest + largest) // 2: values <= pivot go to the
    left child and the rest go to the right child, both keeping their original
    relative order. map_left[i] records how many of the values at positions
    0..i went to the left child.

    Raises ValueError if arr is empty.

    >>> build_tree(test_array)
    min_value: 0, max_value: 9
    >>> build_tree([7, 7, 7])
    min_value: 7, max_value: 7
    >>> build_tree([])
    Traceback (most recent call last):
        ...
    ValueError: build_tree() requires a non-empty list
    """
    if not arr:
        # min()/max() would fail on an empty list with an unrelated message.
        raise ValueError("build_tree() requires a non-empty list")
    root = Node(len(arr))
    root.minn, root.maxx = min(arr), max(arr)
    # Leaf node case where the node contains only one unique value
    if root.minn == root.maxx:
        return root
    # Because minn < maxx here, minn <= pivot < maxx, so both halves are
    # guaranteed to be non-empty and the recursion always terminates.
    pivot = (root.minn + root.maxx) // 2
    left_arr: list[int] = []
    right_arr: list[int] = []
    for index, num in enumerate(arr):
        if num <= pivot:
            left_arr.append(num)
        else:
            right_arr.append(num)
        # Running count of values sent left among positions 0..index.
        root.map_left[index] = len(left_arr)
    root.left = build_tree(left_arr)
    root.right = build_tree(right_arr)
    return root
|
||||||
|
|
||||||
|
|
||||||
|
def rank_till_index(node: Node, num: int, index: int) -> int:
    """
    Returns the number of occurrences of num in interval [0, index] in the list

    node is the root of the (sub)tree to search. A negative index denotes an
    empty interval and yields 0. An index beyond the last position is clamped
    to the last position, so the whole list is searched instead of raising
    IndexError or over-counting.

    >>> root = build_tree(test_array)
    >>> rank_till_index(root, 6, 6)
    1
    >>> rank_till_index(root, 2, 0)
    1
    >>> rank_till_index(root, 1, 10)
    2
    >>> rank_till_index(root, 17, 7)
    0
    >>> rank_till_index(root, 0, 9)
    1
    >>> rank_till_index(root, 2, 100)
    4
    """
    if index < 0:
        return 0
    # map_left has one entry per position of this node's list, so its length
    # is the number of positions that can be queried here.
    index = min(index, len(node.map_left) - 1)
    # Leaf node case: every stored value equals node.minn, so either all
    # index + 1 positions match or none do.
    if node.minn == node.maxx:
        return index + 1 if node.minn == num else 0
    pivot = (node.minn + node.maxx) // 2
    went_left = node.map_left[index]
    if num <= pivot:
        # go to the left subtree and map index to the left subtree
        return rank_till_index(node.left, num, went_left - 1)
    # go to the right subtree and map index to the right subtree
    return rank_till_index(node.right, num, index - went_left)
|
||||||
|
|
||||||
|
|
||||||
|
def rank(node: Node, num: int, start: int, end: int) -> int:
    """
    Count how often num occurs at positions start..end (both inclusive) of the
    list the tree rooted at node was built from.

    The count is obtained as the difference of two prefix counts. An interval
    with start > end is empty and gives 0.

    >>> root = build_tree(test_array)
    >>> rank(root, 6, 3, 13)
    2
    >>> rank(root, 2, 0, 19)
    4
    >>> rank(root, 9, 2, 2)
    0
    >>> rank(root, 0, 5, 10)
    2
    """
    if end < start:
        return 0
    # occurrences in [0, end] minus occurrences in [0, start - 1]
    upto_end = rank_till_index(node, num, end)
    return upto_end - rank_till_index(node, num, start - 1)
|
||||||
|
|
||||||
|
|
||||||
|
def quantile(node: Node, index: int, start: int, end: int) -> int:
    """
    Returns the index'th smallest element in interval [start, end] in the list
    index is 0-indexed

    Returns -1 when the query is invalid: index is negative or not smaller
    than the interval length, the interval is empty (start > end), start is
    negative, or end lies beyond the last position of the list.

    >>> root = build_tree(test_array)
    >>> quantile(root, 2, 2, 5)
    5
    >>> quantile(root, 5, 2, 13)
    4
    >>> quantile(root, 0, 6, 6)
    8
    >>> quantile(root, 4, 2, 5)
    -1
    >>> quantile(root, -1, 0, 19)
    -1
    >>> quantile(root, 0, 0, 25)
    -1
    """
    if (
        index < 0
        or start < 0
        or start > end
        or index > end - start
        # map_left has one entry per position of this node's list
        or end >= len(node.map_left)
    ):
        return -1
    # Leaf node case
    if node.minn == node.maxx:
        return node.minn
    # How many values at positions before start / up to end went to the left child
    left_before_start = node.map_left[start - 1] if start else 0
    left_through_end = node.map_left[end]
    # Number of elements in the left subtree in interval [start, end]
    num_elements_in_left_tree = left_through_end - left_before_start
    if num_elements_in_left_tree > index:
        # The answer is among the values that went left; translate the
        # interval into positions of the left child's list.
        return quantile(
            node.left,
            index,
            left_before_start,
            left_through_end - 1,
        )
    # Skip the values that went left and continue in the right child.
    return quantile(
        node.right,
        index - num_elements_in_left_tree,
        start - left_before_start,
        end - left_through_end,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def range_counting(
    node: Node, start: int, end: int, start_num: int, end_num: int
) -> int:
    """
    Returns the number of elements in range [start_num, end_num]
    in interval [start, end] in the list

    Positions outside the list are ignored: start is clamped to 0 and end to
    the last position, so an over-long interval counts only existing elements.
    An empty interval (start > end) or an empty value range
    (start_num > end_num) gives 0.

    >>> root = build_tree(test_array)
    >>> range_counting(root, 1, 10, 3, 7)
    3
    >>> range_counting(root, 2, 2, 1, 4)
    1
    >>> range_counting(root, 0, 19, 0, 100)
    20
    >>> range_counting(root, 1, 0, 1, 100)
    0
    >>> range_counting(root, 0, 17, 100, 1)
    0
    >>> range_counting(root, 0, 100, 0, 100)
    20
    >>> range_counting(root, -5, 19, 0, 100)
    20
    """
    # map_left has one entry per position of this node's list; restrict the
    # interval to positions that exist so neither the shortcut below nor the
    # map_left lookups see an out-of-range position.
    start = max(start, 0)
    end = min(end, len(node.map_left) - 1)
    if (
        start > end
        or start_num > end_num
        or node.minn > end_num
        or node.maxx < start_num
    ):
        return 0
    # Every value below this node lies inside [start_num, end_num], so all
    # positions of the interval count.
    if start_num <= node.minn and node.maxx <= end_num:
        return end - start + 1
    # How many values at positions before start / up to end went to the left child
    left_before_start = node.map_left[start - 1] if start else 0
    left_through_end = node.map_left[end]
    left = range_counting(
        node.left,
        left_before_start,
        left_through_end - 1,
        start_num,
        end_num,
    )
    right = range_counting(
        node.right,
        start - left_before_start,
        end - left_through_end,
        start_num,
        end_num,
    )
    return left + right
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Executed as a script: run every doctest embedded in this module.
    from doctest import testmod

    testmod()
|
|
@ -14,4 +14,5 @@ sklearn
|
||||||
statsmodels
|
statsmodels
|
||||||
sympy
|
sympy
|
||||||
tensorflow
|
tensorflow
|
||||||
|
types-requests
|
||||||
xgboost
|
xgboost
|
||||||
|
|
|
@ -21,7 +21,7 @@ with open(PROJECT_EULER_ANSWERS_PATH) as file_handle:
|
||||||
def convert_path_to_module(file_path: pathlib.Path) -> ModuleType:
    """Converts a file path to a Python module

    The file at file_path is loaded and executed, and the resulting module
    object is returned. The module is named after the final component of
    file_path.

    Raises ImportError when importlib cannot produce a spec with a loader for
    the path (spec_from_file_location returns None, e.g. for a file suffix it
    has no loader for).
    """
    spec = importlib.util.spec_from_file_location(file_path.name, str(file_path))
    # Both the spec and its loader are optional in importlib's API; fail with a
    # clear import error instead of an AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {file_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user