# Python/data_structures/persistent_segment_tree.py

from __future__ import annotations
2024-10-19 15:58:29 +05:30
class Node:
    """A node of the persistent segment tree.

    A leaf stores one array element; an internal node stores the sum of the
    values of its two children.

    >>> leaf = Node(5)
    >>> leaf.value, leaf.left, leaf.right
    (5, None, None)
    """

    # Every update allocates a fresh root-to-leaf path of nodes, so skip the
    # per-instance __dict__ to keep each node small.
    __slots__ = ("left", "right", "value")

    def __init__(self, value: int = 0) -> None:
        self.value: int = value  # sum of the segment this node covers
        self.left: Node | None = None  # child covering [start, mid]
        self.right: Node | None = None  # child covering [mid + 1, end]


class PersistentSegmentTree:
    """Versioned segment tree with point updates and range-sum queries.

    Version 0 is built from the initial array. Each call to ``update`` leaves
    every existing version untouched and appends a new version that shares all
    unmodified subtrees with the version it was derived from.

    An empty array is allowed: every query on it returns 0 and every update
    raises ``IndexError``.

    >>> PersistentSegmentTree([]).query(0, 0, 0)
    0
    """

    def __init__(self, arr: list[int]) -> None:
        self.n: int = len(arr)
        # roots[v] is the root of version v. It is None only when the tree was
        # built from an empty array (there is nothing to build a node from).
        self.roots: list[Node | None] = [
            self._build(arr, 0, self.n - 1) if self.n else None
        ]

    def _build(self, arr: list[int], start: int, end: int) -> Node:
        """
        Builds a segment tree over ``arr[start..end]`` (inclusive bounds).

        Requires ``start <= end``.

        >>> pst = PersistentSegmentTree([1, 2, 3, 4])
        >>> root = pst._build([1, 2, 3, 4], 0, 3)
        >>> root.value  # Sum of the whole array
        10
        >>> root.left.value  # Sum of the left half
        3
        >>> root.right.value  # Sum of the right half
        7
        """
        if start == end:
            return Node(arr[start])
        mid = (start + end) // 2
        node = Node()
        node.left = self._build(arr, start, mid)
        node.right = self._build(arr, mid + 1, end)
        node.value = node.left.value + node.right.value
        return node

    def update(self, version: int, index: int, value: int) -> int:
        """
        Sets position ``index`` to ``value`` starting from ``version`` and
        returns the number of the newly created version.

        Raises ``IndexError`` if ``index`` is outside ``[0, n - 1]`` or if
        ``version`` does not exist. No new version is recorded in that case.

        >>> pst = PersistentSegmentTree([1, 2, 3, 4])
        >>> version_1 = pst.update(0, 1, 5)  # Update index 1 to 5
        >>> pst.query(version_1, 0, 3)  # Sum of all elements in new version
        13
        >>> pst.query(0, 0, 3)  # Original version remains unchanged
        10
        >>> version_2 = pst.update(version_1, 3, 6)  # Index 3 -> 6 in version_1
        >>> pst.query(version_2, 0, 3)  # Sum of all elements in newest version
        15
        >>> pst.update(0, 4, 9)  # Index must lie inside the array
        Traceback (most recent call last):
            ...
        IndexError: index 4 out of range for array of length 4
        """
        # Without this check the descent would end at leaf 0 or leaf n - 1 and
        # silently overwrite the wrong element.
        if not 0 <= index < self.n:
            raise IndexError(
                f"index {index} out of range for array of length {self.n}"
            )
        new_root = self._update(self.roots[version], 0, self.n - 1, index, value)
        self.roots.append(new_root)
        return len(self.roots) - 1

    def _update(
        self, node: Node | None, start: int, end: int, index: int, value: int
    ) -> Node:
        """
        Returns a copy of the subtree ``node`` (covering ``[start, end]``) in
        which position ``index`` holds ``value``.

        Only the nodes on the path to ``index`` are newly allocated; the
        sibling subtrees are reused from ``node``.

        Raises ``ValueError`` if ``node`` is None.
        """
        if node is None:
            raise ValueError("Cannot update a None node")

        if start == end:
            return Node(value)

        mid = (start + end) // 2
        left_child, right_child = node.left, node.right
        # Rebuild only the child whose range contains ``index``; the other
        # child is shared with the previous version.
        if index <= mid:
            left_child = self._update(left_child, start, mid, index, value)
        else:
            right_child = self._update(right_child, mid + 1, end, index, value)

        # Treat a missing child as contributing 0, symmetrically on both sides.
        new_node = Node(
            (left_child.value if left_child is not None else 0)
            + (right_child.value if right_child is not None else 0)
        )
        new_node.left = left_child
        new_node.right = right_child
        return new_node

    def query(self, version: int, left: int, right: int) -> int:
        """
        Returns the sum of positions ``left..right`` (inclusive) in ``version``.

        Parts of the range that fall outside the array contribute nothing, and
        a range with ``left > right`` yields 0. Raises ``IndexError`` if
        ``version`` does not exist.

        >>> pst = PersistentSegmentTree([1, 2, 3, 4])
        >>> pst.query(0, 0, 3)  # Sum of all elements in original version
        10
        >>> pst.query(0, 1, 2)  # Sum of index 1 and 2 in original version
        5
        >>> version_1 = pst.update(0, 1, 5)  # Update index 1 to 5
        >>> pst.query(version_1, 0, 3)  # Sum of all elements in new version
        13
        >>> pst.query(version_1, 1, 2)  # Sum of index 1 and 2 in new version
        8
        """
        return self._query(self.roots[version], 0, self.n - 1, left, right)

    def _query(
        self, node: Node | None, start: int, end: int, left: int, right: int
    ) -> int:
        """
        Returns the sum of the part of ``[left, right]`` that overlaps the
        segment ``[start, end]`` covered by ``node``.
        """
        if node is None:
            return 0

        # No overlap between the query range and this node's segment.
        if left > end or right < start:
            return 0
        # This node's segment lies entirely inside the query range.
        if left <= start and right >= end:
            return node.value
        # Partial overlap: combine the answers of both halves.
        mid = (start + end) // 2
        return self._query(node.left, start, mid, left, right) + self._query(
            node.right, mid + 1, end, left, right
        )
2024-10-19 16:16:21 +05:30
2024-10-19 16:19:23 +05:30
# Execute the doctests embedded in this module when it is run as a script.
if __name__ == "__main__":
    import doctest

    print("Running doctests...")
    outcome = doctest.testmod()
    attempted, failed = outcome.attempted, outcome.failed
    print(f"Ran {attempted} tests, {failed} failed.")