2021-09-07 11:37:03 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2023-04-27 17:32:07 +00:00
|
|
|
from abc import abstractmethod
|
2022-07-11 08:19:52 +00:00
|
|
|
from collections.abc import Iterable
|
2023-04-27 17:32:07 +00:00
|
|
|
from typing import Generic, Protocol, TypeVar
|
2018-10-19 12:48:28 +00:00
|
|
|
|
2020-01-14 11:16:11 +00:00
|
|
|
|
2023-04-27 17:32:07 +00:00
|
|
|
class Comparable(Protocol):
    """Structural type for values that support ``<``, ``>`` and ``==``.

    The type variable ``T`` used for heap elements is bound to this protocol.
    """

    @abstractmethod
    def __lt__(self: T, other: T) -> bool:
        """Return True when ``self`` orders strictly before ``other``."""

    @abstractmethod
    def __gt__(self: T, other: T) -> bool:
        """Return True when ``self`` orders strictly after ``other``."""

    @abstractmethod
    def __eq__(self: T, other: object) -> bool:
        """Return True when ``self`` and ``other`` compare equal."""


# Element type stored in a Heap; must satisfy the Comparable protocol.
T = TypeVar("T", bound=Comparable)


class Heap(Generic[T]):
    """A Max Heap Implementation

    Elements live in the list ``h``; ``heap_size`` is the number of leading
    entries of ``h`` that currently belong to the heap.

    >>> unsorted = [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5]
    >>> h = Heap()
    >>> h.build_max_heap(unsorted)
    >>> h
    [209, 201, 25, 103, 107, 15, 1, 9, 7, 11, 5]

    >>> h.extract_max()
    209
    >>> h
    [201, 107, 25, 103, 11, 15, 1, 9, 7, 5]

    >>> h.insert(100)
    >>> h
    [201, 107, 25, 103, 100, 15, 1, 9, 7, 5, 11]

    >>> h.heap_sort()
    >>> h
    [1, 5, 7, 9, 11, 15, 25, 100, 103, 107, 201]
    """

    def __init__(self) -> None:
        """Create an empty heap."""
        self.h: list[T] = []
        self.heap_size: int = 0

    def __repr__(self) -> str:
        """Return the string form of the underlying list."""
        return str(self.h)

    def parent_index(self, child_idx: int) -> int | None:
        """
        returns the parent index based on the given child index

        >>> h = Heap()
        >>> h.build_max_heap([103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5])
        >>> h
        [209, 201, 25, 103, 107, 15, 1, 9, 7, 11, 5]

        >>> h.parent_index(-1)  # returns none if index is <=0

        >>> h.parent_index(0)  # returns none if index is <=0

        >>> h.parent_index(1)
        0
        >>> h.parent_index(2)
        0
        >>> h.parent_index(3)
        1
        >>> h.parent_index(4)
        1
        >>> h.parent_index(5)
        2
        >>> h.parent_index(10.5)
        4.0
        >>> h.parent_index(209.0)
        104.0
        >>> h.parent_index("Test")
        Traceback (most recent call last):
            ...
        TypeError: '>' not supported between instances of 'str' and 'int'
        """
        if child_idx > 0:
            return (child_idx - 1) // 2
        # The root (index 0) and non-positive indices have no parent.
        return None

    def left_child_idx(self, parent_idx: int) -> int | None:
        """
        return the left child index if the left child exists.
        if not, return None.

        >>> h = Heap()
        >>> h.build_max_heap([3, 2, 1])
        >>> h.left_child_idx(0)
        1
        >>> h.left_child_idx(1) is None
        True
        """
        left_child_index = 2 * parent_idx + 1
        if left_child_index < self.heap_size:
            return left_child_index
        return None

    def right_child_idx(self, parent_idx: int) -> int | None:
        """
        return the right child index if the right child exists.
        if not, return None.

        >>> h = Heap()
        >>> h.build_max_heap([3, 2, 1])
        >>> h.right_child_idx(0)
        2
        >>> h.right_child_idx(1) is None
        True
        """
        right_child_index = 2 * parent_idx + 2
        if right_child_index < self.heap_size:
            return right_child_index
        return None

    def max_heapify(self, index: int) -> None:
        """
        correct a single violation of the heap property in a subtree's root.

        It is the function that is responsible for restoring the property
        of Max heap i.e the maximum element is always at top.

        The element at ``index`` is swapped with its larger child and the
        walk continues downwards until neither child is larger.

        >>> h = Heap()
        >>> h.h, h.heap_size = [1, 9, 8], 3
        >>> h.max_heapify(0)
        >>> h
        [9, 1, 8]
        """
        while index < self.heap_size:
            violation = index
            left_child = self.left_child_idx(index)
            right_child = self.right_child_idx(index)
            # pick the larger child, if it is larger than the current node
            if left_child is not None and self.h[left_child] > self.h[violation]:
                violation = left_child
            if right_child is not None and self.h[right_child] > self.h[violation]:
                violation = right_child
            if violation == index:
                # heap property holds here; nothing below was touched
                return
            # swap to fix the violation, then keep fixing one level down
            self.h[violation], self.h[index] = self.h[index], self.h[violation]
            index = violation

    def build_max_heap(self, collection: Iterable[T]) -> None:
        """
        build max heap from an unsorted array

        Any previous content of the heap is replaced by ``collection``.

        >>> h = Heap()
        >>> h.build_max_heap([20,40,50,20,10])
        >>> h
        [50, 40, 20, 20, 10]

        >>> h = Heap()
        >>> h.build_max_heap([1,2,3,4,5,6,7,8,9,0])
        >>> h
        [9, 8, 7, 4, 5, 6, 3, 2, 1, 0]

        >>> h = Heap()
        >>> h.build_max_heap([514,5,61,57,8,99,105])
        >>> h
        [514, 57, 105, 5, 8, 99, 61]

        >>> h = Heap()
        >>> h.build_max_heap([514,5,61.6,57,8,9.9,105])
        >>> h
        [514, 57, 105, 5, 8, 9.9, 61.6]
        """
        self.h = list(collection)
        self.heap_size = len(self.h)
        # max_heapify from right to left but exclude leaves (last level);
        # the range is empty when there are fewer than two elements
        for i in range(self.heap_size // 2 - 1, -1, -1):
            self.max_heapify(i)

    def extract_max(self) -> T:
        """
        get and remove max from heap

        Raises IndexError("Empty heap") when the heap holds no elements.

        >>> h = Heap()
        >>> h.build_max_heap([20,40,50,20,10])
        >>> h.extract_max()
        50

        >>> h = Heap()
        >>> h.build_max_heap([514,5,61,57,8,99,105])
        >>> h.extract_max()
        514

        >>> h = Heap()
        >>> h.build_max_heap([1,2,3,4,5,6,7,8,9,0])
        >>> h.extract_max()
        9

        >>> Heap().extract_max()
        Traceback (most recent call last):
            ...
        IndexError: Empty heap
        """
        if self.heap_size < 1:
            raise IndexError("Empty heap")
        last = self.h.pop()
        self.heap_size -= 1
        if self.heap_size == 0:
            # the popped element was the only one, hence the maximum
            return last
        # move the last leaf to the root and sift it down
        maximum = self.h[0]
        self.h[0] = last
        self.max_heapify(0)
        return maximum

    def insert(self, value: T) -> None:
        """
        insert a new value into the max heap

        >>> h = Heap()
        >>> h.insert(10)
        >>> h
        [10]

        >>> h = Heap()
        >>> h.insert(10)
        >>> h.insert(10)
        >>> h
        [10, 10]

        >>> h = Heap()
        >>> h.insert(10)
        >>> h.insert(10.1)
        >>> h
        [10.1, 10]

        >>> h = Heap()
        >>> h.insert(0.1)
        >>> h.insert(0)
        >>> h.insert(9)
        >>> h.insert(5)
        >>> h
        [9, 5, 0.1, 0]
        """
        new_idx = self.heap_size
        self.h.append(value)
        self.heap_size += 1
        # re-heapify every ancestor of the new leaf, from its parent up to
        # the root
        ancestor = self.parent_index(new_idx)
        while ancestor is not None:
            self.max_heapify(ancestor)
            ancestor = self.parent_index(ancestor)

    def heap_sort(self) -> None:
        """
        sort the stored elements in ascending order, in place

        The current maximum is repeatedly swapped to the end of the shrinking
        heap region. ``heap_size`` is restored afterwards, but the resulting
        ascending list no longer satisfies the max-heap property; call
        ``build_max_heap`` again before relying on ``extract_max``.

        >>> h = Heap()
        >>> h.build_max_heap([3, 1, 2])
        >>> h.heap_sort()
        >>> h
        [1, 2, 3]
        >>> h.heap_size
        3
        """
        size = self.heap_size
        for last in range(size - 1, 0, -1):
            self.h[0], self.h[last] = self.h[last], self.h[0]
            # exclude the element just placed from the heap region
            self.heap_size -= 1
            self.max_heapify(0)
        self.heap_size = size
|
2020-01-14 11:16:11 +00:00
|
|
|
|
|
|
|
|
2020-12-26 03:12:37 +00:00
|
|
|
if __name__ == "__main__":
    import doctest

    # Check every docstring example before running the demo.
    doctest.testmod()

    # Sample inputs for the demo: single elements, duplicates, already
    # ordered data, negatives and larger mixed lists.
    demo_inputs = [
        [0],
        [2],
        [3, 5],
        [5, 3],
        [5, 5],
        [0, 0, 0, 0],
        [1, 1, 1, 1],
        [2, 2, 3, 5],
        [0, 2, 2, 3, 5],
        [2, 5, 3, 0, 2, 3, 0, 3],
        [6, 1, 2, 7, 9, 3, 4, 5, 10, 8],
        [103, 9, 1, 7, 11, 15, 25, 201, 209, 107, 5],
        [-45, -2, -5],
    ]

    # Walk each sample through build -> extract -> insert -> sort and print
    # the heap after every step.
    for unsorted in demo_inputs:
        print(f"unsorted array: {unsorted}")

        heap: Heap[int] = Heap()
        heap.build_max_heap(unsorted)
        print(f"after build heap: {heap}")

        print(f"max value: {heap.extract_max()}")
        print(f"after max value removed: {heap}")

        heap.insert(100)
        print(f"after new value 100 inserted: {heap}")

        heap.heap_sort()
        print(f"heap-sorted array: {heap}\n")
|