mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-24 01:48:39 +00:00
Compare commits
3 Commits
179298e3a2
...
14bdd174bb
Author | SHA1 | Date | |
---|---|---|---|
|
14bdd174bb | ||
|
2f9b03393c | ||
|
5cb0a000c4 |
@ -232,6 +232,7 @@
|
|||||||
* [Double Ended Queue](data_structures/queue/double_ended_queue.py)
|
* [Double Ended Queue](data_structures/queue/double_ended_queue.py)
|
||||||
* [Linked Queue](data_structures/queue/linked_queue.py)
|
* [Linked Queue](data_structures/queue/linked_queue.py)
|
||||||
* [Priority Queue Using List](data_structures/queue/priority_queue_using_list.py)
|
* [Priority Queue Using List](data_structures/queue/priority_queue_using_list.py)
|
||||||
|
* [Queue By Two Stacks](data_structures/queue/queue_by_two_stacks.py)
|
||||||
* [Queue On List](data_structures/queue/queue_on_list.py)
|
* [Queue On List](data_structures/queue/queue_on_list.py)
|
||||||
* [Queue On Pseudo Stack](data_structures/queue/queue_on_pseudo_stack.py)
|
* [Queue On Pseudo Stack](data_structures/queue/queue_on_pseudo_stack.py)
|
||||||
* Stacks
|
* Stacks
|
||||||
|
105
data_structures/hashing/bloom_filter.py
Normal file
105
data_structures/hashing/bloom_filter.py
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
"""
|
||||||
|
See https://en.wikipedia.org/wiki/Bloom_filter
|
||||||
|
|
||||||
|
The use of this data structure is to test membership in a set.
|
||||||
|
Compared to Python's built-in set() it is more space-efficient.
|
||||||
|
In the following example, only 8 bits of memory will be used:
|
||||||
|
>>> bloom = Bloom(size=8)
|
||||||
|
|
||||||
|
Initially, the filter contains all zeros:
|
||||||
|
>>> bloom.bitstring
|
||||||
|
'00000000'
|
||||||
|
|
||||||
|
When an element is added, two bits are set to 1
|
||||||
|
since there are 2 hash functions in this implementation:
|
||||||
|
>>> "Titanic" in bloom
|
||||||
|
False
|
||||||
|
>>> bloom.add("Titanic")
|
||||||
|
>>> bloom.bitstring
|
||||||
|
'01100000'
|
||||||
|
>>> "Titanic" in bloom
|
||||||
|
True
|
||||||
|
|
||||||
|
However, sometimes only one bit is added
|
||||||
|
because both hash functions return the same value
|
||||||
|
>>> bloom.add("Avatar")
|
||||||
|
>>> "Avatar" in bloom
|
||||||
|
True
|
||||||
|
>>> bloom.format_hash("Avatar")
|
||||||
|
'00000100'
|
||||||
|
>>> bloom.bitstring
|
||||||
|
'01100100'
|
||||||
|
|
||||||
|
Elements that were not added should return False ...
|
||||||
|
>>> not_present_films = ("The Godfather", "Interstellar", "Parasite", "Pulp Fiction")
|
||||||
|
>>> {
|
||||||
|
... film: bloom.format_hash(film) for film in not_present_films
|
||||||
|
... } # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
{'The Godfather': '00000101',
|
||||||
|
'Interstellar': '00000011',
|
||||||
|
'Parasite': '00010010',
|
||||||
|
'Pulp Fiction': '10000100'}
|
||||||
|
>>> any(film in bloom for film in not_present_films)
|
||||||
|
False
|
||||||
|
|
||||||
|
but sometimes there are false positives:
|
||||||
|
>>> "Ratatouille" in bloom
|
||||||
|
True
|
||||||
|
>>> bloom.format_hash("Ratatouille")
|
||||||
|
'01100000'
|
||||||
|
|
||||||
|
The probability of a false positive increases with the number of elements added.
|
||||||
|
The probability decreases with the number of bits in the bitarray.
|
||||||
|
>>> bloom.estimated_error_rate
|
||||||
|
0.140625
|
||||||
|
>>> bloom.add("The Godfather")
|
||||||
|
>>> bloom.estimated_error_rate
|
||||||
|
0.25
|
||||||
|
>>> bloom.bitstring
|
||||||
|
'01100101'
|
||||||
|
"""
|
||||||
|
from hashlib import md5, sha256
|
||||||
|
|
||||||
|
# Hash constructors used to derive bit positions; each added value sets (at
# most) one bit per function.
HASH_FUNCTIONS = (sha256, md5)


class Bloom:
    """
    Bloom filter whose bits are stored in a single Python ``int``.

    Membership tests may report false positives but never false negatives:
    a value that was added is always reported as present.
    """

    def __init__(self, size: int = 8) -> None:
        """
        Create an empty filter with `size` bits.

        Raises:
            ValueError: if `size` is smaller than 1. Bit positions are computed
                modulo `size`, so a zero or negative size cannot work.

        >>> Bloom(size=4).bitstring
        '0000'
        >>> Bloom(size=0)
        Traceback (most recent call last):
            ...
        ValueError: size must be a positive integer, got 0
        """
        if size < 1:
            raise ValueError(f"size must be a positive integer, got {size!r}")
        self.bitarray = 0b0
        self.size = size

    def add(self, value: str) -> None:
        """Set the bits selected by the hash functions for `value`."""
        self.bitarray |= self.hash_(value)

    def exists(self, value: str) -> bool:
        """
        Return True if every bit selected for `value` is already set.

        True may be a false positive; False means `value` was never added.
        """
        mask = self.hash_(value)
        return (mask & self.bitarray) == mask

    def __contains__(self, other: str) -> bool:
        """Support ``value in bloom`` by delegating to :meth:`exists`."""
        return self.exists(other)

    def format_bin(self, bitarray: int) -> str:
        """
        Render `bitarray` as a binary string left-padded with zeros to `size`
        characters (bit 0 is the rightmost character).

        >>> Bloom(size=8).format_bin(5)
        '00000101'
        """
        return bin(bitarray)[2:].zfill(self.size)

    @property
    def bitstring(self) -> str:
        """Current filter contents formatted by :meth:`format_bin`."""
        return self.format_bin(self.bitarray)

    def hash_(self, value: str) -> int:
        """
        Return an int mask with one bit set per hash function.

        Each digest is read as a little-endian integer and reduced modulo
        `size` to pick a bit position. Two functions may pick the same
        position, in which case the mask has fewer set bits than there are
        hash functions.
        """
        mask = 0b0
        encoded = value.encode()  # invariant across hash functions
        for func in HASH_FUNCTIONS:
            position = int.from_bytes(func(encoded).digest(), "little") % self.size
            mask |= 1 << position
        return mask

    def format_hash(self, value: str) -> str:
        """Return the mask produced by :meth:`hash_` as a padded bit string."""
        return self.format_bin(self.hash_(value))

    @property
    def estimated_error_rate(self) -> float:
        """
        Estimate of the false-positive rate: the fraction of bits currently
        set, raised to the power of the number of hash functions.
        """
        n_ones = bin(self.bitarray).count("1")
        return (n_ones / self.size) ** len(HASH_FUNCTIONS)
|
115
data_structures/queue/queue_by_two_stacks.py
Normal file
115
data_structures/queue/queue_by_two_stacks.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
"""Queue implementation using two stacks"""
|
||||||
|
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
_T = TypeVar("_T")
|
||||||
|
|
||||||
|
|
||||||
|
class QueueByTwoStacks(Generic[_T]):
|
||||||
|
def __init__(self, iterable: Iterable[_T] | None = None) -> None:
|
||||||
|
"""
|
||||||
|
>>> QueueByTwoStacks()
|
||||||
|
Queue(())
|
||||||
|
>>> QueueByTwoStacks([10, 20, 30])
|
||||||
|
Queue((10, 20, 30))
|
||||||
|
>>> QueueByTwoStacks((i**2 for i in range(1, 4)))
|
||||||
|
Queue((1, 4, 9))
|
||||||
|
"""
|
||||||
|
self._stack1: list[_T] = list(iterable or [])
|
||||||
|
self._stack2: list[_T] = []
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""
|
||||||
|
>>> len(QueueByTwoStacks())
|
||||||
|
0
|
||||||
|
>>> from string import ascii_lowercase
|
||||||
|
>>> len(QueueByTwoStacks(ascii_lowercase))
|
||||||
|
26
|
||||||
|
>>> queue = QueueByTwoStacks()
|
||||||
|
>>> for i in range(1, 11):
|
||||||
|
... queue.put(i)
|
||||||
|
...
|
||||||
|
>>> len(queue)
|
||||||
|
10
|
||||||
|
>>> for i in range(2):
|
||||||
|
... queue.get()
|
||||||
|
1
|
||||||
|
2
|
||||||
|
>>> len(queue)
|
||||||
|
8
|
||||||
|
"""
|
||||||
|
|
||||||
|
return len(self._stack1) + len(self._stack2)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
"""
|
||||||
|
>>> queue = QueueByTwoStacks()
|
||||||
|
>>> queue
|
||||||
|
Queue(())
|
||||||
|
>>> str(queue)
|
||||||
|
'Queue(())'
|
||||||
|
>>> queue.put(10)
|
||||||
|
>>> queue
|
||||||
|
Queue((10,))
|
||||||
|
>>> queue.put(20)
|
||||||
|
>>> queue.put(30)
|
||||||
|
>>> queue
|
||||||
|
Queue((10, 20, 30))
|
||||||
|
"""
|
||||||
|
return f"Queue({tuple(self._stack2[::-1] + self._stack1)})"
|
||||||
|
|
||||||
|
def put(self, item: _T) -> None:
|
||||||
|
"""
|
||||||
|
Put `item` into the Queue
|
||||||
|
|
||||||
|
>>> queue = QueueByTwoStacks()
|
||||||
|
>>> queue.put(10)
|
||||||
|
>>> queue.put(20)
|
||||||
|
>>> len(queue)
|
||||||
|
2
|
||||||
|
>>> queue
|
||||||
|
Queue((10, 20))
|
||||||
|
"""
|
||||||
|
|
||||||
|
self._stack1.append(item)
|
||||||
|
|
||||||
|
def get(self) -> _T:
|
||||||
|
"""
|
||||||
|
Get `item` from the Queue
|
||||||
|
|
||||||
|
>>> queue = QueueByTwoStacks((10, 20, 30))
|
||||||
|
>>> queue.get()
|
||||||
|
10
|
||||||
|
>>> queue.put(40)
|
||||||
|
>>> queue.get()
|
||||||
|
20
|
||||||
|
>>> queue.get()
|
||||||
|
30
|
||||||
|
>>> len(queue)
|
||||||
|
1
|
||||||
|
>>> queue.get()
|
||||||
|
40
|
||||||
|
>>> queue.get()
|
||||||
|
Traceback (most recent call last):
|
||||||
|
...
|
||||||
|
IndexError: Queue is empty
|
||||||
|
"""
|
||||||
|
|
||||||
|
# To reduce number of attribute look-ups in `while` loop.
|
||||||
|
stack1_pop = self._stack1.pop
|
||||||
|
stack2_append = self._stack2.append
|
||||||
|
|
||||||
|
if not self._stack2:
|
||||||
|
while self._stack1:
|
||||||
|
stack2_append(stack1_pop())
|
||||||
|
|
||||||
|
if not self._stack2:
|
||||||
|
raise IndexError("Queue is empty")
|
||||||
|
return self._stack2.pop()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run every doctest embedded in this module when executed as a script.
    import doctest

    doctest.testmod()
|
Loading…
x
Reference in New Issue
Block a user