mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-03-04 05:48:41 +00:00
from b to bloom
This commit is contained in:
parent
c132d501b6
commit
313c80c369
@ -4,61 +4,61 @@ See https://en.wikipedia.org/wiki/Bloom_filter
|
|||||||
The use of this data structure is to test membership in a set.
|
The use of this data structure is to test membership in a set.
|
||||||
Compared to Python's built-in set() it is more space-efficient.
|
Compared to Python's built-in set() it is more space-efficient.
|
||||||
In the following example, only 8 bits of memory will be used:
|
In the following example, only 8 bits of memory will be used:
|
||||||
>>> b = Bloom(size=8)
|
>>> bloom = Bloom(size=8)
|
||||||
>>> "Titanic" in b
|
>>> "Titanic" in bloom
|
||||||
False
|
False
|
||||||
|
|
||||||
Initially the filter contains all zeros:
|
Initially the filter contains all zeros:
|
||||||
>>> b.bitstring
|
>>> bloom.bitstring
|
||||||
'00000000'
|
'00000000'
|
||||||
|
|
||||||
When an element is added, two bits are set to 1
|
When an element is added, two bits are set to 1
|
||||||
since there are 2 hash functions in this implementation:
|
since there are 2 hash functions in this implementation:
|
||||||
>>> b.add("Titanic")
|
>>> bloom.add("Titanic")
|
||||||
>>> b.bitstring
|
>>> bloom.bitstring
|
||||||
'01100000'
|
'01100000'
|
||||||
>>> "Titanic" in b
|
>>> "Titanic" in bloom
|
||||||
True
|
True
|
||||||
|
|
||||||
However, sometimes only one bit is added
|
However, sometimes only one bit is added
|
||||||
because both hash functions return the same value:
|
because both hash functions return the same value:
|
||||||
>>> b.add("Avatar")
|
>>> bloom.add("Avatar")
|
||||||
>>> b.format_hash("Avatar")
|
>>> bloom.format_hash("Avatar")
|
||||||
'00000100'
|
'00000100'
|
||||||
>>> b.bitstring
|
>>> bloom.bitstring
|
||||||
'01100100'
|
'01100100'
|
||||||
|
|
||||||
Elements that were not added should return False ...
|
Elements that were not added should return False ...
|
||||||
>>> "The Goodfather" in b
|
>>> "The Goodfather" in bloom
|
||||||
False
|
False
|
||||||
>>> b.format_hash("The Goodfather")
|
>>> bloom.format_hash("The Goodfather")
|
||||||
'00011000'
|
'00011000'
|
||||||
>>> "Interstellar" in b
|
>>> "Interstellar" in bloom
|
||||||
False
|
False
|
||||||
>>> b.format_hash("Interstellar")
|
>>> bloom.format_hash("Interstellar")
|
||||||
'00000011'
|
'00000011'
|
||||||
>>> "Parasite" in b
|
>>> "Parasite" in bloom
|
||||||
False
|
False
|
||||||
>>> b.format_hash("Parasite")
|
>>> bloom.format_hash("Parasite")
|
||||||
'00010010'
|
'00010010'
|
||||||
>>> "Pulp Fiction" in b
|
>>> "Pulp Fiction" in bloom
|
||||||
False
|
False
|
||||||
>>> b.format_hash("Pulp Fiction")
|
>>> bloom.format_hash("Pulp Fiction")
|
||||||
'10000100'
|
'10000100'
|
||||||
|
|
||||||
but sometimes there are false positives:
|
but sometimes there are false positives:
|
||||||
>>> "Ratatouille" in b
|
>>> "Ratatouille" in bloom
|
||||||
True
|
True
|
||||||
>>> b.format_hash("Ratatouille")
|
>>> bloom.format_hash("Ratatouille")
|
||||||
'01100000'
|
'01100000'
|
||||||
|
|
||||||
The false-positive probability increases with the number of added elements:
|
The false-positive probability increases with the number of added elements:
|
||||||
>>> b.estimated_error_rate()
|
>>> bloom.estimated_error_rate()
|
||||||
0.140625
|
0.140625
|
||||||
>>> b.add("The Goodfather")
|
>>> bloom.add("The Goodfather")
|
||||||
>>> b.estimated_error_rate()
|
>>> bloom.estimated_error_rate()
|
||||||
0.390625
|
0.390625
|
||||||
>>> b.bitstring
|
>>> bloom.bitstring
|
||||||
'01111100'
|
'01111100'
|
||||||
"""
|
"""
|
||||||
from hashlib import md5, sha256
|
from hashlib import md5, sha256
|
||||||
|
Loading…
x
Reference in New Issue
Block a user