From 2fd71965f12a08e6d0b8a4f526266e5f935270cc Mon Sep 17 00:00:00 2001 From: Isidro Arias Date: Fri, 7 Apr 2023 17:31:08 +0200 Subject: [PATCH] estimated error --- data_structures/hashing/bloom_filter.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/data_structures/hashing/bloom_filter.py b/data_structures/hashing/bloom_filter.py index 8affaabe4..586d7de30 100644 --- a/data_structures/hashing/bloom_filter.py +++ b/data_structures/hashing/bloom_filter.py @@ -13,7 +13,7 @@ Initially the filter contains all zeros: '00000000' When an element is added, two bits are set to 1 -since there are 2 hash functions: +since there are 2 hash functions in this implementation: >>> b.add("Titanic") >>> b.bitstring '01100000' @@ -35,10 +35,16 @@ False '00011000' >>> "Interstellar" in b False +>>> b.format_hash("Interstellar") +'00000011' >>> "Parasite" in b False +>>> b.format_hash("Parasite") +'00010010' >>> "Pulp Fiction" in b False +>>> b.format_hash("Pulp Fiction") +'10000100' but sometimes there are false positives: >>> "Ratatouille" in b @@ -46,8 +52,14 @@ True >>> b.format_hash("Ratatouille") '01100000' +The probability of a false positive increases with the number of added elements: >>> b.estimated_error_rate() 0.140625 +>>> b.add("The Goodfather") +>>> b.estimated_error_rate() +0.390625 +>>> b.bitstring +'01111100' """ from hashlib import md5, sha256