From ec2d900b03dbc511504819caf353310b0a997fa6 Mon Sep 17 00:00:00 2001 From: bnMikheili <39998190+bnMikheili@users.noreply.github.com> Date: Fri, 12 Jun 2020 00:22:16 +0400 Subject: [PATCH] implement sdbm hash algorithm (#2094) * implement sdbm hash algorithm * fix bug: styling * fix styling for decimal_to_any --- conversions/decimal_to_any.py | 8 +++++--- hashes/sdbm.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 hashes/sdbm.py diff --git a/conversions/decimal_to_any.py b/conversions/decimal_to_any.py index b91d84b9d..e3fb4e5d3 100644 --- a/conversions/decimal_to_any.py +++ b/conversions/decimal_to_any.py @@ -99,6 +99,8 @@ if __name__ == "__main__": for base in range(2, 37): for num in range(1000): assert int(decimal_to_any(num, base), base) == num, ( - num, base, decimal_to_any(num, base), - int(decimal_to_any(num, base), base) - ) + num, + base, + decimal_to_any(num, base), + int(decimal_to_any(num, base), base), + ) diff --git a/hashes/sdbm.py b/hashes/sdbm.py new file mode 100644 index 000000000..f80941306 --- /dev/null +++ b/hashes/sdbm.py @@ -0,0 +1,32 @@ +""" + This algorithm was created for the sdbm (a public-domain reimplementation of ndbm) database library. + It was found to do well in scrambling bits, causing better distribution of the keys and fewer splits. + It also happens to be a good general hashing function with good distribution. + The actual function (pseudo code) is: + for i in 0..len(str): + hash(i) = hash(i - 1) * 65599 + str[i]; + + What is included below is the faster version used in gawk. [there is even a faster, duff-device version] + The magic constant 65599 was picked out of thin air while experimenting with different constants. + It turns out to be a prime. + This is one of the algorithms used in berkeley db (see sleepycat) and elsewhere. 
def sdbm(plain_text: str) -> int:
    """
    Compute the sdbm hash of ``plain_text``.

    Every character is folded into a running value as
    ``value = ord(char) + value * 65599``. The multiplication is spelled with
    shifts because ``(v << 6) + (v << 16) - v == v * (64 + 65536 - 1)``,
    which is ``v * 65599``.

    No word-size mask is applied, so the result is an arbitrary-precision
    Python ``int`` that keeps growing with the length of the input.

    Reference: http://www.cse.yorku.ca/~oz/hash.html

    :param plain_text: text to hash; the empty string hashes to ``0``.
    :return: the hash value as a non-negative integer.

    >>> sdbm('Algorithms')
    1462174910723540325254304520539387479031000036

    >>> sdbm('scramble bits')
    730247649148944819640658295400555317318720608290373040936089

    >>> sdbm('')
    0
    """
    # Named ``hash_value`` (not ``hash``) so the builtin is not shadowed.
    hash_value = 0
    for plain_chr in plain_text:
        # Shift form of ``hash_value * 65599 + ord(plain_chr)``.
        hash_value = (
            ord(plain_chr) + (hash_value << 6) + (hash_value << 16) - hash_value
        )
    return hash_value