2019-07-24 09:32:05 +00:00
|
|
|
# Numbers of alphabet which we call base
|
|
|
|
alphabet_size = 256
|
|
|
|
# Modulus to hash a string
|
|
|
|
modulus = 1000003
|
|
|
|
|
|
|
|
|
2018-01-07 12:49:51 +00:00
|
|
|
def rabin_karp(pattern, text):
|
|
|
|
"""
|
|
|
|
The Rabin-Karp Algorithm for finding a pattern within a piece of text
|
|
|
|
with complexity O(nm), most efficient when it is used with multiple patterns
|
|
|
|
as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes.
|
|
|
|
|
|
|
|
This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify
|
|
|
|
|
|
|
|
1) Calculate pattern hash
|
|
|
|
|
|
|
|
2) Step through the text one character at a time passing a window with the same length as the pattern
|
|
|
|
calculating the hash of the text within the window compare it with the hash of the pattern. Only testing
|
|
|
|
equality if the hashes match
|
|
|
|
"""
|
|
|
|
p_len = len(pattern)
|
2019-07-24 09:32:05 +00:00
|
|
|
t_len = len(text)
|
|
|
|
if p_len > t_len:
|
|
|
|
return False
|
|
|
|
|
|
|
|
p_hash = 0
|
|
|
|
text_hash = 0
|
|
|
|
modulus_power = 1
|
2018-01-07 12:49:51 +00:00
|
|
|
|
2019-07-24 09:32:05 +00:00
|
|
|
# Calculating the hash of pattern and substring of text
|
|
|
|
for i in range(p_len):
|
|
|
|
p_hash = (ord(pattern[i]) + p_hash * alphabet_size) % modulus
|
|
|
|
text_hash = (ord(text[i]) + text_hash * alphabet_size) % modulus
|
|
|
|
if i == p_len - 1:
|
|
|
|
continue
|
|
|
|
modulus_power = (modulus_power * alphabet_size) % modulus
|
2018-01-07 12:49:51 +00:00
|
|
|
|
2019-07-24 09:32:05 +00:00
|
|
|
for i in range(0, t_len - p_len + 1):
|
|
|
|
if text_hash == p_hash and text[i : i + p_len] == pattern:
|
2018-01-07 12:49:51 +00:00
|
|
|
return True
|
2019-07-24 09:32:05 +00:00
|
|
|
if i == t_len - p_len:
|
|
|
|
continue
|
|
|
|
# Calculating the ruling hash
|
|
|
|
text_hash = (
|
|
|
|
(text_hash - ord(text[i]) * modulus_power) * alphabet_size
|
|
|
|
+ ord(text[i + p_len])
|
|
|
|
) % modulus
|
2018-01-07 12:49:51 +00:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
2019-07-24 09:32:05 +00:00
|
|
|
def test_rabin_karp():
|
|
|
|
"""
|
|
|
|
>>> test_rabin_karp()
|
|
|
|
Success.
|
|
|
|
"""
|
2018-01-07 12:49:51 +00:00
|
|
|
# Test 1)
|
|
|
|
pattern = "abc1abc12"
|
|
|
|
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
|
|
|
|
text2 = "alskfjaldsk23adsfabcabc"
|
|
|
|
assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2)
|
|
|
|
|
|
|
|
# Test 2)
|
|
|
|
pattern = "ABABX"
|
|
|
|
text = "ABABZABABYABABX"
|
|
|
|
assert rabin_karp(pattern, text)
|
|
|
|
|
|
|
|
# Test 3)
|
|
|
|
pattern = "AAAB"
|
|
|
|
text = "ABAAAAAB"
|
|
|
|
assert rabin_karp(pattern, text)
|
|
|
|
|
|
|
|
# Test 4)
|
|
|
|
pattern = "abcdabcy"
|
|
|
|
text = "abcxabcdabxabcdabcdabcy"
|
|
|
|
assert rabin_karp(pattern, text)
|
2019-07-24 09:32:05 +00:00
|
|
|
print("Success.")
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
test_rabin_karp()
|