mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-12-18 01:00:15 +00:00
421ace81ed
* [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.0.285 → v0.0.286](https://github.com/astral-sh/ruff-pre-commit/compare/v0.0.285...v0.0.286) - [github.com/tox-dev/pyproject-fmt: 0.13.1 → 1.1.0](https://github.com/tox-dev/pyproject-fmt/compare/0.13.1...1.1.0) * updating DIRECTORY.md * Fis ruff rules PIE808,PLR1714 --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
91 lines
2.5 KiB
Python
91 lines
2.5 KiB
Python
# Numbers of alphabet which we call base
|
|
alphabet_size = 256
|
|
# Modulus to hash a string
|
|
modulus = 1000003
|
|
|
|
|
|
def rabin_karp(pattern: str, text: str) -> bool:
|
|
"""
|
|
The Rabin-Karp Algorithm for finding a pattern within a piece of text
|
|
with complexity O(nm), most efficient when it is used with multiple patterns
|
|
as it is able to check if any of a set of patterns match a section of text in o(1)
|
|
given the precomputed hashes.
|
|
|
|
This will be the simple version which only assumes one pattern is being searched
|
|
for but it's not hard to modify
|
|
|
|
1) Calculate pattern hash
|
|
|
|
2) Step through the text one character at a time passing a window with the same
|
|
length as the pattern
|
|
calculating the hash of the text within the window compare it with the hash
|
|
of the pattern. Only testing equality if the hashes match
|
|
"""
|
|
p_len = len(pattern)
|
|
t_len = len(text)
|
|
if p_len > t_len:
|
|
return False
|
|
|
|
p_hash = 0
|
|
text_hash = 0
|
|
modulus_power = 1
|
|
|
|
# Calculating the hash of pattern and substring of text
|
|
for i in range(p_len):
|
|
p_hash = (ord(pattern[i]) + p_hash * alphabet_size) % modulus
|
|
text_hash = (ord(text[i]) + text_hash * alphabet_size) % modulus
|
|
if i == p_len - 1:
|
|
continue
|
|
modulus_power = (modulus_power * alphabet_size) % modulus
|
|
|
|
for i in range(t_len - p_len + 1):
|
|
if text_hash == p_hash and text[i : i + p_len] == pattern:
|
|
return True
|
|
if i == t_len - p_len:
|
|
continue
|
|
# Calculate the https://en.wikipedia.org/wiki/Rolling_hash
|
|
text_hash = (
|
|
(text_hash - ord(text[i]) * modulus_power) * alphabet_size
|
|
+ ord(text[i + p_len])
|
|
) % modulus
|
|
return False
|
|
|
|
|
|
def test_rabin_karp() -> None:
|
|
"""
|
|
>>> test_rabin_karp()
|
|
Success.
|
|
"""
|
|
# Test 1)
|
|
pattern = "abc1abc12"
|
|
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
|
|
text2 = "alskfjaldsk23adsfabcabc"
|
|
assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2)
|
|
|
|
# Test 2)
|
|
pattern = "ABABX"
|
|
text = "ABABZABABYABABX"
|
|
assert rabin_karp(pattern, text)
|
|
|
|
# Test 3)
|
|
pattern = "AAAB"
|
|
text = "ABAAAAAB"
|
|
assert rabin_karp(pattern, text)
|
|
|
|
# Test 4)
|
|
pattern = "abcdabcy"
|
|
text = "abcxabcdabxabcdabcdabcy"
|
|
assert rabin_karp(pattern, text)
|
|
|
|
# Test 5)
|
|
pattern = "Lü"
|
|
text = "Lüsai"
|
|
assert rabin_karp(pattern, text)
|
|
pattern = "Lue"
|
|
assert not rabin_karp(pattern, text)
|
|
print("Success.")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
test_rabin_karp()
|