mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-01-19 08:47:01 +00:00
Merge pull request #237 from damelLP/add_string_algos
fixed failure function and cleaned up code in kmp + added rabin-karp
This commit is contained in:
commit
a4c7e58da5
|
@ -1,4 +1,4 @@
|
||||||
def kmp(pattern, text, len_p=None, len_t=None):
|
def kmp(pattern, text):
|
||||||
"""
|
"""
|
||||||
The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
|
The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
|
||||||
with complexity O(n + m)
|
with complexity O(n + m)
|
||||||
|
@ -14,14 +14,7 @@ def kmp(pattern, text, len_p=None, len_t=None):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 1) Construct the failure array
|
# 1) Construct the failure array
|
||||||
failure = [0]
|
failure = get_failure_array(pattern)
|
||||||
i = 0
|
|
||||||
for index, char in enumerate(pattern[1:]):
|
|
||||||
if pattern[i] == char:
|
|
||||||
i += 1
|
|
||||||
else:
|
|
||||||
i = 0
|
|
||||||
failure.append(i)
|
|
||||||
|
|
||||||
# 2) Step through text searching for pattern
|
# 2) Step through text searching for pattern
|
||||||
i, j = 0, 0 # index into text, pattern
|
i, j = 0, 0 # index into text, pattern
|
||||||
|
@ -29,20 +22,38 @@ def kmp(pattern, text, len_p=None, len_t=None):
|
||||||
if pattern[j] == text[i]:
|
if pattern[j] == text[i]:
|
||||||
if j == (len(pattern) - 1):
|
if j == (len(pattern) - 1):
|
||||||
return True
|
return True
|
||||||
i += 1
|
|
||||||
j += 1
|
j += 1
|
||||||
|
|
||||||
# if this is a prefix in our pattern
|
# if this is a prefix in our pattern
|
||||||
# just go back far enough to continue
|
# just go back far enough to continue
|
||||||
elif failure[j] > 0:
|
elif j > 0:
|
||||||
j = failure[j] - 1
|
j = failure[j - 1]
|
||||||
else:
|
continue
|
||||||
i += 1
|
i += 1
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def get_failure_array(pattern):
    """
    Build the KMP failure (prefix) table for ``pattern``.

    Entry ``k`` of the result is the length of the longest proper prefix of
    ``pattern[:k + 1]`` that is also a suffix of it.  When a comparison
    fails after ``k + 1`` characters have matched, that value is the
    pattern index the search should resume from.

    :param pattern: the sequence to pre-process (typically a string)
    :return: a list of ints with exactly one entry per element of
        ``pattern``; an empty pattern yields an empty list

    >>> get_failure_array("aabaabaaa")
    [0, 1, 0, 1, 2, 3, 4, 5, 2]
    >>> get_failure_array("")
    []
    """
    size = len(pattern)
    # Pre-sizing from the pattern length keeps the table and the pattern
    # the same length even when the pattern is empty.
    failure = [0] * size

    matched = 0  # length of the prefix currently being extended
    for pos in range(1, size):
        # On a mismatch, fall back to the next shorter prefix that is also
        # a suffix of what has been matched so far, until one can be
        # extended or none is left.
        while matched > 0 and pattern[matched] != pattern[pos]:
            matched = failure[matched - 1]
        if pattern[matched] == pattern[pos]:
            matched += 1
        failure[pos] = matched
    return failure
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
# Test 1)
|
# Test 1)
|
||||||
pattern = "abc1abc12"
|
pattern = "abc1abc12"
|
||||||
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
|
text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
|
||||||
|
@ -54,4 +65,16 @@ if __name__ == '__main__':
|
||||||
text = "ABABZABABYABABX"
|
text = "ABABZABABYABABX"
|
||||||
assert kmp(pattern, text)
|
assert kmp(pattern, text)
|
||||||
|
|
||||||
|
# Test 3)
|
||||||
|
pattern = "AAAB"
|
||||||
|
text = "ABAAAAAB"
|
||||||
|
assert kmp(pattern, text)
|
||||||
|
|
||||||
|
# Test 4)
|
||||||
|
pattern = "abcdabcy"
|
||||||
|
text = "abcxabcdabxabcdabcdabcy"
|
||||||
|
assert kmp(pattern, text)
|
||||||
|
|
||||||
|
# Test 5)
|
||||||
|
pattern = "aabaabaaa"
|
||||||
|
assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2]
|
||||||
|
|
50
strings/rabin-karp.py
Normal file
50
strings/rabin-karp.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
def rabin_karp(pattern, text):
    """
    The Rabin-Karp Algorithm for finding a pattern within a piece of text.

    Expected running time is O(n + m); the worst case (every window's hash
    colliding with the pattern's hash) is O(nm).  The algorithm is most
    useful with multiple patterns, because a window hash can be checked
    against a set of precomputed pattern hashes in O(1).

    This is the simple version which only assumes one pattern is being
    searched for, but it's not hard to modify.

    1) Calculate the pattern hash.

    2) Step through the text one element at a time, sliding a window with
       the same length as the pattern.  The window hash is updated in O(1)
       per step (rolling hash) and compared with the pattern hash; the
       actual contents are only compared when the hashes match.

    :param pattern: sequence to look for (str, bytes, or any sliceable
        sequence of hashable items)
    :param text: sequence to search in, of the same type as ``pattern``
    :return: True if ``pattern`` occurs in ``text``, False otherwise; an
        empty pattern is always found
    """
    p_len = len(pattern)
    t_len = len(text)
    if p_len > t_len:
        return False
    if p_len == 0:
        # The empty pattern matches at position 0 of any text.
        return True

    base = 256
    modulus = 1000003
    # Weight of the left-most element of a window in the polynomial hash.
    high_order = pow(base, p_len - 1, modulus)

    # Per-element hash() is used instead of ord() so that any sequence of
    # hashable items (str, bytes, tuple, ...) keeps working.
    p_hash = 0
    window_hash = 0
    for k in range(p_len):
        p_hash = (p_hash * base + hash(pattern[k])) % modulus
        window_hash = (window_hash * base + hash(text[k])) % modulus

    last_start = t_len - p_len
    for start in range(last_start + 1):
        # The hash is only a filter: equal hashes can still be a collision,
        # so the window itself is compared before reporting a match.
        if window_hash == p_hash and text[start:start + p_len] == pattern:
            return True
        if start < last_start:
            # Roll the window: drop text[start], append text[start + p_len].
            # Python's % always returns a non-negative result for a
            # positive modulus, so no extra correction is needed.
            window_hash = (
                (window_hash - hash(text[start]) * high_order) * base
                + hash(text[start + p_len])
            ) % modulus
    return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Self-checks: each case searches `pattern` in one or more texts.

    # Test 1) pattern present in text1, absent from text2
    pattern = "abc1abc12"
    text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
    text2 = "alskfjaldsk23adsfabcabc"
    assert rabin_karp(pattern, text1)
    assert not rabin_karp(pattern, text2)

    # Test 2) match sits at the very end of the text
    pattern, text = "ABABX", "ABABZABABYABABX"
    assert rabin_karp(pattern, text)

    # Test 3) repeated characters before the match
    pattern, text = "AAAB", "ABAAAAAB"
    assert rabin_karp(pattern, text)

    # Test 4) several partial matches before the full one
    pattern, text = "abcdabcy", "abcxabcdabxabcdabcdabcy"
    assert rabin_karp(pattern, text)
|
Loading…
Reference in New Issue
Block a user