From 0d36dc60c50acb555e1aa7f6127ddb4fd4ee9040 Mon Sep 17 00:00:00 2001
From: damelLP <dlambertpowell@gmail.com>
Date: Sun, 7 Jan 2018 12:49:51 +0000
Subject: [PATCH] fixed failure function and cleaned up code in kmp + added
 rabin-karp

---
 strings/knuth-morris-pratt.py | 53 +++++++++++++++++++++++++----------
 strings/rabin-karp.py         | 50 +++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 15 deletions(-)
 create mode 100644 strings/rabin-karp.py

diff --git a/strings/knuth-morris-pratt.py b/strings/knuth-morris-pratt.py
index 731f75c58..455394428 100644
--- a/strings/knuth-morris-pratt.py
+++ b/strings/knuth-morris-pratt.py
@@ -1,4 +1,4 @@
-def kmp(pattern, text, len_p=None, len_t=None):
+def kmp(pattern, text):
     """
     The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text
     with complexity O(n + m)
@@ -14,14 +14,7 @@ def kmp(pattern, text, len_p=None, len_t=None):
     """
 
     # 1) Construct the failure array
-    failure = [0]
-    i = 0
-    for index, char in enumerate(pattern[1:]):
-        if pattern[i] == char:
-            i += 1
-        else:
-            i = 0
-        failure.append(i)
+    failure = get_failure_array(pattern)
 
     # 2) Step through text searching for pattern
     i, j = 0, 0  # index into text, pattern
@@ -29,20 +22,38 @@ def kmp(pattern, text, len_p=None, len_t=None):
         if pattern[j] == text[i]:
             if j == (len(pattern) - 1):
                 return True
-            i += 1
             j += 1
 
         # if this is a prefix in our pattern
         # just go back far enough to continue
-        elif failure[j] > 0:
-            j = failure[j] - 1
-        else:
-            i += 1
+        elif j > 0:
+            j = failure[j - 1]
+            continue
+        i += 1
     return False
 
 
-if __name__ == '__main__':
+def get_failure_array(pattern):
+    """
+    Calculates the new index we should go to if we fail a comparison
+    :param pattern:
+    :return:
+    """
+    failure = [0]
+    i = 0
+    j = 1
+    while j < len(pattern):
+        if pattern[i] == pattern[j]:
+            i += 1
+        elif i > 0:
+            i = failure[i-1]
+            continue
+        j += 1
+        failure.append(i)
+    return failure
 
+
+if __name__ == '__main__':
     # Test 1)
     pattern = "abc1abc12"
     text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
@@ -54,4 +65,16 @@ if __name__ == '__main__':
     text = "ABABZABABYABABX"
     assert kmp(pattern, text)
 
+    # Test 3)
+    pattern = "AAAB"
+    text = "ABAAAAAB"
+    assert kmp(pattern, text)
 
+    # Test 4)
+    pattern = "abcdabcy"
+    text = "abcxabcdabxabcdabcdabcy"
+    assert kmp(pattern, text)
+
+    # Test 5)
+    pattern = "aabaabaaa"
+    assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2]
diff --git a/strings/rabin-karp.py b/strings/rabin-karp.py
new file mode 100644
index 000000000..04a849266
--- /dev/null
+++ b/strings/rabin-karp.py
@@ -0,0 +1,50 @@
+def rabin_karp(pattern, text):
+    """
+
+    The Rabin-Karp Algorithm for finding a pattern within a piece of text
+    with complexity O(nm), most efficient when it is used with multiple patterns
+    as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes.
+
+    This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify
+
+    1) Calculate pattern hash
+
+    2) Step through the text one character at a time passing a window with the same length as the pattern
+        calculating the hash of the text within the window compare it with the hash of the pattern. Only testing
+        equality if the hashes match
+
+    """
+    p_len = len(pattern)
+    p_hash = hash(pattern)
+
+    for i in range(0, len(text) - (p_len - 1)):
+
+        # written like this t
+        text_hash = hash(text[i:i + p_len])
+        if text_hash == p_hash and \
+                text[i:i + p_len] == pattern:
+            return True
+    return False
+
+
+if __name__ == '__main__':
+    # Test 1)
+    pattern = "abc1abc12"
+    text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc"
+    text2 = "alskfjaldsk23adsfabcabc"
+    assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2)
+
+    # Test 2)
+    pattern = "ABABX"
+    text = "ABABZABABYABABX"
+    assert rabin_karp(pattern, text)
+
+    # Test 3)
+    pattern = "AAAB"
+    text = "ABAAAAAB"
+    assert rabin_karp(pattern, text)
+
+    # Test 4)
+    pattern = "abcdabcy"
+    text = "abcxabcdabxabcdabcdabcy"
+    assert rabin_karp(pattern, text)