From 8b0e74e81cf8f89c21850c3f29a2d997b7a8018c Mon Sep 17 00:00:00 2001 From: Putul Singh <127419636+putul03@users.noreply.github.com> Date: Sat, 19 Oct 2024 13:46:04 +0530 Subject: [PATCH] suffix_array_lcp.py --- divide_and_conquer/suffix_array_lcp.py | 43 ++++++++++---------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/divide_and_conquer/suffix_array_lcp.py b/divide_and_conquer/suffix_array_lcp.py index 2d90255c6..f9c5cfdbf 100644 --- a/divide_and_conquer/suffix_array_lcp.py +++ b/divide_and_conquer/suffix_array_lcp.py @@ -2,45 +2,43 @@ import doctest - -def build_suffix_array(s: str) -> list[int]: +def build_suffix_array(input_string: str) -> list[int]: """ Build the suffix array for the given string. Parameters: - s (str): The input string. + input_string (str): The input string. Returns: list[int]: The suffix array (a list of starting indices of suffixes in sorted order). - Examples: + Example: >>> build_suffix_array("banana") [5, 3, 1, 0, 4, 2] """ - suffixes = [(s[i:], i) for i in range(len(s))] + suffixes = [(input_string[i:], i) for i in range(len(input_string))] suffixes.sort() # Sort the suffixes lexicographically suffix_array = [suffix[1] for suffix in suffixes] return suffix_array - -def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: +def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]: """ Build the LCP array for the given string and suffix array. Parameters: - s (str): The input string. + input_string (str): The input string. suffix_array (list[int]): The suffix array. Returns: list[int]: The LCP array. - Examples: - >>> suffix_array = build_suffix_array("banana") - >>> build_lcp_array("banana", suffix_array) + Example: + >>> suffix_arr = build_suffix_array("banana") + >>> build_lcp_array("banana", suffix_arr) [0, 1, 3, 0, 0, 2] """ - n = len(s) + n = len(input_string) rank = [0] * n lcp = [0] * n @@ -53,32 +51,23 @@ def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]: for i in range(n): if rank[i] > 0: j = suffix_array[rank[i] - 1] - while (i + h < n) and (j + h < n) and (s[i + h] == s[j + h]): + while (i + h < n) and (j + h < n) and (input_string[i + h] == input_string[j + h]): h += 1 lcp[rank[i]] = h if h > 0: h -= 1 # Decrease h for the next suffix return lcp - # Example usage if __name__ == "__main__": - s = "banana" - suffix_array = build_suffix_array(s) - lcp_array = build_lcp_array(s, suffix_array) + test_string = "banana" + suffix_array = build_suffix_array(test_string) + lcp_array = build_lcp_array(test_string, suffix_array) print("Suffix Array:") for i in range(len(suffix_array)): - print(f"{suffix_array[i]}: {s[suffix_array[i]:]}") + print(f"{suffix_array[i]}: {test_string[suffix_array[i]:]}") print("\nLCP Array:") for i in range(1, len(lcp_array)): - lcp_info = ( - f"LCP between {s[suffix_array[i - 1]:]} and " - f"{s[suffix_array[i]]}: {lcp_array[i]}" - ) - print(lcp_info) - -# Run doctests -if __name__ == "__main__": - doctest.testmod() + print(f"LCP between {test_string[suffix_array[i - 1]:]} and {test_string[suffix_array[i]]}: {lcp_array[i]}")