suffix_array_lcp.py

This commit is contained in:
Putul Singh 2024-10-19 13:46:04 +05:30 committed by GitHub
parent 8038826cd9
commit 8b0e74e81c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,45 +2,43 @@
import doctest
def build_suffix_array(input_string: str) -> list[int]:
    """
    Build the suffix array for the given string.

    Parameters:
    input_string (str): The input string.

    Returns:
    list[int]: The suffix array (a list of starting indices of
               suffixes in sorted order).

    Example:
    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("a")
    [0]
    >>> build_suffix_array("")
    []
    """
    # Sorting the start indices by the suffix they begin gives the same order
    # as sorting (suffix, index) pairs: every suffix has a different length,
    # so no two suffixes compare equal and the index never breaks a tie.
    return sorted(
        range(len(input_string)),
        key=lambda start: input_string[start:],
    )
def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP array for the given string and suffix array.

    Entry k (for k > 0) of the result is the length of the longest common
    prefix of the suffixes starting at suffix_array[k - 1] and
    suffix_array[k]. Entry 0 has no predecessor and is left at 0.

    Parameters:
    input_string (str): The input string.
    suffix_array (list[int]): The suffix array. Its entries are used as
                              indices into a list of len(input_string)
                              elements, so it must hold each start index of
                              input_string exactly once.

    Returns:
    list[int]: The LCP array.

    Example:
    >>> suffix_arr = build_suffix_array("banana")
    >>> build_lcp_array("banana", suffix_arr)
    [0, 1, 3, 0, 0, 2]
    """
    n = len(input_string)
    rank = [0] * n
    lcp = [0] * n

    # rank is the inverse of suffix_array: rank[start] is the position of the
    # suffix beginning at `start` within the sorted order.
    for position, start in enumerate(suffix_array):
        rank[start] = position

    # h carries the match length over from the previous start index, so the
    # character comparison below never restarts from zero.
    h = 0
    for i in range(n):
        if rank[i] > 0:
            # j is the start of the suffix sorted immediately before suffix i.
            j = suffix_array[rank[i] - 1]
            while (
                i + h < n
                and j + h < n
                and input_string[i + h] == input_string[j + h]
            ):
                h += 1
            lcp[rank[i]] = h
            if h > 0:
                h -= 1  # Decrease h for the next suffix
    return lcp
# Example usage
if __name__ == "__main__":
    test_string = "banana"
    suffix_array = build_suffix_array(test_string)
    lcp_array = build_lcp_array(test_string, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {test_string[start:]}")

    print("\nLCP Array:")
    # Pair each suffix with its predecessor in sorted order; lcp_array[0] has
    # no predecessor, so the LCP values are taken from index 1 onwards.
    for prev_start, start, lcp_length in zip(
        suffix_array, suffix_array[1:], lcp_array[1:]
    ):
        # Both operands are printed as full suffixes (slices), not as the
        # single character found at the start index.
        previous_suffix = test_string[prev_start:]
        current_suffix = test_string[start:]
        print(f"LCP between {previous_suffix} and {current_suffix}: {lcp_length}")

    # Run doctests
    doctest.testmod()