2024-10-19 07:53:23 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2024-10-19 08:10:04 +00:00
|
|
|
import doctest
|
2024-10-19 07:53:45 +00:00
|
|
|
|
2024-10-19 08:21:24 +00:00
|
|
|
|
2024-10-19 08:16:04 +00:00
|
|
|
def build_suffix_array(input_string: str) -> list[int]:
    """
    Build the suffix array for the given string.

    The suffix array lists the starting index of every suffix of
    ``input_string``, ordered so that the suffixes themselves are in
    ascending lexicographic order.

    Note: every suffix is materialised as a separate string for sorting, so
    memory use grows quadratically with the input length. That is fine for
    short inputs but unsuitable for very large texts.

    Parameters:
    input_string (str): The input string.

    Returns:
    list[int]: The suffix array (a list of starting indices of
    suffixes in sorted order).

    Examples:
    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("")
    []
    >>> build_suffix_array("a")
    [0]
    """
    # All suffixes have distinct lengths, so no two sort keys are ever equal
    # and the resulting order is fully determined by the suffix text.
    return sorted(range(len(input_string)), key=lambda start: input_string[start:])
|
|
|
|
|
2024-10-19 08:21:24 +00:00
|
|
|
|
2024-10-19 08:16:04 +00:00
|
|
|
def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP array for the given string and suffix array.

    Entry ``k`` of the result (for ``k > 0``) is the length of the longest
    common prefix of the suffixes starting at ``suffix_array[k - 1]`` and
    ``suffix_array[k]``. Entry ``0`` has no predecessor and is always 0.

    Parameters:
    input_string (str): The input string.
    suffix_array (list[int]): The suffix array of ``input_string``, e.g. as
    returned by ``build_suffix_array``.

    Returns:
    list[int]: The LCP array, with the same length as ``input_string``.

    Examples:
    >>> build_lcp_array("banana", [5, 3, 1, 0, 4, 2])
    [0, 1, 3, 0, 0, 2]
    >>> build_lcp_array("aaa", [2, 1, 0])
    [0, 1, 2]
    >>> build_lcp_array("", [])
    []
    """
    n = len(input_string)
    rank = [0] * n
    lcp = [0] * n

    # rank is the inverse of suffix_array: rank[start] is the position of the
    # suffix beginning at `start` within the sorted order.
    for position, start in enumerate(suffix_array):
        rank[start] = position

    # Walk the suffixes in text order (0, 1, 2, ...) rather than sorted order,
    # so that the match length found for one suffix can seed the next one.
    matched = 0
    for start in range(n):
        if rank[start] > 0:
            # The suffix that immediately precedes this one in sorted order.
            neighbour = suffix_array[rank[start] - 1]
            while (
                start + matched < n
                and neighbour + matched < n
                and input_string[start + matched] == input_string[neighbour + matched]
            ):
                matched += 1
            lcp[rank[start]] = matched
            if matched > 0:
                # Dropping the first character of this suffix loses at most
                # one matched character, so the next suffix can resume from
                # matched - 1 instead of comparing from scratch.
                matched -= 1
    return lcp
|
|
|
|
|
2024-10-19 08:21:24 +00:00
|
|
|
|
2024-10-19 07:53:23 +00:00
|
|
|
# Example usage: print the suffix array and LCP array for a sample string,
# then run the doctests embedded in this module.
if __name__ == "__main__":
    s = "banana"
    suffix_array = build_suffix_array(s)
    lcp_array = build_lcp_array(s, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {s[start:]}")

    print("\nLCP Array:")
    # lcp_array[k] describes the pair (suffix_array[k - 1], suffix_array[k]),
    # so pair each suffix with its successor and skip the meaningless entry 0.
    adjacent_pairs = zip(suffix_array, suffix_array[1:], lcp_array[1:])
    for previous_start, current_start, shared in adjacent_pairs:
        # Both operands are shown as full suffixes (slices), not single
        # characters, so the printed pair matches what the LCP value measures.
        lcp_info = (
            f"LCP between {s[previous_start:]} and "
            f"{s[current_start:]}: {shared}"
        )
        print(lcp_info)

    # Run doctests
    doctest.testmod()
|