Suffix Array and LCP Array Implementation

This commit is contained in:
Putul Singh 2024-10-19 13:21:16 +05:30 committed by GitHub
parent 03a42510b0
commit 0457860ed4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python3
def build_suffix_array(s: str) -> list[int]:
    """
    Build the suffix array for the given string.

    The suffix array lists the starting indices of all suffixes of ``s`` in
    lexicographic order of the suffixes themselves.

    This is the straightforward construction: every suffix slice is built and
    compared directly, so it is intended for short inputs (the slices alone
    take quadratic memory in the length of ``s``).

    Parameters:
        s (str): The input string.

    Returns:
        list[int]: The suffix array (a list of starting indices of
        suffixes in sorted order). Empty when ``s`` is empty.

    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("")
    []
    """
    # All suffixes have different lengths and are therefore distinct, so the
    # suffix text alone is a total order and no tie-break on the index is needed.
    return sorted(range(len(s)), key=lambda start: s[start:])
def build_lcp_array(s: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP (longest common prefix) array for a string and its suffix array.

    ``lcp[k]`` is the length of the longest common prefix of the suffixes
    starting at ``suffix_array[k - 1]`` and ``suffix_array[k]``. ``lcp[0]`` is
    always 0 because the first suffix in sorted order has no predecessor.

    The computation follows Kasai's algorithm: suffixes are visited in text
    order and the match length found for one suffix is reused (minus one
    character) as the starting point for the next.

    Parameters:
        s (str): The input string.
        suffix_array (list[int]): The suffix array of ``s``.

    Returns:
        list[int]: The LCP array, with one entry per character of ``s``.

    Raises:
        ValueError: If ``suffix_array`` does not contain exactly one entry per
            character of ``s``.

    >>> build_lcp_array("banana", [5, 3, 1, 0, 4, 2])
    [0, 1, 3, 0, 0, 2]
    >>> build_lcp_array("", [])
    []
    """
    n = len(s)
    if len(suffix_array) != n:
        # A mismatched array would otherwise leave ranks unset and silently
        # produce a meaningless result (or an unrelated IndexError).
        raise ValueError(
            f"suffix_array has {len(suffix_array)} entries, expected {n}"
        )

    rank = [0] * n
    lcp = [0] * n

    # rank[i] is the position of the suffix starting at i within suffix_array.
    for position, suffix_index in enumerate(suffix_array):
        rank[suffix_index] = position

    h = 0  # Number of characters already known to match for the current suffix.
    for i in range(n):
        if rank[i] > 0:
            # j starts the suffix that immediately precedes suffix i in sorted order.
            j = suffix_array[rank[i] - 1]
            while i + h < n and j + h < n and s[i + h] == s[j + h]:
                h += 1
            lcp[rank[i]] = h
            if h > 0:
                # Dropping the first character shortens the shared prefix by
                # at most one, so the next suffix can start from h - 1.
                h -= 1
    return lcp
# Example usage: print the suffix array and LCP array of a sample string.
if __name__ == "__main__":
    s = "banana"
    suffix_array = build_suffix_array(s)
    lcp_array = build_lcp_array(s, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {s[start:]}")

    print("\nLCP Array:")
    # lcp_array[0] has no preceding suffix to compare with, so start at 1.
    for i in range(1, len(lcp_array)):
        previous_suffix = s[suffix_array[i - 1]:]
        # Slice to the end of the string: indexing without the colon would
        # show only the first character of the suffix.
        current_suffix = s[suffix_array[i]:]
        print(f"LCP between {previous_suffix} and {current_suffix}: {lcp_array[i]}")