mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-03-11 17:19:48 +00:00
Merge b632e4c72d06c75d872df8ff0f6540b7f395fae4 into 338cbafe0d5b07d57f83060ea0f9ba3a6c1155e7
This commit is contained in:
commit
104bec022d
150
strings/suffix_automaton.py
Normal file
150
strings/suffix_automaton.py
Normal file
@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class SuffixAutomaton:
|
||||
"""
|
||||
Suffix Automaton data structure for efficient substring counting and searching.
|
||||
|
||||
Usage:
|
||||
sa = SuffixAutomaton()
|
||||
sa.extend('abc')
|
||||
terminal_states = sa.terminal()
|
||||
is_found = sa.find('ab')
|
||||
|
||||
:param alpha: Number of characters in the alphabet
|
||||
(default is 26 for lowercase English letters)
|
||||
:param first: ASCII value of the first character in the alphabet (default is 'a')
|
||||
"""
|
||||
|
||||
def __init__(self, alpha: int = 26, first: int = ord("a")):
|
||||
self.alpha = alpha
|
||||
self.first = first
|
||||
self.substrings = 0
|
||||
self.last = 0
|
||||
self.st = [self.Node(0, -1, [0] * self.alpha)]
|
||||
|
||||
class Node:
|
||||
def __init__(self, length: int, link: int, next_states: list[int]):
|
||||
self.len = length
|
||||
self.link = link
|
||||
self.next = next_states
|
||||
|
||||
def extend(self, c: str) -> None:
|
||||
"""
|
||||
Extend the suffix automaton with a new character.
|
||||
|
||||
:param c: The character to extend with.
|
||||
|
||||
>>> sa = SuffixAutomaton()
|
||||
>>> sa.add('aabaaab')
|
||||
>>> terminal_states = [7, 3, 0]
|
||||
>>> sa.terminal() == terminal_states
|
||||
True
|
||||
>>> sa.find('a')
|
||||
True
|
||||
>>> sa.find('b')
|
||||
True
|
||||
>>> sa.find('c')
|
||||
False
|
||||
>>> sa.find('aa')
|
||||
True
|
||||
>>> sa.find('ab')
|
||||
True
|
||||
>>> sa.find('aaaab')
|
||||
False
|
||||
>>> sa.find('bab')
|
||||
False
|
||||
>>> sa.find('baaaab')
|
||||
False
|
||||
>>> sa.find('baaaaab')
|
||||
False
|
||||
>>> sa.extend('xy') # doctest: +IGNORE_EXCEPTION_DETAIL
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: Invalid input to extend method. Only single characters are allowed.
|
||||
"""
|
||||
if len(c) != 1:
|
||||
raise ValueError("The character to extend with must be a single character.")
|
||||
|
||||
char_value = ord(c) - self.first
|
||||
cur = len(self.st)
|
||||
p, q = self.last, 0
|
||||
|
||||
self.st.append(self.Node(self.st[p].len + 1, -1, [0] * self.alpha))
|
||||
|
||||
while p != -1 and not self.st[p].next[char_value]:
|
||||
self.st[p].next[char_value] = cur
|
||||
p = self.st[p].link
|
||||
|
||||
if p == -1:
|
||||
self.st[cur].link = 0
|
||||
self.substrings += self.st[cur].len
|
||||
else:
|
||||
q = self.st[p].next[char_value]
|
||||
|
||||
if self.st[p].len + 1 == self.st[q].len:
|
||||
self.st[cur].link = q
|
||||
self.substrings += self.st[cur].len - self.st[q].len
|
||||
else:
|
||||
clone = len(self.st)
|
||||
self.st.append(
|
||||
self.Node(self.st[p].len + 1, self.st[q].link, self.st[q].next[:])
|
||||
)
|
||||
self.substrings += self.st[clone].len - self.st[self.st[clone].link].len
|
||||
|
||||
while p != -1 and self.st[p].next[char_value] == q:
|
||||
self.st[p].next[char_value] = clone
|
||||
p = self.st[p].link
|
||||
|
||||
self.substrings -= self.st[q].len - self.st[self.st[q].link].len
|
||||
self.st[q].link = self.st[cur].link = clone
|
||||
self.substrings += (
|
||||
self.st[q].len
|
||||
- self.st[self.st[q].link].len
|
||||
+ self.st[cur].len
|
||||
- self.st[self.st[cur].link].len
|
||||
)
|
||||
|
||||
self.last = cur
|
||||
|
||||
def add(self, s: str) -> None:
|
||||
"""
|
||||
Add a string to the suffix automaton.
|
||||
|
||||
:param s: The string to add.
|
||||
"""
|
||||
for i in range(len(s)):
|
||||
self.extend(s[i])
|
||||
|
||||
def terminal(self) -> list[int]:
|
||||
"""
|
||||
Get the terminal states of the suffix automaton.
|
||||
|
||||
:return: List of terminal states.
|
||||
"""
|
||||
p = self.last
|
||||
terminal_states = []
|
||||
while p >= 0:
|
||||
terminal_states.append(p)
|
||||
p = self.st[p].link
|
||||
return terminal_states
|
||||
|
||||
def find(self, s: str) -> bool:
|
||||
"""
|
||||
Check if a string is present in the suffix automaton.
|
||||
|
||||
:param s: The string to search for.
|
||||
:return: True if the string is found, False otherwise.
|
||||
"""
|
||||
p = 0
|
||||
for j in range(len(s)):
|
||||
if not self.st[p].next[ord(s[j]) - self.first]:
|
||||
return False
|
||||
p = self.st[p].next[ord(s[j]) - self.first]
|
||||
return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
doctest.testmod()
|
Loading…
x
Reference in New Issue
Block a user