mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-03-12 09:39:49 +00:00
Merge b632e4c72d06c75d872df8ff0f6540b7f395fae4 into 338cbafe0d5b07d57f83060ea0f9ba3a6c1155e7
This commit is contained in:
commit
104bec022d
150
strings/suffix_automaton.py
Normal file
150
strings/suffix_automaton.py
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
class SuffixAutomaton:
|
||||||
|
"""
|
||||||
|
Suffix Automaton data structure for efficient substring counting and searching.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
sa = SuffixAutomaton()
|
||||||
|
sa.extend('abc')
|
||||||
|
terminal_states = sa.terminal()
|
||||||
|
is_found = sa.find('ab')
|
||||||
|
|
||||||
|
:param alpha: Number of characters in the alphabet
|
||||||
|
(default is 26 for lowercase English letters)
|
||||||
|
:param first: ASCII value of the first character in the alphabet (default is 'a')
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, alpha: int = 26, first: int = ord("a")):
|
||||||
|
self.alpha = alpha
|
||||||
|
self.first = first
|
||||||
|
self.substrings = 0
|
||||||
|
self.last = 0
|
||||||
|
self.st = [self.Node(0, -1, [0] * self.alpha)]
|
||||||
|
|
||||||
|
class Node:
|
||||||
|
def __init__(self, length: int, link: int, next_states: list[int]):
|
||||||
|
self.len = length
|
||||||
|
self.link = link
|
||||||
|
self.next = next_states
|
||||||
|
|
||||||
|
def extend(self, c: str) -> None:
|
||||||
|
"""
|
||||||
|
Extend the suffix automaton with a new character.
|
||||||
|
|
||||||
|
:param c: The character to extend with.
|
||||||
|
|
||||||
|
>>> sa = SuffixAutomaton()
|
||||||
|
>>> sa.add('aabaaab')
|
||||||
|
>>> terminal_states = [7, 3, 0]
|
||||||
|
>>> sa.terminal() == terminal_states
|
||||||
|
True
|
||||||
|
>>> sa.find('a')
|
||||||
|
True
|
||||||
|
>>> sa.find('b')
|
||||||
|
True
|
||||||
|
>>> sa.find('c')
|
||||||
|
False
|
||||||
|
>>> sa.find('aa')
|
||||||
|
True
|
||||||
|
>>> sa.find('ab')
|
||||||
|
True
|
||||||
|
>>> sa.find('aaaab')
|
||||||
|
False
|
||||||
|
>>> sa.find('bab')
|
||||||
|
False
|
||||||
|
>>> sa.find('baaaab')
|
||||||
|
False
|
||||||
|
>>> sa.find('baaaaab')
|
||||||
|
False
|
||||||
|
>>> sa.extend('xy') # doctest: +IGNORE_EXCEPTION_DETAIL
|
||||||
|
Traceback (most recent call last):
|
||||||
|
...
|
||||||
|
ValueError: Invalid input to extend method. Only single characters are allowed.
|
||||||
|
"""
|
||||||
|
if len(c) != 1:
|
||||||
|
raise ValueError("The character to extend with must be a single character.")
|
||||||
|
|
||||||
|
char_value = ord(c) - self.first
|
||||||
|
cur = len(self.st)
|
||||||
|
p, q = self.last, 0
|
||||||
|
|
||||||
|
self.st.append(self.Node(self.st[p].len + 1, -1, [0] * self.alpha))
|
||||||
|
|
||||||
|
while p != -1 and not self.st[p].next[char_value]:
|
||||||
|
self.st[p].next[char_value] = cur
|
||||||
|
p = self.st[p].link
|
||||||
|
|
||||||
|
if p == -1:
|
||||||
|
self.st[cur].link = 0
|
||||||
|
self.substrings += self.st[cur].len
|
||||||
|
else:
|
||||||
|
q = self.st[p].next[char_value]
|
||||||
|
|
||||||
|
if self.st[p].len + 1 == self.st[q].len:
|
||||||
|
self.st[cur].link = q
|
||||||
|
self.substrings += self.st[cur].len - self.st[q].len
|
||||||
|
else:
|
||||||
|
clone = len(self.st)
|
||||||
|
self.st.append(
|
||||||
|
self.Node(self.st[p].len + 1, self.st[q].link, self.st[q].next[:])
|
||||||
|
)
|
||||||
|
self.substrings += self.st[clone].len - self.st[self.st[clone].link].len
|
||||||
|
|
||||||
|
while p != -1 and self.st[p].next[char_value] == q:
|
||||||
|
self.st[p].next[char_value] = clone
|
||||||
|
p = self.st[p].link
|
||||||
|
|
||||||
|
self.substrings -= self.st[q].len - self.st[self.st[q].link].len
|
||||||
|
self.st[q].link = self.st[cur].link = clone
|
||||||
|
self.substrings += (
|
||||||
|
self.st[q].len
|
||||||
|
- self.st[self.st[q].link].len
|
||||||
|
+ self.st[cur].len
|
||||||
|
- self.st[self.st[cur].link].len
|
||||||
|
)
|
||||||
|
|
||||||
|
self.last = cur
|
||||||
|
|
||||||
|
def add(self, s: str) -> None:
|
||||||
|
"""
|
||||||
|
Add a string to the suffix automaton.
|
||||||
|
|
||||||
|
:param s: The string to add.
|
||||||
|
"""
|
||||||
|
for i in range(len(s)):
|
||||||
|
self.extend(s[i])
|
||||||
|
|
||||||
|
def terminal(self) -> list[int]:
|
||||||
|
"""
|
||||||
|
Get the terminal states of the suffix automaton.
|
||||||
|
|
||||||
|
:return: List of terminal states.
|
||||||
|
"""
|
||||||
|
p = self.last
|
||||||
|
terminal_states = []
|
||||||
|
while p >= 0:
|
||||||
|
terminal_states.append(p)
|
||||||
|
p = self.st[p].link
|
||||||
|
return terminal_states
|
||||||
|
|
||||||
|
def find(self, s: str) -> bool:
|
||||||
|
"""
|
||||||
|
Check if a string is present in the suffix automaton.
|
||||||
|
|
||||||
|
:param s: The string to search for.
|
||||||
|
:return: True if the string is found, False otherwise.
|
||||||
|
"""
|
||||||
|
p = 0
|
||||||
|
for j in range(len(s)):
|
||||||
|
if not self.st[p].next[ord(s[j]) - self.first]:
|
||||||
|
return False
|
||||||
|
p = self.st[p].next[ord(s[j]) - self.first]
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import doctest
|
||||||
|
|
||||||
|
doctest.testmod()
|
Loading…
x
Reference in New Issue
Block a user