Python/dynamic_programming/regex_match.py

104 lines
2.6 KiB
Python
Raw Normal View History

"""
2022-08-12 16:42:34 +04:30
Regex matching: check whether a text matches a pattern.
Pattern:
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
2022-08-12 12:55:28 +04:30
More info:
https://medium.com/trick-the-interviwer/regular-expression-matching-9972eb74c03
"""
def recursive_match(text: str, pattern: str) -> bool:
    """
    Recursive matching algorithm.

    Both strings are consumed from the right. A trailing '*' in the pattern
    is always a quantifier for the element just before it: it either stands
    for zero occurrences (drop the two-character "x*" group) or absorbs one
    more character of the text, which is only allowed when that character
    matches the repeated element. A '*' that has no preceding element
    matches nothing.

    Time complexity: O(2 ^ (|text| + |pattern|))
    Space complexity: Recursion depth is O(|text| + |pattern|).

    :param text: Text to match.
    :param pattern: Pattern to match ('.' is any single character, '*' is
        zero or more of the preceding element).
    :return: True if the whole text matches the whole pattern, False otherwise.

    >>> recursive_match('abc', 'a.c')
    True
    >>> recursive_match('abc', 'af*.c')
    True
    >>> recursive_match('abc', 'a.c*')
    True
    >>> recursive_match('abc', 'a.c*d')
    False
    >>> recursive_match('aa', '.*')
    True
    >>> recursive_match('ab', 'a*')
    False
    >>> recursive_match('', 'a*b*')
    True
    """
    if not pattern:
        # An exhausted pattern matches only an exhausted text.
        return not text

    if pattern[-1] == "*":
        if len(pattern) < 2:
            # Nothing precedes the '*', so there is no element to repeat.
            return False
        # Zero occurrences: discard the whole "x*" group.
        if recursive_match(text, pattern[:-2]):
            return True
        # One more occurrence: the last text character must match the
        # repeated element before the group is allowed to consume it.
        return (
            bool(text)
            and pattern[-2] in (text[-1], ".")
            and recursive_match(text[:-1], pattern)
        )

    # Plain element: it must match the last character of a non-empty text.
    if text and pattern[-1] in (text[-1], "."):
        return recursive_match(text[:-1], pattern[:-1])
    return False
2022-08-12 12:52:38 +04:30
def dp_match(text: str, pattern: str) -> bool:
    """
    Dynamic programming matching algorithm.

    table[i][j] records whether text[:i] matches pattern[:j]. A '*' in the
    pattern is always treated as a quantifier for the element before it and
    is never compared literally against the text; a '*' with no preceding
    element matches nothing.

    Time complexity: O(|text| * |pattern|)
    Space complexity: O(|text| * |pattern|)

    :param text: Text to match.
    :param pattern: Pattern to match ('.' is any single character, '*' is
        zero or more of the preceding element).
    :return: True if the whole text matches the whole pattern, False otherwise.

    >>> dp_match('abc', 'a.c')
    True
    >>> dp_match('abc', 'af*.c')
    True
    >>> dp_match('abc', 'a.c*')
    True
    >>> dp_match('abc', 'a.c*d')
    False
    >>> dp_match('aa', '.*')
    True
    >>> dp_match('ab', 'a*')
    False
    >>> dp_match('a*', 'a*')
    False
    """
    rows = len(text)
    cols = len(pattern)
    # Every cell starts as False, so only matching cells need to be written.
    table = [[False] * (cols + 1) for _ in range(rows + 1)]
    table[0][0] = True

    # Empty text: only a chain of "x*" groups can match. Starting at 2 keeps
    # a leading '*' from indexing table[0][-1].
    for j in range(2, cols + 1):
        if pattern[j - 1] == "*":
            table[0][j] = table[0][j - 2]

    for i in range(1, rows + 1):
        char = text[i - 1]
        for j in range(1, cols + 1):
            token = pattern[j - 1]
            if token == "*":
                if j < 2:
                    # No preceding element to repeat; the cell stays False.
                    continue
                # Either skip the "x*" group entirely, or let it absorb the
                # current character when that character matches x.
                table[i][j] = table[i][j - 2] or (
                    pattern[j - 2] in (char, ".") and table[i - 1][j]
                )
            elif token in (char, "."):
                table[i][j] = table[i - 1][j - 1]

    return table[rows][cols]
if __name__ == "__main__":
    # Running the file as a script executes the doctests embedded in the
    # docstrings of this module.
    from doctest import testmod

    testmod()