# Origin: strings/wildcard_pattern_matching.py, introduced by commit 83cf5786
# ("Add wildcard pattern matching using dynamic programming", #5334).
"""
Implementation of regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.
The matching should cover the entire input string (not partial).

"""


def match_pattern(input_string: str, pattern: str) -> bool:
    """
    Match ``input_string`` against ``pattern`` with bottom-up dynamic
    programming.

    Runtime: O(len(input_string) * len(pattern))

    Arguments
    --------
    input_string: str, any string which should be compared with the pattern
    pattern: str, the string that represents a pattern and may contain
    '.' for single character matches and '*' for zero or more of preceding
    character matches

    Note
    ----
    the pattern cannot start with a '*',
    because there should be at least one character before *;
    such a leading '*' never matches anything.
    A '*' in the pattern is always a quantifier, never a literal, even when
    the input string itself contains '*' characters.

    Returns
    -------
    A Boolean denoting whether the given string follows the pattern

    Examples
    -------
    >>> match_pattern("aab", "c*a*b")
    True
    >>> match_pattern("dabc", "*abc")
    False
    >>> match_pattern("aaa", "aa")
    False
    >>> match_pattern("aaa", "a.a")
    True
    >>> match_pattern("aaab", "aa*")
    False
    >>> match_pattern("aaab", ".*")
    True
    >>> match_pattern("a", "bbbb")
    False
    >>> match_pattern("", "bbbb")
    False
    >>> match_pattern("a", "")
    False
    >>> match_pattern("", "")
    True
    >>> match_pattern("*", ".*")
    True
    >>> match_pattern("a*", "a*")
    False
    """
    rows = len(input_string) + 1
    cols = len(pattern) + 1

    # dp[i][j] is True when the first i characters of input_string match the
    # first j characters of pattern ("dp" stands for dynamic programming).
    # Every cell starts False, so a non-empty string against an empty
    # pattern (column 0) is already correct.
    dp = [[False] * cols for _ in range(rows)]

    # The empty string matches the empty pattern.
    dp[0][0] = True

    # The empty string only matches a pattern prefix ending in "x*" when it
    # also matches the prefix without that "x*" pair. Starting at j == 2
    # skips a leading '*', which has no preceding element to repeat.
    for j in range(2, cols):
        if pattern[j - 1] == "*":
            dp[0][j] = dp[0][j - 2]

    for i in range(1, rows):
        char = input_string[i - 1]
        for j in range(1, cols):
            token = pattern[j - 1]

            # The quantifier must be tested before the literal comparison;
            # otherwise a literal '*' in the input would consume the
            # pattern's '*' as if it were an ordinary character.
            if token == "*":
                if j < 2:
                    # Leading '*': nothing to repeat, never matches.
                    continue
                if dp[i][j - 2]:
                    # Zero occurrences of the preceding element.
                    dp[i][j] = True
                elif pattern[j - 2] in (char, "."):
                    # One more occurrence: drop the current character and
                    # stay on the same "x*" pair.
                    dp[i][j] = dp[i - 1][j]
            elif token in (char, "."):
                dp[i][j] = dp[i - 1][j - 1]

    return dp[-1][-1]


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    input_string = "aab"
    pattern = "c*a*b"

    # using function to check whether given string matches the given pattern
    if match_pattern(input_string, pattern):
        print(f"{input_string} matches the given pattern {pattern}")
    else:
        print(f"{input_string} does not match with the given pattern {pattern}")