From 33888646af9d74e46da0175df75b3e5892a72fc7 Mon Sep 17 00:00:00 2001 From: anshul-2010 <96651393+anshul-2010@users.noreply.github.com> Date: Thu, 19 Oct 2023 18:08:02 +0530 Subject: [PATCH] Edit Distance Algorithm for String Matching (#10571) * Edit Distance Algorithm for String Matching * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update edit_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tianyi Zheng --- strings/edit_distance.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 strings/edit_distance.py diff --git a/strings/edit_distance.py b/strings/edit_distance.py new file mode 100644 index 000000000..e842c8555 --- /dev/null +++ b/strings/edit_distance.py @@ -0,0 +1,32 @@ +def edit_distance(source: str, target: str) -> int: + """ + Edit distance algorithm is a string metric, i.e., it is a way of quantifying how + dissimilar two strings are to one another. It is measured by counting the minimum + number of operations required to transform one string into another. + + This implementation assumes that the cost of operations (insertion, deletion and + substitution) is always 1 + + Args: + source: the initial string with respect to which we are calculating the edit + distance for the target + target: the target string, formed after performing n operations on the source string + + >>> edit_distance("GATTIC", "GALTIC") + 1 + """ + if len(source) == 0: + return len(target) + elif len(target) == 0: + return len(source) + + delta = int(source[-1] != target[-1]) # Substitution + return min( + edit_distance(source[:-1], target[:-1]) + delta, + edit_distance(source, target[:-1]) + 1, + edit_distance(source[:-1], target) + 1, + ) + + +if __name__ == "__main__": + print(edit_distance("ATCGCTG", "TAGCTAA")) # Answer is 4