From 154e5e8681b7ae9711fbef0b89f0ce365a8bf5bf Mon Sep 17 00:00:00 2001
From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com>
Date: Sun, 26 Nov 2023 17:46:54 -0500
Subject: [PATCH] Update levenshtein_distance.py (#11171)

* Update levenshtein_distance.py

* Update levenshtein_distance.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update levenshtein_distance.py

* Update levenshtein_distance.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update levenshtein_distance.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update levenshtein_distance.py

* Update levenshtein_distance.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
---
 strings/levenshtein_distance.py | 95 ++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 20 deletions(-)

diff --git a/strings/levenshtein_distance.py b/strings/levenshtein_distance.py
index 7be4074dc..3af660872 100644
--- a/strings/levenshtein_distance.py
+++ b/strings/levenshtein_distance.py
@@ -1,20 +1,9 @@
-"""
-This is a Python implementation of the levenshtein distance.
-Levenshtein distance is a string metric for measuring the
-difference between two sequences.
-
-For doctests run following command:
-python -m doctest -v levenshtein-distance.py
-or
-python3 -m doctest -v levenshtein-distance.py
-
-For manual testing run:
-python levenshtein-distance.py
-"""
+from collections.abc import Callable
 
 
 def levenshtein_distance(first_word: str, second_word: str) -> int:
-    """Implementation of the levenshtein distance in Python.
+    """
+    Implementation of the Levenshtein distance in Python.
     :param first_word: the first word to measure the difference.
     :param second_word: the second word to measure the difference.
     :return: the levenshtein distance between the two words.
@@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
         current_row = [i + 1]
 
         for j, c2 in enumerate(second_word):
-            # Calculate insertions, deletions and substitutions
+            # Calculate insertions, deletions, and substitutions
             insertions = previous_row[j + 1] + 1
             deletions = current_row[j] + 1
             substitutions = previous_row[j] + (c1 != c2)
@@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
     return previous_row[-1]
 
 
-if __name__ == "__main__":
-    first_word = input("Enter the first word:\n").strip()
-    second_word = input("Enter the second word:\n").strip()
+def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
+    """
+    Compute the Levenshtein distance between two words (strings).
+    For efficiency, each row is preallocated and then filled in by index.
+    :param first_word: the first word to measure the difference.
+    :param second_word: the second word to measure the difference.
+    :return: the Levenshtein distance between the two words.
+    Examples:
+    >>> levenshtein_distance_optimized("planet", "planetary")
+    3
+    >>> levenshtein_distance_optimized("", "test")
+    4
+    >>> levenshtein_distance_optimized("book", "back")
+    2
+    >>> levenshtein_distance_optimized("book", "book")
+    0
+    >>> levenshtein_distance_optimized("test", "")
+    4
+    >>> levenshtein_distance_optimized("", "")
+    0
+    >>> levenshtein_distance_optimized("orchestration", "container")
+    10
+    """
+    if len(first_word) < len(second_word):
+        return levenshtein_distance_optimized(second_word, first_word)
 
-    result = levenshtein_distance(first_word, second_word)
-    print(f"Levenshtein distance between {first_word} and {second_word} is {result}")
+    if len(second_word) == 0:
+        return len(first_word)
+
+    previous_row = list(range(len(second_word) + 1))
+
+    for i, c1 in enumerate(first_word):
+        current_row = [i + 1] + [0] * len(second_word)
+
+        for j, c2 in enumerate(second_word):
+            insertions = previous_row[j + 1] + 1
+            deletions = current_row[j] + 1
+            substitutions = previous_row[j] + (c1 != c2)
+            current_row[j + 1] = min(insertions, deletions, substitutions)
+
+        previous_row = current_row
+
+    return previous_row[-1]
+
+
+def benchmark_levenshtein_distance(func: Callable) -> None:
+    """
+    Benchmark the Levenshtein distance function.
+    :param func: The function to be benchmarked. It is imported by name from
+        ``__main__``, so it must be defined in the script being run.
+    """
+    from timeit import timeit
+
+    stmt = f"{func.__name__}('sitting', 'kitten')"
+    setup = f"from __main__ import {func.__name__}"
+    number = 25_000
+    result = timeit(stmt=stmt, setup=setup, number=number)
+    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")
+
+
+if __name__ == "__main__":
+    # Get user input for words
+    first_word = input("Enter the first word for Levenshtein distance:\n").strip()
+    second_word = input("Enter the second word for Levenshtein distance:\n").strip()
+
+    # Calculate and print Levenshtein distances
+    print(f"{levenshtein_distance(first_word, second_word) = }")
+    print(f"{levenshtein_distance_optimized(first_word, second_word) = }")
+
+    # Benchmark the Levenshtein distance functions
+    benchmark_levenshtein_distance(levenshtein_distance)
+    benchmark_levenshtein_distance(levenshtein_distance_optimized)