mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-23 21:11:08 +00:00
Update levenshtein_distance.py (#11171)
* Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update levenshtein_distance.py * Update levenshtein_distance.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent
84a1533fd5
commit
154e5e8681
|
@ -1,20 +1,9 @@
|
||||||
"""
|
from collections.abc import Callable
|
||||||
This is a Python implementation of the levenshtein distance.
|
|
||||||
Levenshtein distance is a string metric for measuring the
|
|
||||||
difference between two sequences.
|
|
||||||
|
|
||||||
For doctests run following command:
|
|
||||||
python -m doctest -v levenshtein-distance.py
|
|
||||||
or
|
|
||||||
python3 -m doctest -v levenshtein-distance.py
|
|
||||||
|
|
||||||
For manual testing run:
|
|
||||||
python levenshtein-distance.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def levenshtein_distance(first_word: str, second_word: str) -> int:
|
def levenshtein_distance(first_word: str, second_word: str) -> int:
|
||||||
"""Implementation of the levenshtein distance in Python.
|
"""
|
||||||
|
Implementation of the Levenshtein distance in Python.
|
||||||
:param first_word: the first word to measure the difference.
|
:param first_word: the first word to measure the difference.
|
||||||
:param second_word: the second word to measure the difference.
|
:param second_word: the second word to measure the difference.
|
||||||
:return: the levenshtein distance between the two words.
|
:return: the levenshtein distance between the two words.
|
||||||
|
@ -47,7 +36,7 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
|
||||||
current_row = [i + 1]
|
current_row = [i + 1]
|
||||||
|
|
||||||
for j, c2 in enumerate(second_word):
|
for j, c2 in enumerate(second_word):
|
||||||
# Calculate insertions, deletions and substitutions
|
# Calculate insertions, deletions, and substitutions
|
||||||
insertions = previous_row[j + 1] + 1
|
insertions = previous_row[j + 1] + 1
|
||||||
deletions = current_row[j] + 1
|
deletions = current_row[j] + 1
|
||||||
substitutions = previous_row[j] + (c1 != c2)
|
substitutions = previous_row[j] + (c1 != c2)
|
||||||
|
@ -62,9 +51,75 @@ def levenshtein_distance(first_word: str, second_word: str) -> int:
|
||||||
return previous_row[-1]
|
return previous_row[-1]
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def levenshtein_distance_optimized(first_word: str, second_word: str) -> int:
|
||||||
first_word = input("Enter the first word:\n").strip()
|
"""
|
||||||
second_word = input("Enter the second word:\n").strip()
|
Compute the Levenshtein distance between two words (strings).
|
||||||
|
The function is optimized for efficiency by modifying rows in place.
|
||||||
|
:param first_word: the first word to measure the difference.
|
||||||
|
:param second_word: the second word to measure the difference.
|
||||||
|
:return: the Levenshtein distance between the two words.
|
||||||
|
Examples:
|
||||||
|
>>> levenshtein_distance_optimized("planet", "planetary")
|
||||||
|
3
|
||||||
|
>>> levenshtein_distance_optimized("", "test")
|
||||||
|
4
|
||||||
|
>>> levenshtein_distance_optimized("book", "back")
|
||||||
|
2
|
||||||
|
>>> levenshtein_distance_optimized("book", "book")
|
||||||
|
0
|
||||||
|
>>> levenshtein_distance_optimized("test", "")
|
||||||
|
4
|
||||||
|
>>> levenshtein_distance_optimized("", "")
|
||||||
|
0
|
||||||
|
>>> levenshtein_distance_optimized("orchestration", "container")
|
||||||
|
10
|
||||||
|
"""
|
||||||
|
if len(first_word) < len(second_word):
|
||||||
|
return levenshtein_distance_optimized(second_word, first_word)
|
||||||
|
|
||||||
result = levenshtein_distance(first_word, second_word)
|
if len(second_word) == 0:
|
||||||
print(f"Levenshtein distance between {first_word} and {second_word} is {result}")
|
return len(first_word)
|
||||||
|
|
||||||
|
previous_row = list(range(len(second_word) + 1))
|
||||||
|
|
||||||
|
for i, c1 in enumerate(first_word):
|
||||||
|
current_row = [i + 1] + [0] * len(second_word)
|
||||||
|
|
||||||
|
for j, c2 in enumerate(second_word):
|
||||||
|
insertions = previous_row[j + 1] + 1
|
||||||
|
deletions = current_row[j] + 1
|
||||||
|
substitutions = previous_row[j] + (c1 != c2)
|
||||||
|
current_row[j + 1] = min(insertions, deletions, substitutions)
|
||||||
|
|
||||||
|
previous_row = current_row
|
||||||
|
|
||||||
|
return previous_row[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark_levenshtein_distance(func: Callable) -> None:
    """
    Benchmark a Levenshtein distance function.

    Times 25,000 calls of ``func('sitting', 'kitten')`` with ``timeit`` and
    prints the function's name, the run count and the elapsed seconds.

    :param func: The function to be benchmarked; it is called with two strings.
    """
    from timeit import timeit

    number = 25_000
    # Hand the callable to timeit through ``globals`` instead of importing it
    # by name from ``__main__``: the by-name import only resolves when this
    # file is the running script and ``func`` is one of its top-level names.
    result = timeit(
        stmt="func('sitting', 'kitten')",
        globals={"func": func},
        number=number,
    )
    print(f"{func.__name__:<30} finished {number:,} runs in {result:.5f} seconds")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Read the two words to compare from standard input.
    first_word = input("Enter the first word for Levenshtein distance:\n").strip()
    second_word = input("Enter the second word for Levenshtein distance:\n").strip()

    # Show both implementations' results; the "=" specifier echoes the call text.
    print(f"{levenshtein_distance(first_word, second_word) = }")
    print(f"{levenshtein_distance_optimized(first_word, second_word) = }")

    # Time each implementation in turn.
    for implementation in (levenshtein_distance, levenshtein_distance_optimized):
        benchmark_levenshtein_distance(implementation)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user