Python/strings/levenshtein_distance.py
CarsonHam 61f3119467
Change occurrences of str.format to f-strings (#4118)
* f-string update rsa_cipher.py

* f-string update rsa_key_generator.py

* f-string update burrows_wheeler.py

* f-string update non_recursive_segment_tree.py

* f-string update red_black_tree.py

* f-string update deque_doubly.py

* f-string update climbing_stairs.py

* f-string update iterating_through_submasks.py

* f-string update knn_sklearn.py

* f-string update 3n_plus_1.py

* f-string update quadratic_equations_complex_numbers.py

* f-string update nth_fibonacci_using_matrix_exponentiation.py

* f-string update sherman_morrison.py

* f-string update levenshtein_distance.py

* fix lines that were too long
2021-02-23 11:23:49 +05:30

73 lines
2.2 KiB
Python

"""
This is a Python implementation of the Levenshtein distance.
The Levenshtein distance is a string metric for measuring the
difference between two sequences.
To run the doctests, use the following command:
python -m doctest -v levenshtein_distance.py
or
python3 -m doctest -v levenshtein_distance.py
For manual testing run:
python levenshtein_distance.py
"""
def levenshtein_distance(first_word: str, second_word: str) -> int:
    """Return the Levenshtein distance between two words.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one word into the other.
    Only two rows of the dynamic-programming table are kept at a time.

    :param first_word: the first word to measure the difference.
    :param second_word: the second word to measure the difference.
    :return: the levenshtein distance between the two words.

    Examples:
    >>> levenshtein_distance("planet", "planetary")
    3
    >>> levenshtein_distance("", "test")
    4
    >>> levenshtein_distance("book", "back")
    2
    >>> levenshtein_distance("book", "book")
    0
    >>> levenshtein_distance("test", "")
    4
    >>> levenshtein_distance("", "")
    0
    >>> levenshtein_distance("orchestration", "container")
    10
    """
    # The distance is symmetric, so put the longer word on the outer loop;
    # each row then has len(shorter word) + 1 entries.
    if len(first_word) < len(second_word):
        first_word, second_word = second_word, first_word

    # Turning a word into the empty word costs one deletion per character.
    if not second_word:
        return len(first_word)

    # Row 0: cost of building each prefix of second_word from the empty string.
    previous_row = list(range(len(second_word) + 1))

    for i, c1 in enumerate(first_word, start=1):
        # First column: cost of deleting the first i characters of first_word.
        current_row = [i]
        for j, c2 in enumerate(second_word):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            # (c1 != c2) is a bool used as 0/1: matching characters are free.
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row

    # The last cell of the final row is the distance between the full words.
    return previous_row[-1]
if __name__ == "__main__":
    # Manual test: read two words from stdin, ignoring surrounding whitespace,
    # and print the distance between them.
    first_word = input("Enter the first word:\n").strip()
    second_word = input("Enter the second word:\n").strip()

    result = levenshtein_distance(first_word, second_word)
    print(f"Levenshtein distance between {first_word} and {second_word} is {result}")