mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-23 21:11:08 +00:00
Add Damerau-Levenshtein distance algorithm (#10159)
* Add Damerau-Levenshtein distance algorithm * fix: precommit check * fix: doc correction * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor: use variable for length and doc correction * Update damerau_levenshtein_distance.py * Update damerau_levenshtein_distance.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent
ebe66935d2
commit
c85506262d
71
strings/damerau_levenshtein_distance.py
Normal file
71
strings/damerau_levenshtein_distance.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
"""
|
||||||
|
This script is a implementation of the Damerau-Levenshtein distance algorithm.
|
||||||
|
|
||||||
|
It's an algorithm that measures the edit distance between two string sequences
|
||||||
|
|
||||||
|
More information about this algorithm can be found in this wikipedia article:
|
||||||
|
https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def damerau_levenshtein_distance(first_string: str, second_string: str) -> int:
|
||||||
|
"""
|
||||||
|
Implements the Damerau-Levenshtein distance algorithm that measures
|
||||||
|
the edit distance between two strings.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
first_string: The first string to compare
|
||||||
|
second_string: The second string to compare
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
distance: The edit distance between the first and second strings
|
||||||
|
|
||||||
|
>>> damerau_levenshtein_distance("cat", "cut")
|
||||||
|
1
|
||||||
|
>>> damerau_levenshtein_distance("kitten", "sitting")
|
||||||
|
3
|
||||||
|
>>> damerau_levenshtein_distance("hello", "world")
|
||||||
|
4
|
||||||
|
>>> damerau_levenshtein_distance("book", "back")
|
||||||
|
2
|
||||||
|
>>> damerau_levenshtein_distance("container", "containment")
|
||||||
|
3
|
||||||
|
>>> damerau_levenshtein_distance("container", "containment")
|
||||||
|
3
|
||||||
|
"""
|
||||||
|
# Create a dynamic programming matrix to store the distances
|
||||||
|
dp_matrix = [[0] * (len(second_string) + 1) for _ in range(len(first_string) + 1)]
|
||||||
|
|
||||||
|
# Initialize the matrix
|
||||||
|
for i in range(len(first_string) + 1):
|
||||||
|
dp_matrix[i][0] = i
|
||||||
|
for j in range(len(second_string) + 1):
|
||||||
|
dp_matrix[0][j] = j
|
||||||
|
|
||||||
|
# Fill the matrix
|
||||||
|
for i, first_char in enumerate(first_string, start=1):
|
||||||
|
for j, second_char in enumerate(second_string, start=1):
|
||||||
|
cost = int(first_char != second_char)
|
||||||
|
|
||||||
|
dp_matrix[i][j] = min(
|
||||||
|
dp_matrix[i - 1][j] + 1, # Deletion
|
||||||
|
dp_matrix[i][j - 1] + 1, # Insertion
|
||||||
|
dp_matrix[i - 1][j - 1] + cost, # Substitution
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
i > 1
|
||||||
|
and j > 1
|
||||||
|
and first_string[i - 1] == second_string[j - 2]
|
||||||
|
and first_string[i - 2] == second_string[j - 1]
|
||||||
|
):
|
||||||
|
# Transposition
|
||||||
|
dp_matrix[i][j] = min(dp_matrix[i][j], dp_matrix[i - 2][j - 2] + cost)
|
||||||
|
|
||||||
|
return dp_matrix[-1][-1]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import doctest
|
||||||
|
|
||||||
|
doctest.testmod()
|
Loading…
Reference in New Issue
Block a user