From 32aa7ff0819dba3d2166b8a66317999a7790e510 Mon Sep 17 00:00:00 2001 From: Maxwell Aladago Date: Mon, 19 Aug 2019 03:40:36 -0400 Subject: [PATCH] ENH: refactored longest common subsequence, also fixed a bug with the sequence returned (#1142) * function for the knapsack problem which returns one of the optimal subsets * function for the knapsack problem which returns one of the optimal subsets * function for the knapsack problem which returns one of the optimal subsets * function for the knapsack problem which returns one of the optimal subsets * function for the knapsack problem which returns one of the optimal subsets * some pep8 cleanup too * ENH: refactored longest common subsequence, also fixed a bug with the sequence returned * renamed function --- .../longest_common_subsequence.py | 88 ++++++++++++++----- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/dynamic_programming/longest_common_subsequence.py b/dynamic_programming/longest_common_subsequence.py index 0a4771cb2..7836fe303 100644 --- a/dynamic_programming/longest_common_subsequence.py +++ b/dynamic_programming/longest_common_subsequence.py @@ -1,37 +1,83 @@ """ LCS Problem Statement: Given two sequences, find the length of longest subsequence present in both of them. -A subsequence is a sequence that appears in the same relative order, but not necessarily continious. +A subsequence is a sequence that appears in the same relative order, but not necessarily continuous. Example:"abc", "abg" are subsequences of "abcdefgh". """ from __future__ import print_function -try: - xrange # Python 2 -except NameError: - xrange = range # Python 3 -def lcs_dp(x, y): +def longest_common_subsequence(x: str, y: str): + """ + Finds the longest common subsequence between two strings. Also returns the + The subsequence found + + Parameters + ---------- + + x: str, one of the strings + y: str, the other string + + Returns + ------- + L[m][n]: int, the length of the longest subsequence. Also equal to len(seq) + Seq: str, the subsequence found + + >>> longest_common_subsequence("programming", "gaming") + (6, 'gaming') + >>> longest_common_subsequence("physics", "smartphone") + (2, 'ph') + >>> longest_common_subsequence("computer", "food") + (1, 'o') + """ # find the length of strings + + assert x is not None + assert y is not None + m = len(x) n = len(y) # declaring the array for storing the dp values - L = [[None] * (n + 1) for i in xrange(m + 1)] - seq = [] + L = [[0] * (n + 1) for _ in range(m + 1)] - for i in range(m + 1): - for j in range(n + 1): - if i == 0 or j == 0: - L[i][j] = 0 - elif x[i - 1] == y[ j - 1]: - L[i][j] = L[i - 1][j - 1] + 1 - seq.append(x[i -1]) + for i in range(1, m + 1): + for j in range(1, n + 1): + if x[i-1] == y[j-1]: + match = 1 else: - L[i][j] = max(L[i - 1][j], L[i][j - 1]) - # L[m][n] contains the length of LCS of X[0..n-1] & Y[0..m-1] + match = 0 + + L[i][j] = max(L[i-1][j], L[i][j-1], L[i-1][j-1] + match) + + seq = "" + i, j = m, n + while i > 0 and i > 0: + if x[i - 1] == y[j - 1]: + match = 1 + else: + match = 0 + + if L[i][j] == L[i - 1][j - 1] + match: + if match == 1: + seq = x[i - 1] + seq + i -= 1 + j -= 1 + elif L[i][j] == L[i - 1][j]: + i -= 1 + else: + j -= 1 + return L[m][n], seq -if __name__=='__main__': - x = 'AGGTAB' - y = 'GXTXAYB' - print(lcs_dp(x, y)) + +if __name__ == '__main__': + a = 'AGGTAB' + b = 'GXTXAYB' + expected_ln = 4 + expected_subseq = "GTAB" + + ln, subseq = longest_common_subsequence(a, b) + assert expected_ln == ln + assert expected_subseq == subseq + print("len =", ln, ", sub-sequence =", subseq) +