From c6223c71d82c7ba57f3de9eed23963ec96de01bb Mon Sep 17 00:00:00 2001 From: Alexander Pantyukhin Date: Fri, 30 Dec 2022 09:47:40 +0400 Subject: [PATCH] add word_break dynamic approach up -> down. (#8039) * add word_break dynamic approach up -> down. * updating DIRECTORY.md * Update word_break.py fix review notes. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update word_break.py fix review notes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix review notes * add trie type * Update word_break.py add typing Any to trie. * Update dynamic_programming/word_break.py Co-authored-by: Caeden Perelli-Harris * Update dynamic_programming/word_break.py Co-authored-by: Christian Clauss * Update dynamic_programming/word_break.py Co-authored-by: Christian Clauss * Update dynamic_programming/word_break.py Co-authored-by: Christian Clauss * Update dynamic_programming/word_break.py Co-authored-by: Christian Clauss * fix review notes Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Caeden Perelli-Harris Co-authored-by: Christian Clauss --- DIRECTORY.md | 1 + dynamic_programming/word_break.py | 111 ++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 dynamic_programming/word_break.py diff --git a/DIRECTORY.md b/DIRECTORY.md index bec857a38..3437df12c 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -328,6 +328,7 @@ * [Subset Generation](dynamic_programming/subset_generation.py) * [Sum Of Subset](dynamic_programming/sum_of_subset.py) * [Viterbi](dynamic_programming/viterbi.py) + * [Word Break](dynamic_programming/word_break.py) ## Electronics * [Builtin Voltage](electronics/builtin_voltage.py) diff --git a/dynamic_programming/word_break.py b/dynamic_programming/word_break.py new file mode 100644 index 000000000..642ea0edf --- /dev/null +++ b/dynamic_programming/word_break.py @@ -0,0 +1,111 @@ +""" +Author : Alexander Pantyukhin +Date : December 12, 2022 + +Task: +Given a string and a list of words, return true if the string can be +segmented into a space-separated sequence of one or more words. + +Note that the same word may be reused +multiple times in the segmentation. + +Implementation notes: Trie + Dynamic programming up -> down. +The Trie will be used to store the words. It will be useful for scanning +available words for the current position in the string. + +Leetcode: +https://leetcode.com/problems/word-break/description/ + +Runtime: O(n * n) +Space: O(n) +""" + +from functools import lru_cache +from typing import Any + + +def word_break(string: str, words: list[str]) -> bool: + """ + Return True if numbers have opposite signs False otherwise. + + >>> word_break("applepenapple", ["apple","pen"]) + True + >>> word_break("catsandog", ["cats","dog","sand","and","cat"]) + False + >>> word_break("cars", ["car","ca","rs"]) + True + >>> word_break('abc', []) + False + >>> word_break(123, ['a']) + Traceback (most recent call last): + ... + ValueError: the string should be not empty string + >>> word_break('', ['a']) + Traceback (most recent call last): + ... + ValueError: the string should be not empty string + >>> word_break('abc', [123]) + Traceback (most recent call last): + ... + ValueError: the words should be a list of non-empty strings + >>> word_break('abc', ['']) + Traceback (most recent call last): + ... + ValueError: the words should be a list of non-empty strings + """ + + # Validation + if not isinstance(string, str) or len(string) == 0: + raise ValueError("the string should be not empty string") + + if not isinstance(words, list) or not all( + isinstance(item, str) and len(item) > 0 for item in words + ): + raise ValueError("the words should be a list of non-empty strings") + + # Build trie + trie: dict[str, Any] = {} + word_keeper_key = "WORD_KEEPER" + + for word in words: + trie_node = trie + for c in word: + if c not in trie_node: + trie_node[c] = {} + + trie_node = trie_node[c] + + trie_node[word_keeper_key] = True + + len_string = len(string) + + # Dynamic programming method + @lru_cache(maxsize=None) + def is_breakable(index: int) -> bool: + """ + >>> string = 'a' + >>> is_breakable(1) + True + """ + if index == len_string: + return True + + trie_node = trie + for i in range(index, len_string): + trie_node = trie_node.get(string[i], None) + + if trie_node is None: + return False + + if trie_node.get(word_keeper_key, False) and is_breakable(i + 1): + return True + + return False + + return is_breakable(0) + + +if __name__ == "__main__": + import doctest + + doctest.testmod()