Add missing type annotations for strings directory (#5817)

* Type annotations for `strings/autocomplete_using_trie.py`
* Update autocomplete_using_trie.py
* Update detecting_english_programmatically.py
* Update detecting_english_programmatically.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update word_occurrence.py
* Update frequency_finder.py
* Update z_function.py
* Update z_function.py
* Update frequency_finder.py
2025-05-15 13:47:22 +00:00 · 2022-05-13 11:25:53 +05:30 · 2022-05-13 11:25:53 +05:30 · e95ecfaf27
commit e95ecfaf27
parent bbb88bb5c2
5 changed files with 82 additions and 102 deletions
--- a/strings/autocomplete_using_trie.py
+++ b/strings/autocomplete_using_trie.py
@@ -1,11 +1,13 @@
+from __future__ import annotations
+
 END = "#"


 class Trie:
-    def __init__(self):
-        self._trie = {}
+    def __init__(self) -> None:
+        self._trie: dict = {}

-    def insert_word(self, text):
+    def insert_word(self, text: str) -> None:
        trie = self._trie
        for char in text:
            if char not in trie:
@@ -13,7 +15,7 @@ class Trie:
            trie = trie[char]
        trie[END] = True

-    def find_word(self, prefix):
+    def find_word(self, prefix: str) -> tuple | list:
        trie = self._trie
        for char in prefix:
            if char in trie:
@@ -22,7 +24,7 @@ class Trie:
                return []
        return self._elements(trie)

-    def _elements(self, d):
+    def _elements(self, d: dict) -> tuple:
        result = []
        for c, v in d.items():
            if c == END:
@@ -39,26 +41,28 @@ for word in words:
    trie.insert_word(word)


-def autocomplete_using_trie(s):
+def autocomplete_using_trie(string: str) -> tuple:
    """
    >>> trie = Trie()
    >>> for word in words:
    ...     trie.insert_word(word)
    ...
    >>> matches = autocomplete_using_trie("de")
-
-    "detergent " in matches
+    >>> "detergent " in matches
    True
-    "dog " in matches
+    >>> "dog " in matches
    False
    """
-    suffixes = trie.find_word(s)
-    return tuple(s + w for w in suffixes)
+    suffixes = trie.find_word(string)
+    return tuple(string + word for word in suffixes)


-def main():
+def main() -> None:
    print(autocomplete_using_trie("de"))


 if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
    main()
--- a/strings/detecting_english_programmatically.py
+++ b/strings/detecting_english_programmatically.py
@@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"


-def loadDictionary():
+def load_dictionary() -> dict[str, None]:
    path = os.path.split(os.path.realpath(__file__))
-    englishWords = {}
-    with open(path[0] + "/dictionary.txt") as dictionaryFile:
-        for word in dictionaryFile.read().split("\n"):
-            englishWords[word] = None
-    return englishWords
+    english_words: dict[str, None] = {}
+    with open(path[0] + "/dictionary.txt") as dictionary_file:
+        for word in dictionary_file.read().split("\n"):
+            english_words[word] = None
+    return english_words


-ENGLISH_WORDS = loadDictionary()
+ENGLISH_WORDS = load_dictionary()


-def getEnglishCount(message):
+def get_english_count(message: str) -> float:
    message = message.upper()
-    message = removeNonLetters(message)
-    possibleWords = message.split()
+    message = remove_non_letters(message)
+    possible_words = message.split()

-    if possibleWords == []:
+    if possible_words == []:
        return 0.0

    matches = 0
-    for word in possibleWords:
+    for word in possible_words:
        if word in ENGLISH_WORDS:
            matches += 1

-    return float(matches) / len(possibleWords)
+    return float(matches) / len(possible_words)


-def removeNonLetters(message):
-    lettersOnly = []
+def remove_non_letters(message: str) -> str:
+    letters_only = []
    for symbol in message:
        if symbol in LETTERS_AND_SPACE:
-            lettersOnly.append(symbol)
-    return "".join(lettersOnly)
+            letters_only.append(symbol)
+    return "".join(letters_only)


-def isEnglish(message, wordPercentage=20, letterPercentage=85):
+def is_english(
+    message: str, word_percentage: int = 20, letter_percentage: int = 85
+) -> bool:
    """
-    >>> isEnglish('Hello World')
+    >>> is_english('Hello World')
    True
-
-    >>> isEnglish('llold HorWd')
+    >>> is_english('llold HorWd')
    False
    """
-    wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
-    numLetters = len(removeNonLetters(message))
-    messageLettersPercentage = (float(numLetters) / len(message)) * 100
-    lettersMatch = messageLettersPercentage >= letterPercentage
-    return wordsMatch and lettersMatch
+    words_match = get_english_count(message) * 100 >= word_percentage
+    num_letters = len(remove_non_letters(message))
+    message_letters_percentage = (float(num_letters) / len(message)) * 100
+    letters_match = message_letters_percentage >= letter_percentage
+    return words_match and letters_match


 if __name__ == "__main__":
--- a/strings/frequency_finder.py
+++ b/strings/frequency_finder.py
@@ -1,7 +1,9 @@
 # Frequency Finder

+import string
+
 # frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
-englishLetterFreq = {
+english_letter_freq = {
    "E": 12.70,
    "T": 9.06,
    "A": 8.17,
@@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
 LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"


-def getLetterCount(message):
-    letterCount = {
-        "A": 0,
-        "B": 0,
-        "C": 0,
-        "D": 0,
-        "E": 0,
-        "F": 0,
-        "G": 0,
-        "H": 0,
-        "I": 0,
-        "J": 0,
-        "K": 0,
-        "L": 0,
-        "M": 0,
-        "N": 0,
-        "O": 0,
-        "P": 0,
-        "Q": 0,
-        "R": 0,
-        "S": 0,
-        "T": 0,
-        "U": 0,
-        "V": 0,
-        "W": 0,
-        "X": 0,
-        "Y": 0,
-        "Z": 0,
-    }
+def get_letter_count(message: str) -> dict[str, int]:
+    letter_count = {letter: 0 for letter in string.ascii_uppercase}
    for letter in message.upper():
        if letter in LETTERS:
-            letterCount[letter] += 1
+            letter_count[letter] += 1

-    return letterCount
+    return letter_count


-def getItemAtIndexZero(x):
+def get_item_at_index_zero(x: tuple) -> str:
    return x[0]


-def getFrequencyOrder(message):
-    letterToFreq = getLetterCount(message)
-    freqToLetter = {}
+def get_frequency_order(message: str) -> str:
+    letter_to_freq = get_letter_count(message)
+    freq_to_letter: dict[int, list[str]] = {
+        freq: [] for letter, freq in letter_to_freq.items()
+    }
    for letter in LETTERS:
-        if letterToFreq[letter] not in freqToLetter:
-            freqToLetter[letterToFreq[letter]] = [letter]
-        else:
-            freqToLetter[letterToFreq[letter]].append(letter)
+        freq_to_letter[letter_to_freq[letter]].append(letter)

-    for freq in freqToLetter:
-        freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
-        freqToLetter[freq] = "".join(freqToLetter[freq])
+    freq_to_letter_str: dict[int, str] = {}

-    freqPairs = list(freqToLetter.items())
-    freqPairs.sort(key=getItemAtIndexZero, reverse=True)
+    for freq in freq_to_letter:
+        freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True)
+        freq_to_letter_str[freq] = "".join(freq_to_letter[freq])

-    freqOrder = []
-    for freqPair in freqPairs:
-        freqOrder.append(freqPair[1])
+    freq_pairs = list(freq_to_letter_str.items())
+    freq_pairs.sort(key=get_item_at_index_zero, reverse=True)

-    return "".join(freqOrder)
+    freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs]
+
+    return "".join(freq_order)


-def englishFreqMatchScore(message):
+def english_freq_match_score(message: str) -> int:
    """
-    >>> englishFreqMatchScore('Hello World')
+    >>> english_freq_match_score('Hello World')
    1
    """
-    freqOrder = getFrequencyOrder(message)
-    matchScore = 0
-    for commonLetter in ETAOIN[:6]:
-        if commonLetter in freqOrder[:6]:
-            matchScore += 1
+    freq_order = get_frequency_order(message)
+    match_score = 0
+    for common_letter in ETAOIN[:6]:
+        if common_letter in freq_order[:6]:
+            match_score += 1

-    for uncommonLetter in ETAOIN[-6:]:
-        if uncommonLetter in freqOrder[-6:]:
-            matchScore += 1
+    for uncommon_letter in ETAOIN[-6:]:
+        if uncommon_letter in freq_order[-6:]:
+            match_score += 1

-    return matchScore
+    return match_score


 if __name__ == "__main__":
--- a/strings/word_occurrence.py
+++ b/strings/word_occurrence.py
@@ -1,6 +1,7 @@
 # Created by sarathkaul on 17/11/19
 # Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
 from collections import defaultdict
+from typing import DefaultDict


 def word_occurence(sentence: str) -> dict:
@@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
    >>> dict(word_occurence("Two  spaces"))
    {'Two': 1, 'spaces': 1}
    """
-    occurrence: dict = defaultdict(int)
+    occurrence: DefaultDict[str, int] = defaultdict(int)
    # Creating a dictionary containing count of each word
    for word in sentence.split():
        occurrence[word] += 1
--- a/strings/z_function.py
+++ b/strings/z_function.py
@@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
 """


-def z_function(input_str: str) -> list:
+def z_function(input_str: str) -> list[int]:
    """
    For the given string this function computes value for each index,
    which represents the maximal length substring starting from the index
@@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
    >>> z_function("zxxzxxz")
    [0, 0, 0, 4, 0, 0, 1]
    """
-    z_result = [0] * len(input_str)
+    z_result = [0 for i in range(len(input_str))]

    # initialize interval's left pointer and right pointer
    left_pointer, right_pointer = 0, 0
@@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
    return z_result


-def go_next(i, z_result, s):
+def go_next(i: int, z_result: list[int], s: str) -> bool:
    """
    Check if we have to move forward to the next characters or not
    """