Add missing type annotations for strings directory (#5817)

* Type annotations for `strings/autocomplete_using_trie.py`

* Update autocomplete_using_trie.py

* Update detecting_english_programmatically.py

* Update detecting_english_programmatically.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update word_occurrence.py

* Update frequency_finder.py

* Update z_function.py

* Update z_function.py

* Update frequency_finder.py
This commit is contained in:
Rohan R Bharadwaj 2022-05-13 11:25:53 +05:30 committed by GitHub
parent bbb88bb5c2
commit e95ecfaf27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 82 additions and 102 deletions

View File

@@ -1,11 +1,13 @@
from __future__ import annotations
END = "#" END = "#"
class Trie: class Trie:
def __init__(self): def __init__(self) -> None:
self._trie = {} self._trie: dict = {}
def insert_word(self, text): def insert_word(self, text: str) -> None:
trie = self._trie trie = self._trie
for char in text: for char in text:
if char not in trie: if char not in trie:
@@ -13,7 +15,7 @@ class Trie:
trie = trie[char] trie = trie[char]
trie[END] = True trie[END] = True
def find_word(self, prefix): def find_word(self, prefix: str) -> tuple | list:
trie = self._trie trie = self._trie
for char in prefix: for char in prefix:
if char in trie: if char in trie:
@@ -22,7 +24,7 @@ class Trie:
return [] return []
return self._elements(trie) return self._elements(trie)
def _elements(self, d): def _elements(self, d: dict) -> tuple:
result = [] result = []
for c, v in d.items(): for c, v in d.items():
if c == END: if c == END:
@@ -39,26 +41,28 @@ for word in words:
trie.insert_word(word) trie.insert_word(word)
def autocomplete_using_trie(s): def autocomplete_using_trie(string: str) -> tuple:
""" """
>>> trie = Trie() >>> trie = Trie()
>>> for word in words: >>> for word in words:
... trie.insert_word(word) ... trie.insert_word(word)
... ...
>>> matches = autocomplete_using_trie("de") >>> matches = autocomplete_using_trie("de")
>>> "detergent " in matches
"detergent " in matches
True True
"dog " in matches >>> "dog " in matches
False False
""" """
suffixes = trie.find_word(s) suffixes = trie.find_word(string)
return tuple(s + w for w in suffixes) return tuple(string + word for word in suffixes)
def main(): def main() -> None:
print(autocomplete_using_trie("de")) print(autocomplete_using_trie("de"))
if __name__ == "__main__": if __name__ == "__main__":
import doctest
doctest.testmod()
main() main()

View File

@@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n" LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
def loadDictionary(): def load_dictionary() -> dict[str, None]:
path = os.path.split(os.path.realpath(__file__)) path = os.path.split(os.path.realpath(__file__))
englishWords = {} english_words: dict[str, None] = {}
with open(path[0] + "/dictionary.txt") as dictionaryFile: with open(path[0] + "/dictionary.txt") as dictionary_file:
for word in dictionaryFile.read().split("\n"): for word in dictionary_file.read().split("\n"):
englishWords[word] = None english_words[word] = None
return englishWords return english_words
ENGLISH_WORDS = loadDictionary() ENGLISH_WORDS = load_dictionary()
def getEnglishCount(message): def get_english_count(message: str) -> float:
message = message.upper() message = message.upper()
message = removeNonLetters(message) message = remove_non_letters(message)
possibleWords = message.split() possible_words = message.split()
if possibleWords == []: if possible_words == []:
return 0.0 return 0.0
matches = 0 matches = 0
for word in possibleWords: for word in possible_words:
if word in ENGLISH_WORDS: if word in ENGLISH_WORDS:
matches += 1 matches += 1
return float(matches) / len(possibleWords) return float(matches) / len(possible_words)
def removeNonLetters(message): def remove_non_letters(message: str) -> str:
lettersOnly = [] letters_only = []
for symbol in message: for symbol in message:
if symbol in LETTERS_AND_SPACE: if symbol in LETTERS_AND_SPACE:
lettersOnly.append(symbol) letters_only.append(symbol)
return "".join(lettersOnly) return "".join(letters_only)
def isEnglish(message, wordPercentage=20, letterPercentage=85): def is_english(
message: str, word_percentage: int = 20, letter_percentage: int = 85
) -> bool:
""" """
>>> isEnglish('Hello World') >>> is_english('Hello World')
True True
>>> is_english('llold HorWd')
>>> isEnglish('llold HorWd')
False False
""" """
wordsMatch = getEnglishCount(message) * 100 >= wordPercentage words_match = get_english_count(message) * 100 >= word_percentage
numLetters = len(removeNonLetters(message)) num_letters = len(remove_non_letters(message))
messageLettersPercentage = (float(numLetters) / len(message)) * 100 message_letters_percentage = (float(num_letters) / len(message)) * 100
lettersMatch = messageLettersPercentage >= letterPercentage letters_match = message_letters_percentage >= letter_percentage
return wordsMatch and lettersMatch return words_match and letters_match
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,7 +1,9 @@
# Frequency Finder # Frequency Finder
import string
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency # frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
englishLetterFreq = { english_letter_freq = {
"E": 12.70, "E": 12.70,
"T": 9.06, "T": 9.06,
"A": 8.17, "A": 8.17,
@@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def getLetterCount(message): def get_letter_count(message: str) -> dict[str, int]:
letterCount = { letter_count = {letter: 0 for letter in string.ascii_uppercase}
"A": 0,
"B": 0,
"C": 0,
"D": 0,
"E": 0,
"F": 0,
"G": 0,
"H": 0,
"I": 0,
"J": 0,
"K": 0,
"L": 0,
"M": 0,
"N": 0,
"O": 0,
"P": 0,
"Q": 0,
"R": 0,
"S": 0,
"T": 0,
"U": 0,
"V": 0,
"W": 0,
"X": 0,
"Y": 0,
"Z": 0,
}
for letter in message.upper(): for letter in message.upper():
if letter in LETTERS: if letter in LETTERS:
letterCount[letter] += 1 letter_count[letter] += 1
return letterCount return letter_count
def getItemAtIndexZero(x): def get_item_at_index_zero(x: tuple) -> str:
return x[0] return x[0]
def getFrequencyOrder(message): def get_frequency_order(message: str) -> str:
letterToFreq = getLetterCount(message) letter_to_freq = get_letter_count(message)
freqToLetter = {} freq_to_letter: dict[int, list[str]] = {
freq: [] for letter, freq in letter_to_freq.items()
}
for letter in LETTERS: for letter in LETTERS:
if letterToFreq[letter] not in freqToLetter: freq_to_letter[letter_to_freq[letter]].append(letter)
freqToLetter[letterToFreq[letter]] = [letter]
else:
freqToLetter[letterToFreq[letter]].append(letter)
for freq in freqToLetter: freq_to_letter_str: dict[int, str] = {}
freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
freqToLetter[freq] = "".join(freqToLetter[freq])
freqPairs = list(freqToLetter.items()) for freq in freq_to_letter:
freqPairs.sort(key=getItemAtIndexZero, reverse=True) freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True)
freq_to_letter_str[freq] = "".join(freq_to_letter[freq])
freqOrder = [] freq_pairs = list(freq_to_letter_str.items())
for freqPair in freqPairs: freq_pairs.sort(key=get_item_at_index_zero, reverse=True)
freqOrder.append(freqPair[1])
return "".join(freqOrder) freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs]
return "".join(freq_order)
def englishFreqMatchScore(message): def english_freq_match_score(message: str) -> int:
""" """
>>> englishFreqMatchScore('Hello World') >>> english_freq_match_score('Hello World')
1 1
""" """
freqOrder = getFrequencyOrder(message) freq_order = get_frequency_order(message)
matchScore = 0 match_score = 0
for commonLetter in ETAOIN[:6]: for common_letter in ETAOIN[:6]:
if commonLetter in freqOrder[:6]: if common_letter in freq_order[:6]:
matchScore += 1 match_score += 1
for uncommonLetter in ETAOIN[-6:]: for uncommon_letter in ETAOIN[-6:]:
if uncommonLetter in freqOrder[-6:]: if uncommon_letter in freq_order[-6:]:
matchScore += 1 match_score += 1
return matchScore return match_score
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -1,6 +1,7 @@
# Created by sarathkaul on 17/11/19 # Created by sarathkaul on 17/11/19
# Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020 # Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
from collections import defaultdict from collections import defaultdict
from typing import DefaultDict
def word_occurence(sentence: str) -> dict: def word_occurence(sentence: str) -> dict:
@@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
>>> dict(word_occurence("Two spaces")) >>> dict(word_occurence("Two spaces"))
{'Two': 1, 'spaces': 1} {'Two': 1, 'spaces': 1}
""" """
occurrence: dict = defaultdict(int) occurrence: DefaultDict[str, int] = defaultdict(int)
# Creating a dictionary containing count of each word # Creating a dictionary containing count of each word
for word in sentence.split(): for word in sentence.split():
occurrence[word] += 1 occurrence[word] += 1

View File

@@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
""" """
def z_function(input_str: str) -> list: def z_function(input_str: str) -> list[int]:
""" """
For the given string this function computes value for each index, For the given string this function computes value for each index,
which represents the maximal length substring starting from the index which represents the maximal length substring starting from the index
@@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
>>> z_function("zxxzxxz") >>> z_function("zxxzxxz")
[0, 0, 0, 4, 0, 0, 1] [0, 0, 0, 4, 0, 0, 1]
""" """
z_result = [0] * len(input_str) z_result = [0 for i in range(len(input_str))]
# initialize interval's left pointer and right pointer # initialize interval's left pointer and right pointer
left_pointer, right_pointer = 0, 0 left_pointer, right_pointer = 0, 0
@@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
return z_result return z_result
def go_next(i, z_result, s): def go_next(i: int, z_result: list[int], s: str) -> bool:
""" """
Check if we have to move forward to the next characters or not Check if we have to move forward to the next characters or not
""" """