Add missing type annotations for strings directory (#5817)

* Type annotations for `strings/autocomplete_using_trie.py`

* Update autocomplete_using_trie.py

* Update detecting_english_programmatically.py

* Update detecting_english_programmatically.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update frequency_finder.py

* Update word_occurrence.py

* Update frequency_finder.py

* Update z_function.py

* Update z_function.py

* Update frequency_finder.py
This commit is contained in:
Rohan R Bharadwaj 2022-05-13 11:25:53 +05:30 committed by GitHub
parent bbb88bb5c2
commit e95ecfaf27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 82 additions and 102 deletions

View File

@ -1,11 +1,13 @@
from __future__ import annotations
END = "#"
class Trie:
def __init__(self):
self._trie = {}
def __init__(self) -> None:
self._trie: dict = {}
def insert_word(self, text):
def insert_word(self, text: str) -> None:
trie = self._trie
for char in text:
if char not in trie:
@ -13,7 +15,7 @@ class Trie:
trie = trie[char]
trie[END] = True
def find_word(self, prefix):
def find_word(self, prefix: str) -> tuple | list:
trie = self._trie
for char in prefix:
if char in trie:
@ -22,7 +24,7 @@ class Trie:
return []
return self._elements(trie)
def _elements(self, d):
def _elements(self, d: dict) -> tuple:
result = []
for c, v in d.items():
if c == END:
@ -39,26 +41,28 @@ for word in words:
trie.insert_word(word)
def autocomplete_using_trie(s):
def autocomplete_using_trie(string: str) -> tuple:
"""
>>> trie = Trie()
>>> for word in words:
... trie.insert_word(word)
...
>>> matches = autocomplete_using_trie("de")
"detergent " in matches
>>> "detergent " in matches
True
"dog " in matches
>>> "dog " in matches
False
"""
suffixes = trie.find_word(s)
return tuple(s + w for w in suffixes)
suffixes = trie.find_word(string)
return tuple(string + word for word in suffixes)
def main():
def main() -> None:
print(autocomplete_using_trie("de"))
if __name__ == "__main__":
import doctest
doctest.testmod()
main()

View File

@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
def loadDictionary():
def load_dictionary() -> dict[str, None]:
path = os.path.split(os.path.realpath(__file__))
englishWords = {}
with open(path[0] + "/dictionary.txt") as dictionaryFile:
for word in dictionaryFile.read().split("\n"):
englishWords[word] = None
return englishWords
english_words: dict[str, None] = {}
with open(path[0] + "/dictionary.txt") as dictionary_file:
for word in dictionary_file.read().split("\n"):
english_words[word] = None
return english_words
ENGLISH_WORDS = loadDictionary()
ENGLISH_WORDS = load_dictionary()
def getEnglishCount(message):
def get_english_count(message: str) -> float:
message = message.upper()
message = removeNonLetters(message)
possibleWords = message.split()
message = remove_non_letters(message)
possible_words = message.split()
if possibleWords == []:
if possible_words == []:
return 0.0
matches = 0
for word in possibleWords:
for word in possible_words:
if word in ENGLISH_WORDS:
matches += 1
return float(matches) / len(possibleWords)
return float(matches) / len(possible_words)
def removeNonLetters(message):
lettersOnly = []
def remove_non_letters(message: str) -> str:
letters_only = []
for symbol in message:
if symbol in LETTERS_AND_SPACE:
lettersOnly.append(symbol)
return "".join(lettersOnly)
letters_only.append(symbol)
return "".join(letters_only)
def isEnglish(message, wordPercentage=20, letterPercentage=85):
def is_english(
message: str, word_percentage: int = 20, letter_percentage: int = 85
) -> bool:
"""
>>> isEnglish('Hello World')
>>> is_english('Hello World')
True
>>> isEnglish('llold HorWd')
>>> is_english('llold HorWd')
False
"""
wordsMatch = getEnglishCount(message) * 100 >= wordPercentage
numLetters = len(removeNonLetters(message))
messageLettersPercentage = (float(numLetters) / len(message)) * 100
lettersMatch = messageLettersPercentage >= letterPercentage
return wordsMatch and lettersMatch
words_match = get_english_count(message) * 100 >= word_percentage
num_letters = len(remove_non_letters(message))
message_letters_percentage = (float(num_letters) / len(message)) * 100
letters_match = message_letters_percentage >= letter_percentage
return words_match and letters_match
if __name__ == "__main__":

View File

@ -1,7 +1,9 @@
# Frequency Finder
import string
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
englishLetterFreq = {
english_letter_freq = {
"E": 12.70,
"T": 9.06,
"A": 8.17,
@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def getLetterCount(message):
letterCount = {
"A": 0,
"B": 0,
"C": 0,
"D": 0,
"E": 0,
"F": 0,
"G": 0,
"H": 0,
"I": 0,
"J": 0,
"K": 0,
"L": 0,
"M": 0,
"N": 0,
"O": 0,
"P": 0,
"Q": 0,
"R": 0,
"S": 0,
"T": 0,
"U": 0,
"V": 0,
"W": 0,
"X": 0,
"Y": 0,
"Z": 0,
}
def get_letter_count(message: str) -> dict[str, int]:
letter_count = {letter: 0 for letter in string.ascii_uppercase}
for letter in message.upper():
if letter in LETTERS:
letterCount[letter] += 1
letter_count[letter] += 1
return letterCount
return letter_count
def getItemAtIndexZero(x):
def get_item_at_index_zero(x: tuple) -> str:
return x[0]
def getFrequencyOrder(message):
letterToFreq = getLetterCount(message)
freqToLetter = {}
def get_frequency_order(message: str) -> str:
letter_to_freq = get_letter_count(message)
freq_to_letter: dict[int, list[str]] = {
freq: [] for letter, freq in letter_to_freq.items()
}
for letter in LETTERS:
if letterToFreq[letter] not in freqToLetter:
freqToLetter[letterToFreq[letter]] = [letter]
else:
freqToLetter[letterToFreq[letter]].append(letter)
freq_to_letter[letter_to_freq[letter]].append(letter)
for freq in freqToLetter:
freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
freqToLetter[freq] = "".join(freqToLetter[freq])
freq_to_letter_str: dict[int, str] = {}
freqPairs = list(freqToLetter.items())
freqPairs.sort(key=getItemAtIndexZero, reverse=True)
for freq in freq_to_letter:
freq_to_letter[freq].sort(key=ETAOIN.find, reverse=True)
freq_to_letter_str[freq] = "".join(freq_to_letter[freq])
freqOrder = []
for freqPair in freqPairs:
freqOrder.append(freqPair[1])
freq_pairs = list(freq_to_letter_str.items())
freq_pairs.sort(key=get_item_at_index_zero, reverse=True)
return "".join(freqOrder)
freq_order: list[str] = [freq_pair[1] for freq_pair in freq_pairs]
return "".join(freq_order)
def englishFreqMatchScore(message):
def english_freq_match_score(message: str) -> int:
"""
>>> englishFreqMatchScore('Hello World')
>>> english_freq_match_score('Hello World')
1
"""
freqOrder = getFrequencyOrder(message)
matchScore = 0
for commonLetter in ETAOIN[:6]:
if commonLetter in freqOrder[:6]:
matchScore += 1
freq_order = get_frequency_order(message)
match_score = 0
for common_letter in ETAOIN[:6]:
if common_letter in freq_order[:6]:
match_score += 1
for uncommonLetter in ETAOIN[-6:]:
if uncommonLetter in freqOrder[-6:]:
matchScore += 1
for uncommon_letter in ETAOIN[-6:]:
if uncommon_letter in freq_order[-6:]:
match_score += 1
return matchScore
return match_score
if __name__ == "__main__":

View File

@ -1,6 +1,7 @@
# Created by sarathkaul on 17/11/19
# Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
from collections import defaultdict
from typing import DefaultDict
def word_occurence(sentence: str) -> dict:
@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
>>> dict(word_occurence("Two spaces"))
{'Two': 1, 'spaces': 1}
"""
occurrence: dict = defaultdict(int)
occurrence: DefaultDict[str, int] = defaultdict(int)
# Create a dictionary containing the count of each word
for word in sentence.split():
occurrence[word] += 1

View File

@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
"""
def z_function(input_str: str) -> list:
def z_function(input_str: str) -> list[int]:
"""
For the given string, this function computes a value for each index,
which represents the maximal length substring starting from the index
@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
>>> z_function("zxxzxxz")
[0, 0, 0, 4, 0, 0, 1]
"""
z_result = [0] * len(input_str)
z_result = [0 for i in range(len(input_str))]
# initialize interval's left pointer and right pointer
left_pointer, right_pointer = 0, 0
@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
return z_result
def go_next(i, z_result, s):
def go_next(i: int, z_result: list[int], s: str) -> bool:
"""
Check if we have to move forward to the next characters or not
"""