mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-27 15:01:08 +00:00
Add missing type annotations for strings directory (#5817)
* Type annotations for `strings/autocomplete_using_trie.py`
* Update autocomplete_using_trie.py
* Update detecting_english_programmatically.py
* Update detecting_english_programmatically.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update frequency_finder.py
* Update word_occurrence.py
* Update frequency_finder.py
* Update z_function.py
* Update z_function.py
* Update frequency_finder.py
This commit is contained in:
parent
bbb88bb5c2
commit
e95ecfaf27
|
@ -1,11 +1,13 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
END = "#"
|
END = "#"
|
||||||
|
|
||||||
|
|
||||||
class Trie:
|
class Trie:
|
||||||
def __init__(self):
|
def __init__(self) -> None:
|
||||||
self._trie = {}
|
self._trie: dict = {}
|
||||||
|
|
||||||
def insert_word(self, text):
|
def insert_word(self, text: str) -> None:
|
||||||
trie = self._trie
|
trie = self._trie
|
||||||
for char in text:
|
for char in text:
|
||||||
if char not in trie:
|
if char not in trie:
|
||||||
|
@ -13,7 +15,7 @@ class Trie:
|
||||||
trie = trie[char]
|
trie = trie[char]
|
||||||
trie[END] = True
|
trie[END] = True
|
||||||
|
|
||||||
def find_word(self, prefix):
|
def find_word(self, prefix: str) -> tuple | list:
|
||||||
trie = self._trie
|
trie = self._trie
|
||||||
for char in prefix:
|
for char in prefix:
|
||||||
if char in trie:
|
if char in trie:
|
||||||
|
@ -22,7 +24,7 @@ class Trie:
|
||||||
return []
|
return []
|
||||||
return self._elements(trie)
|
return self._elements(trie)
|
||||||
|
|
||||||
def _elements(self, d):
|
def _elements(self, d: dict) -> tuple:
|
||||||
result = []
|
result = []
|
||||||
for c, v in d.items():
|
for c, v in d.items():
|
||||||
if c == END:
|
if c == END:
|
||||||
|
@ -39,26 +41,28 @@ for word in words:
|
||||||
trie.insert_word(word)
|
trie.insert_word(word)
|
||||||
|
|
||||||
|
|
||||||
def autocomplete_using_trie(string: str) -> tuple[str, ...]:
    """
    Return the completions of ``string`` known to the module-level ``trie``.

    Each completion is ``string`` concatenated with one of the suffixes
    returned by ``trie.find_word(string)``; the result is empty when the
    trie returns no suffixes.

    >>> trie = Trie()
    >>> for word in words:
    ...     trie.insert_word(word)
    ...
    >>> matches = autocomplete_using_trie("de")
    >>> "detergent " in matches
    True
    >>> "dog " in matches
    False
    """
    suffixes = trie.find_word(string)
    return tuple(string + word for word in suffixes)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Print the completions that ``autocomplete_using_trie`` finds for "de"."""
    print(autocomplete_using_trie("de"))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import doctest

    # Run the module's doctests first, then print a sample completion.
    doctest.testmod()
    main()
|
||||||
|
|
|
@ -4,55 +4,56 @@ UPPERLETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
|
LETTERS_AND_SPACE = UPPERLETTERS + UPPERLETTERS.lower() + " \t\n"
|
||||||
|
|
||||||
|
|
||||||
def load_dictionary() -> dict[str, None]:
    """
    Load ``dictionary.txt`` from the directory that contains this module.

    Returns:
        A dict whose keys are the entries obtained by splitting the file's
        text on ``"\\n"``; every value is ``None`` (the dict is used purely
        for fast membership tests).

    Raises:
        OSError: if ``dictionary.txt`` cannot be opened.
    """
    # Resolve symlinks so the word list is looked up next to the real file.
    directory = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(directory, "dictionary.txt")) as dictionary_file:
        return dict.fromkeys(dictionary_file.read().split("\n"))


# Loaded once at import time and shared by the word-matching helpers.
ENGLISH_WORDS = load_dictionary()
|
||||||
|
|
||||||
|
|
||||||
def get_english_count(message: str) -> float:
    """
    Return the fraction of words in ``message`` found in ``ENGLISH_WORDS``.

    The message is upper-cased and filtered through ``remove_non_letters``
    before being split on whitespace. The result lies between 0.0 and 1.0;
    a message that yields no words scores 0.0.
    """
    message = message.upper()
    message = remove_non_letters(message)
    possible_words = message.split()

    if not possible_words:
        # Avoid dividing by zero when nothing is left after filtering.
        return 0.0

    matches = sum(1 for word in possible_words if word in ENGLISH_WORDS)
    return matches / len(possible_words)
|
||||||
|
|
||||||
|
|
||||||
def remove_non_letters(message: str) -> str:
    """
    Return ``message`` with every character outside ``LETTERS_AND_SPACE``
    removed; the order of the remaining characters is preserved.
    """
    return "".join(symbol for symbol in message if symbol in LETTERS_AND_SPACE)
|
||||||
|
|
||||||
|
|
||||||
def is_english(
    message: str, word_percentage: int = 20, letter_percentage: int = 85
) -> bool:
    """
    Decide whether ``message`` looks like English text.

    Both of the following must hold:

    * the score from ``get_english_count``, expressed as a percentage, is at
      least ``word_percentage``;
    * the characters kept by ``remove_non_letters`` make up at least
      ``letter_percentage`` percent of ``len(message)``.

    An empty message is reported as not English instead of raising
    ``ZeroDivisionError``.

    >>> is_english('Hello World')
    True
    >>> is_english('llold HorWd')
    False
    >>> is_english('')
    False
    """
    if not message:
        # Nothing to measure; also guards the division by len(message) below.
        return False

    words_match = get_english_count(message) * 100 >= word_percentage
    num_letters = len(remove_non_letters(message))
    message_letters_percentage = num_letters / len(message) * 100
    letters_match = message_letters_percentage >= letter_percentage
    return words_match and letters_match
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# Frequency Finder
|
# Frequency Finder
|
||||||
|
|
||||||
|
import string
|
||||||
|
|
||||||
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
|
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
|
||||||
englishLetterFreq = {
|
english_letter_freq = {
|
||||||
"E": 12.70,
|
"E": 12.70,
|
||||||
"T": 9.06,
|
"T": 9.06,
|
||||||
"A": 8.17,
|
"A": 8.17,
|
||||||
|
@ -33,85 +35,57 @@ ETAOIN = "ETAOINSHRDLCUMWFGYPBVKJXQZ"
|
||||||
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
|
|
||||||
|
|
||||||
def get_letter_count(message: str) -> dict[str, int]:
    """
    Count how often each of the letters A-Z occurs in ``message``.

    Counting is case-insensitive: the message is upper-cased first.
    Characters other than A-Z are ignored.

    Returns:
        A dict with exactly the 26 keys of ``string.ascii_uppercase`` (in
        alphabetical order); letters that never occur map to 0.
    """
    letter_count = dict.fromkeys(string.ascii_uppercase, 0)

    for letter in message.upper():
        # The dict's own keys define which characters are counted.
        if letter in letter_count:
            letter_count[letter] += 1

    return letter_count
|
||||||
|
|
||||||
|
|
||||||
def get_item_at_index_zero(x: tuple[int, str]) -> int:
    """
    Return the first element of ``x``.

    Used as a sort key for ``(frequency, letters)`` pairs, so the value
    returned is the integer frequency.
    """
    return x[0]
|
||||||
|
|
||||||
|
|
||||||
def get_frequency_order(message: str) -> str:
    """
    Return the letters of ``LETTERS`` ordered from most to least frequent
    in ``message``.

    Frequencies come from ``get_letter_count``. Letters sharing the same
    count are ordered by descending position in ``ETAOIN``, so within a tie
    the letter listed later in ``ETAOIN`` comes first.
    """
    letter_to_freq = get_letter_count(message)

    # Group the letters by how often they occur.
    freq_to_letters: dict[int, list[str]] = {}
    for letter in LETTERS:
        freq_to_letters.setdefault(letter_to_freq[letter], []).append(letter)

    # Highest count first; break ties by reverse ETAOIN position.
    groups = [
        "".join(sorted(freq_to_letters[freq], key=ETAOIN.find, reverse=True))
        for freq in sorted(freq_to_letters, reverse=True)
    ]
    return "".join(groups)
|
||||||
|
|
||||||
|
|
||||||
def english_freq_match_score(message: str) -> int:
    """
    Score how closely the letter frequencies of ``message`` match English.

    One point is given for each of the first six letters of ``ETAOIN`` that
    is among the six most frequent letters of ``message``, and one point for
    each of the last six letters of ``ETAOIN`` that is among its six least
    frequent letters. The score therefore ranges from 0 to 12.

    >>> english_freq_match_score('Hello World')
    1
    """
    freq_order = get_frequency_order(message)
    # Slice once instead of on every loop iteration.
    most_frequent, least_frequent = freq_order[:6], freq_order[-6:]

    common_hits = sum(1 for letter in ETAOIN[:6] if letter in most_frequent)
    uncommon_hits = sum(1 for letter in ETAOIN[-6:] if letter in least_frequent)
    return common_hits + uncommon_hits
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# Created by sarathkaul on 17/11/19
|
# Created by sarathkaul on 17/11/19
|
||||||
# Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
|
# Modified by Arkadip Bhattacharya(@darkmatter18) on 20/04/2020
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from typing import DefaultDict
|
||||||
|
|
||||||
|
|
||||||
def word_occurence(sentence: str) -> dict:
|
def word_occurence(sentence: str) -> dict:
|
||||||
|
@ -14,7 +15,7 @@ def word_occurence(sentence: str) -> dict:
|
||||||
>>> dict(word_occurence("Two spaces"))
|
>>> dict(word_occurence("Two spaces"))
|
||||||
{'Two': 1, 'spaces': 1}
|
{'Two': 1, 'spaces': 1}
|
||||||
"""
|
"""
|
||||||
occurrence: dict = defaultdict(int)
|
occurrence: DefaultDict[str, int] = defaultdict(int)
|
||||||
# Creating a dictionary containing count of each word
|
# Creating a dictionary containing count of each word
|
||||||
for word in sentence.split():
|
for word in sentence.split():
|
||||||
occurrence[word] += 1
|
occurrence[word] += 1
|
||||||
|
|
|
@ -10,7 +10,7 @@ Time Complexity: O(n) - where n is the length of the string
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def z_function(input_str: str) -> list:
|
def z_function(input_str: str) -> list[int]:
|
||||||
"""
|
"""
|
||||||
For the given string this function computes value for each index,
|
For the given string this function computes value for each index,
|
||||||
which represents the maximal length substring starting from the index
|
which represents the maximal length substring starting from the index
|
||||||
|
@ -27,7 +27,7 @@ def z_function(input_str: str) -> list:
|
||||||
>>> z_function("zxxzxxz")
|
>>> z_function("zxxzxxz")
|
||||||
[0, 0, 0, 4, 0, 0, 1]
|
[0, 0, 0, 4, 0, 0, 1]
|
||||||
"""
|
"""
|
||||||
z_result = [0] * len(input_str)
|
z_result = [0 for i in range(len(input_str))]
|
||||||
|
|
||||||
# initialize interval's left pointer and right pointer
|
# initialize interval's left pointer and right pointer
|
||||||
left_pointer, right_pointer = 0, 0
|
left_pointer, right_pointer = 0, 0
|
||||||
|
@ -49,7 +49,7 @@ def z_function(input_str: str) -> list:
|
||||||
return z_result
|
return z_result
|
||||||
|
|
||||||
|
|
||||||
def go_next(i, z_result, s):
|
def go_next(i: int, z_result: list[int], s: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if we have to move forward to the next characters or not
|
Check if we have to move forward to the next characters or not
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user