diff --git a/graphs/bidirectional_breadth_first_search.py b/graphs/bidirectional_breadth_first_search.py index d567f6ae6..d6e3ebd64 100644 --- a/graphs/bidirectional_breadth_first_search.py +++ b/graphs/bidirectional_breadth_first_search.py @@ -100,7 +100,8 @@ class BreadthFirstSearch: class BidirectionalBreadthFirstSearch: """ - >>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1, len(grid[0]) - 1)) + >>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1, + ... len(grid[0]) - 1)) >>> bd_bfs.fwd_bfs.start.pos == bd_bfs.bwd_bfs.target.pos True >>> bd_bfs.retrace_bidirectional_path(bd_bfs.fwd_bfs.start, diff --git a/maths/entropy.py b/maths/entropy.py new file mode 100644 index 000000000..eb6bf1a5a --- /dev/null +++ b/maths/entropy.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 + +""" +Implementation of entropy of information +https://en.wikipedia.org/wiki/Entropy_(information_theory) +""" + +import math +from collections import Counter +from string import ascii_lowercase +from typing import Tuple + + +def calculate_prob(text: str) -> None: + """ + This method takes path and two dict as argument + and than calculates entropy of them. + :param dict: + :param dict: + :return: Prints + 1) Entropy of information based on 1 alphabet + 2) Entropy of information based on couples of 2 alphabet + 3) print Entropy of H(X n∣Xn−1) + + Text from random books. Also, random quotes. + >>> text = ("Behind Winston’s back the voice " + ... "from the telescreen was still " + ... "babbling and the overfulfilment") + >>> calculate_prob(text) + 4.0 + 6.0 + 2.0 + + >>> text = ("The Ministry of Truth—Minitrue, in Newspeak [Newspeak was the official" + ... "face in elegant lettering, the three") + >>> calculate_prob(text) + 4.0 + 5.0 + 1.0 + >>> text = ("Had repulsive dashwoods suspicion sincerity but advantage now him. " + ... "Remark easily garret nor nay. Civil those mrs enjoy shy fat merry. " + ... "You greatest jointure saw horrible. He private he on be imagine " + ... "suppose. Fertile beloved evident through no service elderly is. Blind " + ... "there if every no so at. Own neglected you preferred way sincerity " + ... "delivered his attempted. To of message cottage windows do besides " + ... "against uncivil. Delightful unreserved impossible few estimating " + ... "men favourable see entreaties. She propriety immediate was improving. " + ... "He or entrance humoured likewise moderate. Much nor game son say " + ... "feel. Fat make met can must form into gate. Me we offending prevailed " + ... "discovery.") + >>> calculate_prob(text) + 4.0 + 7.0 + 3.0 + """ + single_char_strings, two_char_strings = analyze_text(text) + my_alphas = list(' ' + ascii_lowercase) + # what is our total sum of probabilities. + all_sum = sum(single_char_strings.values()) + + # one length string + my_fir_sum = 0 + # for each alpha we go in our dict and if it is in it we calculate entropy + for ch in my_alphas: + if ch in single_char_strings: + my_str = single_char_strings[ch] + prob = my_str / all_sum + my_fir_sum += prob * math.log2(prob) # entropy formula. + + # print entropy + print("{0:.1f}".format(round(-1 * my_fir_sum))) + + # two len string + all_sum = sum(two_char_strings.values()) + my_sec_sum = 0 + # for each alpha (two in size) calculate entropy. + for ch0 in my_alphas: + for ch1 in my_alphas: + sequence = ch0 + ch1 + if sequence in two_char_strings: + my_str = two_char_strings[sequence] + prob = int(my_str) / all_sum + my_sec_sum += prob * math.log2(prob) + + # print second entropy + print("{0:.1f}".format(round(-1 * my_sec_sum))) + + # print the difference between them + print("{0:.1f}".format(round(((-1 * my_sec_sum) - (-1 * my_fir_sum))))) + + +def analyze_text(text: str) -> Tuple[dict, dict]: + """ + Convert text input into two dicts of counts. + The first dictionary stores the frequency of single character strings. + The second dictionary stores the frequency of two character strings. + """ + single_char_strings = Counter() # type: ignore + two_char_strings = Counter() # type: ignore + single_char_strings[text[-1]] += 1 + + # first case when we have space at start. + two_char_strings[" " + text[0]] += 1 + for i in range(0, len(text) - 1): + single_char_strings[text[i]] += 1 + two_char_strings[text[i : i + 2]] += 1 + return single_char_strings, two_char_strings + + +def main(): + import doctest + + doctest.testmod() + # text = ( + # "Had repulsive dashwoods suspicion sincerity but advantage now him. Remark " + # "easily garret nor nay. Civil those mrs enjoy shy fat merry. You greatest " + # "jointure saw horrible. He private he on be imagine suppose. Fertile " + # "beloved evident through no service elderly is. Blind there if every no so " + # "at. Own neglected you preferred way sincerity delivered his attempted. To " + # "of message cottage windows do besides against uncivil. Delightful " + # "unreserved impossible few estimating men favourable see entreaties. She " + # "propriety immediate was improving. He or entrance humoured likewise " + # "moderate. Much nor game son say feel. Fat make met can must form into " + # "gate. Me we offending prevailed discovery. " + # ) + + # calculate_prob(text) + + +if __name__ == "__main__": + main()