Mirror of https://github.com/TheAlgorithms/Python.git, synced 2025-01-30 22:23:42 +00:00
implementation of entropy algorithm. (#2110)
* implementation of entropy algorithm.
* add tests, fix requested changes
* open_file() --> analyze_text()
* Create bidirectional_breadth_first_search.py
* # type: ignore

Co-authored-by: Christian Clauss <cclauss@me.com>
This commit is contained in:
parent a5c246793c
commit 924ef9b7d8
@@ -100,7 +100,8 @@ class BreadthFirstSearch:
 
 class BidirectionalBreadthFirstSearch:
     """
-    >>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1, len(grid[0]) - 1))
+    >>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1,
+    ...                                           len(grid[0]) - 1))
     >>> bd_bfs.fwd_bfs.start.pos == bd_bfs.bwd_bfs.target.pos
     True
     >>> bd_bfs.retrace_bidirectional_path(bd_bfs.fwd_bfs.start,
maths/entropy.py (new file, 131 lines)
@@ -0,0 +1,131 @@
#!/usr/bin/env python3

"""
Implementation of entropy of information
https://en.wikipedia.org/wiki/Entropy_(information_theory)
"""

import math
from collections import Counter
from string import ascii_lowercase
from typing import Tuple


def calculate_prob(text: str) -> None:
    """
    This method takes a text string as argument
    and then calculates its entropy.
    :param text: the text to analyze
    :return: Prints
    1) Entropy of information based on 1 alphabet
    2) Entropy of information based on couples of 2 alphabet
    3) Entropy of H(Xn|Xn-1)

    Text from random books. Also, random quotes.
    >>> text = ("Behind Winston’s back the voice "
    ...         "from the telescreen was still "
    ...         "babbling and the overfulfilment")
    >>> calculate_prob(text)
    4.0
    6.0
    2.0

    >>> text = ("The Ministry of Truth—Minitrue, in Newspeak [Newspeak was the official"
    ...         "face in elegant lettering, the three")
    >>> calculate_prob(text)
    4.0
    5.0
    1.0
    >>> text = ("Had repulsive dashwoods suspicion sincerity but advantage now him. "
    ...         "Remark easily garret nor nay. Civil those mrs enjoy shy fat merry. "
    ...         "You greatest jointure saw horrible. He private he on be imagine "
    ...         "suppose. Fertile beloved evident through no service elderly is. Blind "
    ...         "there if every no so at. Own neglected you preferred way sincerity "
    ...         "delivered his attempted. To of message cottage windows do besides "
    ...         "against uncivil. Delightful unreserved impossible few estimating "
    ...         "men favourable see entreaties. She propriety immediate was improving. "
    ...         "He or entrance humoured likewise moderate. Much nor game son say "
    ...         "feel. Fat make met can must form into gate. Me we offending prevailed "
    ...         "discovery.")
    >>> calculate_prob(text)
    4.0
    7.0
    3.0
    """
    single_char_strings, two_char_strings = analyze_text(text)
    my_alphas = list(' ' + ascii_lowercase)
    # total count of single characters, used to turn counts into probabilities
    all_sum = sum(single_char_strings.values())

    # entropy over one-character strings
    my_fir_sum = 0
    # for each alphabet character present in the text, add its entropy term
    for ch in my_alphas:
        if ch in single_char_strings:
            my_str = single_char_strings[ch]
            prob = my_str / all_sum
            my_fir_sum += prob * math.log2(prob)  # entropy formula

    # print first entropy
    print("{0:.1f}".format(round(-1 * my_fir_sum)))

    # entropy over two-character strings
    all_sum = sum(two_char_strings.values())
    my_sec_sum = 0
    # for each two-character sequence present in the text, add its entropy term
    for ch0 in my_alphas:
        for ch1 in my_alphas:
            sequence = ch0 + ch1
            if sequence in two_char_strings:
                my_str = two_char_strings[sequence]
                prob = int(my_str) / all_sum
                my_sec_sum += prob * math.log2(prob)

    # print second entropy
    print("{0:.1f}".format(round(-1 * my_sec_sum)))

    # print the difference between them
    print("{0:.1f}".format(round(((-1 * my_sec_sum) - (-1 * my_fir_sum)))))


def analyze_text(text: str) -> Tuple[dict, dict]:
    """
    Convert text input into two dicts of counts.
    The first dictionary stores the frequency of single character strings.
    The second dictionary stores the frequency of two character strings.
    """
    single_char_strings = Counter()  # type: ignore
    two_char_strings = Counter()  # type: ignore
    single_char_strings[text[-1]] += 1

    # first case when we have a space at the start.
    two_char_strings[" " + text[0]] += 1
    for i in range(0, len(text) - 1):
        single_char_strings[text[i]] += 1
        two_char_strings[text[i : i + 2]] += 1
    return single_char_strings, two_char_strings


def main():
    import doctest

    doctest.testmod()
    # text = (
    #     "Had repulsive dashwoods suspicion sincerity but advantage now him. Remark "
    #     "easily garret nor nay. Civil those mrs enjoy shy fat merry. You greatest "
    #     "jointure saw horrible. He private he on be imagine suppose. Fertile "
    #     "beloved evident through no service elderly is. Blind there if every no so "
    #     "at. Own neglected you preferred way sincerity delivered his attempted. To "
    #     "of message cottage windows do besides against uncivil. Delightful "
    #     "unreserved impossible few estimating men favourable see entreaties. She "
    #     "propriety immediate was improving. He or entrance humoured likewise "
    #     "moderate. Much nor game son say feel. Fat make met can must form into "
    #     "gate. Me we offending prevailed discovery. "
    # )

    # calculate_prob(text)


if __name__ == "__main__":
    main()
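For reference, here is a minimal, self-contained sketch (not part of the commit) of the Shannon entropy formula H(X) = -sum(p(x) * log2 p(x)) that maths/entropy.py applies to single characters and to character pairs; the third value calculate_prob prints is the difference of those two block entropies, which approximates the conditional entropy H(Xn|Xn-1). The names shannon_entropy, unigrams and bigrams are illustrative only, and note that calculate_prob restricts its sums to space plus ascii_lowercase, so its printed values will not exactly match a plain entropy over all characters.

    import math
    from collections import Counter


    def shannon_entropy(counts: Counter) -> float:
        """H = -sum(p * log2 p) over a frequency table of counts."""
        total = sum(counts.values())
        return -sum(n / total * math.log2(n / total) for n in counts.values())


    text = "behind winston's back the voice from the telescreen was still babbling"
    unigrams = Counter(text)                                           # single characters
    bigrams = Counter(text[i : i + 2] for i in range(len(text) - 1))   # character pairs

    h1 = shannon_entropy(unigrams)  # analogous to the first value calculate_prob prints
    h2 = shannon_entropy(bigrams)   # analogous to the second value
    print(h1, h2, h2 - h1)          # h2 - h1 approximates H(Xn|Xn-1), the third value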