mirror of
https://github.com/TheAlgorithms/Python.git
synced 2025-02-20 00:02:04 +00:00
Compare commits
13 Commits
dda3a9f4f1
...
67a1b5e526
Author | SHA1 | Date | |
---|---|---|---|
|
67a1b5e526 | ||
|
6c92c5a539 | ||
|
13e4d3e76c | ||
|
c666db3729 | ||
|
fe3a43c64b | ||
|
930c4d463f | ||
|
bad910e71c | ||
|
4359762495 | ||
|
435f4518c2 | ||
|
653f8e4d4f | ||
|
521d7a23fa | ||
|
2c279c492d | ||
|
2f37ee9077 |
|
@ -16,13 +16,13 @@ repos:
|
|||
- id: auto-walrus
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.2
|
||||
rev: v0.9.3
|
||||
hooks:
|
||||
- id: ruff
|
||||
- id: ruff-format
|
||||
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.3.0
|
||||
rev: v2.4.0
|
||||
hooks:
|
||||
- id: codespell
|
||||
additional_dependencies:
|
||||
|
|
|
@ -88,7 +88,7 @@ def __prepare(
|
|||
...
|
||||
KeyError: 'Length of alphabet has to be 27.'
|
||||
|
||||
Testing with punctuations that are not in the given alphabet
|
||||
Testing with punctuation not in the given alphabet
|
||||
|
||||
>>> __prepare('am i a boy?','abCdeFghijkLmnopqrStuVwxYZ+')
|
||||
Traceback (most recent call last):
|
||||
|
@ -128,7 +128,7 @@ def encrypt_message(
|
|||
encrypt_message
|
||||
===============
|
||||
|
||||
Encrypts a message using the trifid_cipher. Any punctuatuions that
|
||||
Encrypts a message using the trifid_cipher. Any punctuation chars that
|
||||
would be used should be added to the alphabet.
|
||||
|
||||
PARAMETERS
|
||||
|
|
133
compression/ppm.py
Normal file
133
compression/ppm.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
# Description for the ppm algorithm can be found at https://en.wikipedia.org/wiki/Prediction_by_partial_matching
|
||||
|
||||
|
||||
class PPMNode:
    """One node of the context trie used by :class:`PPM`.

    ``counts`` maps a character to the child node reached through it and
    ``total`` is the number of times ``PPM.update_model`` has touched this
    node.
    """

    def __init__(self) -> None:
        # A defaultdict creates the child on first *indexed* access
        # (``counts[ch]``); membership tests and ``.get`` never create one.
        self.counts: dict[str, PPMNode] = defaultdict(PPMNode)
        self.total: int = 0

    def __repr__(self) -> str:
        return f"PPMNode(total={self.total})"


class PPM:
    """Toy Prediction-by-Partial-Matching model over a character trie.

    ``compress`` turns a string into one probability per symbol, updating
    the model as it goes.  ``decompress`` looks symbols up by probability in
    the model as it stands *after* training.

    NOTE: the probabilities emitted by ``compress`` are computed from the
    counts available at that point of the input, while ``decode_symbol``
    compares against the current (final) counts, and several symbols may share
    one probability.  ``decompress`` is therefore best effort and is not
    guaranteed to reproduce the original input.
    """

    def __init__(self, order: int = 2) -> None:
        """Create an empty model.

        :param order: maximum number of preceding characters used as context;
            a value of zero or less means "no context".
        """
        self.order: int = order
        self.root: PPMNode = PPMNode()
        self.current_context: PPMNode = self.root

    def _next_context(self, context: str, symbol: str) -> str:
        """Return ``context`` extended by ``symbol``, cut to ``order`` chars."""
        if self.order <= 0:
            # ``text[-0:]`` is the whole string, so order 0 needs its own case.
            return ""
        return (context + symbol)[-self.order :]

    def _find_context(self, context: str) -> PPMNode | None:
        """Walk ``context`` from the root without creating nodes.

        Returns the node reached, or ``None`` if the path does not exist.
        """
        node = self.root
        for char in context:
            if char not in node.counts:
                return None
            node = node.counts[char]
        return node

    def update_model(self, context: str, symbol: str) -> None:
        """Record one occurrence of ``symbol`` after ``context``.

        Every node on the context path, *including the starting node*, has
        its ``total`` incremented, and so does the child for ``symbol``.
        Counting the starting node is what keeps ``encode_symbol`` from
        dividing by zero when the context is empty.
        """
        node = self.current_context
        node.total += 1
        for char in context:
            node = node.counts[char]
            node.total += 1
        node.counts[symbol].total += 1

    def compress(self, data: str) -> list[float]:
        """Return one probability per character of ``data``.

        The model is updated with each symbol *before* that symbol is
        encoded, so every emitted probability is greater than zero.
        An empty ``data`` yields an empty list.
        """
        compressed_output: list[float] = []
        context: str = ""

        for symbol in data:
            self.update_model(context, symbol)
            compressed_output.append(self.encode_symbol(context, symbol))
            context = self._next_context(context, symbol)

        return compressed_output

    def encode_symbol(self, context: str, symbol: str) -> float:
        """Return the modelled probability of ``symbol`` after ``context``.

        Returns ``0.0`` when the context or the symbol is unknown, or when
        the context node has never been counted.
        """
        node = self._find_context(context)
        if node is None or node.total == 0:
            return 0.0
        child = node.counts.get(symbol)  # .get never creates a child
        if child is None:
            return 0.0
        return child.total / node.total

    def decompress(self, compressed_data: list[float]) -> str:
        """Map each probability back to a symbol using the current model.

        Decoding stops at the first probability for which no symbol is
        found; whatever was decoded up to that point is returned.  See the
        class docstring for why this is not a guaranteed round trip.
        """
        decompressed_output: list[str] = []
        context: str = ""

        for prob in compressed_data:
            symbol = self.decode_symbol(context, prob)
            if not symbol:
                break  # Stop if a symbol cannot be found
            decompressed_output.append(symbol)
            context = self._next_context(context, symbol)

        return "".join(decompressed_output)

    def decode_symbol(self, context: str, prob: float) -> str | None:
        """Return the first symbol after ``context`` whose probability is ``prob``.

        Returns ``None`` when the context is unknown, has never been counted,
        or has no child with exactly that probability.
        """
        node = self._find_context(context)
        if node is None or node.total == 0:
            return None
        for symbol, child in node.counts.items():
            # Exact comparison is intentional: the value is recomputed from
            # the same kind of integer division used by encode_symbol.
            if child.total / node.total == prob:
                return symbol
        return None
|
||||
|
||||
|
||||
def read_file(file_path: str) -> str:
    """Read the entire file and return its content as a string.

    The file is decoded as UTF-8 so the result does not depend on the
    platform's default locale encoding.

    :param file_path: path of the text file to read.
    :return: the full decoded content of the file.
    :raises OSError: if the file cannot be opened.
    :raises UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(file_path, encoding="utf-8") as f:
        return f.read()
|
||||
|
||||
|
||||
def ppm(file_path: str) -> None:
    """Compress and decompress the file using PPM algorithm.

    Reads ``file_path`` with ``read_file``, runs it through an order-2
    ``PPM`` model and prints both the list of probabilities and the result
    of decompressing that list with the same model instance.

    :param file_path: path of the text file to process.
    """
    data = read_file(file_path)
    ppm_instance = PPM(order=2)

    # Compress the data using the PPM model
    compressed = ppm_instance.compress(data)
    print("Compressed Data (Probabilities):", compressed)

    # Decompress with the same (now trained) model instance
    decompressed = ppm_instance.decompress(compressed)
    print("Decompressed Data:", decompressed)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exactly one command line argument is expected: the file to process.
    if len(sys.argv) != 2:
        print("Usage: python ppm.py <file_path>")
        sys.exit(1)

    # Call the ppm function with the provided file path
    ppm(sys.argv[1])
|
|
@ -221,6 +221,10 @@ def del_node(root: MyNode, data: Any) -> MyNode | None:
|
|||
else:
|
||||
root.set_right(del_node(right_child, data))
|
||||
|
||||
# Re-fetch left_child and right_child references
|
||||
left_child = root.get_left()
|
||||
right_child = root.get_right()
|
||||
|
||||
if get_height(right_child) - get_height(left_child) == 2:
|
||||
assert right_child is not None
|
||||
if get_height(right_child.get_right()) > get_height(right_child.get_left()):
|
||||
|
|
|
@ -159,7 +159,7 @@ lint.pylint.max-returns = 8 # default: 6
|
|||
lint.pylint.max-statements = 88 # default: 50
|
||||
|
||||
[tool.codespell]
|
||||
ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
|
||||
ignore-words-list = "3rt,abd,aer,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
|
||||
skip = "./.*,*.json,*.lock,ciphers/prehistoric_men.txt,project_euler/problem_022/p022_names.txt,pyproject.toml,strings/dictionary.txt,strings/words.txt"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
|
|
|
@ -85,6 +85,8 @@ def bubble_sort_recursive(collection: list[Any]) -> list[Any]:
|
|||
[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]
|
||||
>>> bubble_sort_recursive([1, 3.3, 5, 7.7, 2, 4.4, 6])
|
||||
[1, 2, 3.3, 4.4, 5, 6, 7.7]
|
||||
>>> bubble_sort_recursive(['a', 'Z', 'B', 'C', 'A', 'c'])
|
||||
['A', 'B', 'C', 'Z', 'a', 'c']
|
||||
>>> import random
|
||||
>>> collection_arg = random.sample(range(-50, 50), 100)
|
||||
>>> bubble_sort_recursive(collection_arg) == sorted(collection_arg)
|
||||
|
|
Loading…
Reference in New Issue
Block a user