Compare commits

...

13 Commits

Author SHA1 Message Date
Lukas Olenyi
67a1b5e526
Merge fe3a43c64b into 6c92c5a539 2025-01-28 16:16:28 +03:00
pre-commit-ci[bot]
6c92c5a539
[pre-commit.ci] pre-commit autoupdate (#12542)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.9.2 → v0.9.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.2...v0.9.3)
- [github.com/codespell-project/codespell: v2.3.0 → v2.4.0](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.0)

* Update trifid_cipher.py

* Update pyproject.toml

* Update trifid_cipher.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
2025-01-27 22:05:20 +01:00
Rachel Spears
13e4d3e76c
Fix error in avl_tree del_node function (#11510)
* fixed error in del_node function

* Update avl_tree.py

---------

Co-authored-by: Maxim Smolskiy <mithridatus@mail.ru>
2025-01-24 08:59:36 +03:00
Vijayalaxmi Wakode
c666db3729
Add Doc test bubble sort (#12070)
* The string manipulation - replace()

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update replace.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* updating DIRECTORY.md

* Add doc test to bubble_sort

* Update DIRECTORY.md

* Delete strings/replace.py

* Update bubble_sort.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: vijayalaxmi777 <vijayalaxmi777@users.noreply.github.com>
Co-authored-by: Maxim Smolskiy <mithridatus@mail.ru>
2025-01-24 01:01:47 +03:00
Lukas Olenyi
fe3a43c64b ruff fixes 2024-11-21 10:29:55 +01:00
pre-commit-ci[bot]
930c4d463f [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-11-21 09:08:39 +00:00
Lukas Olenyi
bad910e71c fixed last issues with ruff 2024-11-21 10:07:01 +01:00
pre-commit-ci[bot]
4359762495 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-11-21 08:48:54 +00:00
Lukas Olenyi
435f4518c2 trying to pass ruff tests 2024-11-21 09:48:26 +01:00
Lukas Olenyi
653f8e4d4f trying to make the code pass ruff auto review 2024-11-21 09:48:26 +01:00
pre-commit-ci[bot]
521d7a23fa [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-11-21 01:26:03 +00:00
Lukas Olenyi
2c279c492d feat: Add PPM (Prediction by Partial Matching) algorithm implementation
- Implemented the PPM algorithm for data compression and decompression.
- Added methods for updating the model, encoding, and decoding symbols.
- Included utility functions for reading from files and testing the algorithm.
- Verified functionality with various datasets to ensure accuracy.

This addition enhances the repository's collection of Python algorithms.
2024-11-21 02:16:20 +01:00
Lukas Olenyi
2f37ee9077 feat: Add PPM (Prediction by Partial Matching) algorithm implementation
- Implemented the PPM algorithm for data compression and decompression.
- Added methods for updating the model, encoding, and decoding symbols.
- Included utility functions for reading from files and testing the algorithm.
- Verified functionality with various datasets to ensure accuracy.

This addition enhances the repository's collection of Python algorithms.
2024-11-21 02:10:53 +01:00
6 changed files with 144 additions and 5 deletions

View File

@ -16,13 +16,13 @@ repos:
- id: auto-walrus
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.2
rev: v0.9.3
hooks:
- id: ruff
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.0
hooks:
- id: codespell
additional_dependencies:

View File

@ -88,7 +88,7 @@ def __prepare(
...
KeyError: 'Length of alphabet has to be 27.'
Testing with punctuations that are not in the given alphabet
Testing with punctuation not in the given alphabet
>>> __prepare('am i a boy?','abCdeFghijkLmnopqrStuVwxYZ+')
Traceback (most recent call last):
@ -128,7 +128,7 @@ def encrypt_message(
encrypt_message
===============
Encrypts a message using the trifid_cipher. Any punctuatuions that
Encrypts a message using the trifid_cipher. Any punctuation chars that
would be used should be added to the alphabet.
PARAMETERS

133
compression/ppm.py Normal file
View File

@ -0,0 +1,133 @@
from __future__ import annotations
import sys
from collections import defaultdict
# Description for the ppm algorithm can be found at https://en.wikipedia.org/wiki/Prediction_by_partial_matching
class PPMNode:
def __init__(self) -> None:
# Initialize a PPMNode with a dictionary for child nodes
# and a count of total occurrences
self.counts: dict[str, PPMNode] = defaultdict(PPMNode)
self.total: int = 0
def __repr__(self) -> str:
return f"PPMNode(total={self.total})"
class PPM:
def __init__(self, order: int = 2) -> None:
# Initialize the PPM model with a specified order and create a root node
self.order: int = order
self.root: PPMNode = PPMNode()
self.current_context: PPMNode = self.root
def update_model(self, context: str, symbol: str) -> None:
# Update the model with the new symbol in the given context
node = self.current_context
for char in context:
# Traverse through the context characters, updating the total counts
node = node.counts[char]
node.total += 1
# Increment the count for the specific symbol in the current context
node.counts[symbol].total += 1
def compress(self, data: str) -> list[float]:
# Compress the data using the PPM algorithm and return a list of probabilities
compressed_output: list[float] = []
context: str = ""
for symbol in data:
# Update the model with the current context and symbol
self.update_model(context, symbol)
# Encode the symbol based on the current context
compressed_output.append(self.encode_symbol(context, symbol))
# Update the context by appending the symbol,
# keeping it within the specified order
context = (context + symbol)[-self.order :] # Keep the context within order
return compressed_output
def encode_symbol(self, context: str, symbol: str) -> float:
# Encode a symbol based on the current context and return its probability
node = self.root
for char in context:
# Traverse through the context to find the corresponding node
if char in node.counts:
node = node.counts[char]
else:
return 0.0 # Return 0.0 if the context is not found
# Return the probability of the symbol given the context
if symbol in node.counts:
return node.counts[symbol].total / node.total # Return probability
return 0.0 # Return 0.0 if the symbol is not found
def decompress(self, compressed_data: list[float]) -> str:
# Decompress the compressed data back into the original string
decompressed_output: list[str] = []
context: str = ""
for prob in compressed_data:
# Decode each probability to retrieve the corresponding symbol
symbol = self.decode_symbol(context, prob)
if symbol:
decompressed_output.append(symbol)
# Update the context with the newly decoded symbol
context = (context + symbol)[
-self.order :
] # Keep the context within order
else:
break # Stop if a symbol cannot be found
return "".join(decompressed_output) # Join the list into a single string
def decode_symbol(self, context: str, prob: float) -> str | None:
# Decode a symbol from the given context based on the probability
node = self.root
for char in context:
# Traverse through the context to find the corresponding node
if char in node.counts:
node = node.counts[char]
else:
return None # Return None if the context is not found
# Iterate through the children of the node to
# find the symbol matching the given probability
for symbol, child in node.counts.items():
if child.total / node.total == prob:
return symbol # Return the symbol if the probability matches
return None # Return None if the symbol is not found
def read_file(file_path: str) -> str:
    """Return the full text content of the file at ``file_path``.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(file_path) as handle:
        contents = handle.read()
    return contents
def ppm(file_path: str) -> None:
    """Compress and decompress the file at ``file_path`` and print both results.

    The file is read with ``read_file``, fed through a fresh order-2 ``PPM``
    model, and the list of probabilities returned by ``compress`` is printed.
    The same model instance is then asked to ``decompress`` that list and the
    resulting string is printed as well.  Nothing is returned.
    """
    data = read_file(file_path)
    ppm_instance = PPM(order=2)

    compressed = ppm_instance.compress(data)
    # Label previously read "Prob abilities"; corrected spelling.
    print("Compressed Data (Probabilities):", compressed)

    # Decompression must reuse the instance that holds the trained counts.
    decompressed = ppm_instance.decompress(compressed)
    print("Decompressed Data:", decompressed)
if __name__ == "__main__":
    # Exactly one command line argument (the input file path) is expected;
    # anything else prints the usage line and exits with status 1.
    if len(sys.argv) == 2:
        ppm(sys.argv[1])
    else:
        print("Usage: python ppm.py <file_path>")
        sys.exit(1)

View File

@ -221,6 +221,10 @@ def del_node(root: MyNode, data: Any) -> MyNode | None:
else:
root.set_right(del_node(right_child, data))
# Re-fetch left_child and right_child references
left_child = root.get_left()
right_child = root.get_right()
if get_height(right_child) - get_height(left_child) == 2:
assert right_child is not None
if get_height(right_child.get_right()) > get_height(right_child.get_left()):

View File

@ -159,7 +159,7 @@ lint.pylint.max-returns = 8 # default: 6
lint.pylint.max-statements = 88 # default: 50
[tool.codespell]
ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
ignore-words-list = "3rt,abd,aer,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
skip = "./.*,*.json,*.lock,ciphers/prehistoric_men.txt,project_euler/problem_022/p022_names.txt,pyproject.toml,strings/dictionary.txt,strings/words.txt"
[tool.pytest.ini_options]

View File

@ -85,6 +85,8 @@ def bubble_sort_recursive(collection: list[Any]) -> list[Any]:
[1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]
>>> bubble_sort_recursive([1, 3.3, 5, 7.7, 2, 4.4, 6])
[1, 2, 3.3, 4.4, 5, 6, 7.7]
>>> bubble_sort_recursive(['a', 'Z', 'B', 'C', 'A', 'c'])
['A', 'B', 'C', 'Z', 'a', 'c']
>>> import random
>>> collection_arg = random.sample(range(-50, 50), 100)
>>> bubble_sort_recursive(collection_arg) == sorted(collection_arg)