Merge fe3a43c64b into 6c92c5a539

[pre-commit.ci] pre-commit autoupdate (#12542 )
* [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.9.2 → v0.9.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.2...v0.9.3) - [github.com/codespell-project/codespell: v2.3.0 → v2.4.0](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.0) * Update trifid_cipher.py * Update pyproject.toml * Update trifid_cipher.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>
2025-02-20 00:02:04 +00:00 · 2025-01-28 16:16:28 +03:00 · 2025-01-27 22:05:20 +01:00 · 2025-01-24 08:59:36 +03:00 · 2025-01-24 01:01:47 +03:00 · 2024-11-21 10:29:55 +01:00
6 changed files with 144 additions and 5 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -16,13 +16,13 @@ repos:
      - id: auto-walrus

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.2
+    rev: v0.9.3
    hooks:
      - id: ruff
      - id: ruff-format

  - repo: https://github.com/codespell-project/codespell
-    rev: v2.3.0
+    rev: v2.4.0
    hooks:
      - id: codespell
        additional_dependencies:
--- a/ciphers/trifid_cipher.py
+++ b/ciphers/trifid_cipher.py
@ -88,7 +88,7 @@ def __prepare(
        ...
    KeyError: 'Length of alphabet has to be 27.'

-    Testing with punctuations that are not in the given alphabet
+    Testing with punctuation not in the given alphabet

    >>> __prepare('am i a boy?','abCdeFghijkLmnopqrStuVwxYZ+')
    Traceback (most recent call last):
@ -128,7 +128,7 @@ def encrypt_message(
    encrypt_message
    ===============

-    Encrypts a message using the trifid_cipher. Any punctuatuions that
+    Encrypts a message using the trifid_cipher. Any punctuation chars that
    would be used should be added to the alphabet.

    PARAMETERS
--- a/compression/ppm.py
+++ b/compression/ppm.py
@ -0,0 +1,133 @@
+from __future__ import annotations
+
+import sys
+from collections import defaultdict
+
+# Description for the ppm algorithm can be found at https://en.wikipedia.org/wiki/Prediction_by_partial_matching
+
+
+class PPMNode:
+    def __init__(self) -> None:
+        # Initialize a PPMNode with a dictionary for child nodes
+        # and a count of total occurrences
+        self.counts: dict[str, PPMNode] = defaultdict(PPMNode)
+        self.total: int = 0
+
+    def __repr__(self) -> str:
+        return f"PPMNode(total={self.total})"
+
+
+class PPM:
+    def __init__(self, order: int = 2) -> None:
+        # Initialize the PPM model with a specified order and create a root node
+        self.order: int = order
+        self.root: PPMNode = PPMNode()
+        self.current_context: PPMNode = self.root
+
+    def update_model(self, context: str, symbol: str) -> None:
+        # Update the model with the new symbol in the given context
+        node = self.current_context
+        for char in context:
+            # Traverse through the context characters, updating the total counts
+            node = node.counts[char]
+            node.total += 1
+
+        # Increment the count for the specific symbol in the current context
+        node.counts[symbol].total += 1
+
+    def compress(self, data: str) -> list[float]:
+        # Compress the data using the PPM algorithm and return a list of probabilities
+        compressed_output: list[float] = []
+        context: str = ""
+
+        for symbol in data:
+            # Update the model with the current context and symbol
+            self.update_model(context, symbol)
+            # Encode the symbol based on the current context
+            compressed_output.append(self.encode_symbol(context, symbol))
+            # Update the context by appending the symbol,
+            # keeping it within the specified order
+            context = (context + symbol)[-self.order :]  # Keep the context within order
+
+        return compressed_output
+
+    def encode_symbol(self, context: str, symbol: str) -> float:
+        # Encode a symbol based on the current context and return its probability
+        node = self.root
+        for char in context:
+            # Traverse through the context to find the corresponding node
+            if char in node.counts:
+                node = node.counts[char]
+            else:
+                return 0.0  # Return 0.0 if the context is not found
+
+        # Return the probability of the symbol given the context
+        if symbol in node.counts:
+            return node.counts[symbol].total / node.total  # Return probability
+        return 0.0  # Return 0.0 if the symbol is not found
+
+    def decompress(self, compressed_data: list[float]) -> str:
+        # Decompress the compressed data back into the original string
+        decompressed_output: list[str] = []
+        context: str = ""
+
+        for prob in compressed_data:
+            # Decode each probability to retrieve the corresponding symbol
+            symbol = self.decode_symbol(context, prob)
+            if symbol:
+                decompressed_output.append(symbol)
+                # Update the context with the newly decoded symbol
+                context = (context + symbol)[
+                    -self.order :
+                ]  # Keep the context within order
+            else:
+                break  # Stop if a symbol cannot be found
+
+        return "".join(decompressed_output)  # Join the list into a single string
+
+    def decode_symbol(self, context: str, prob: float) -> str | None:
+        # Decode a symbol from the given context based on the probability
+        node = self.root
+        for char in context:
+            # Traverse through the context to find the corresponding node
+            if char in node.counts:
+                node = node.counts[char]
+            else:
+                return None  # Return None if the context is not found
+
+        # Iterate through the children of the node to
+        # find the symbol matching the given probability
+        for symbol, child in node.counts.items():
+            if child.total / node.total == prob:
+                return symbol  # Return the symbol if the probability matches
+        return None  # Return None if the symbol is not found
+
+
+def read_file(file_path: str) -> str:
+    """Read the entire file and return its content as a string."""
+    with open(file_path) as f:
+        return f.read()
+
+
def ppm(file_path: str, order: int = 2) -> None:
    """Compress and decompress the file using PPM algorithm.

    Reads ``file_path``, runs the text through a ``PPM`` model and prints both
    the per-symbol values returned by ``compress`` and the text returned by
    ``decompress``.

    :param file_path: path of the text file to process.
    :param order: context length handed to ``PPM``; defaults to 2.
    """
    data = read_file(file_path)
    ppm_instance = PPM(order=order)

    # Compress the data using the PPM model
    compressed = ppm_instance.compress(data)
    print("Compressed Data (Probabilities):", compressed)

    # Decompress with the same instance: decoding reads the model that
    # compress() has just built.
    decompressed = ppm_instance.decompress(compressed)
    print("Decompressed Data:", decompressed)
+
+
if __name__ == "__main__":
    # Exactly one positional argument (the input file path) is expected
    # after the script name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python ppm.py <file_path>")
        sys.exit(1)

    # Run the compress/decompress demo on the given file
    (input_path,) = cli_args
    ppm(input_path)
--- a/data_structures/binary_tree/avl_tree.py
+++ b/data_structures/binary_tree/avl_tree.py
@ -221,6 +221,10 @@ def del_node(root: MyNode, data: Any) -> MyNode | None:
    else:
        root.set_right(del_node(right_child, data))

+    # Re-fetch left_child and right_child references
+    left_child = root.get_left()
+    right_child = root.get_right()
+
    if get_height(right_child) - get_height(left_child) == 2:
        assert right_child is not None
        if get_height(right_child.get_right()) > get_height(right_child.get_left()):
--- a/pyproject.toml
+++ b/pyproject.toml
@ -159,7 +159,7 @@ lint.pylint.max-returns = 8 # default: 6
 lint.pylint.max-statements = 88 # default: 50

 [tool.codespell]
-ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
+ignore-words-list = "3rt,abd,aer,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar"
 skip = "./.*,*.json,*.lock,ciphers/prehistoric_men.txt,project_euler/problem_022/p022_names.txt,pyproject.toml,strings/dictionary.txt,strings/words.txt"

 [tool.pytest.ini_options]
--- a/sorts/bubble_sort.py
+++ b/sorts/bubble_sort.py
@ -85,6 +85,8 @@ def bubble_sort_recursive(collection: list[Any]) -> list[Any]:
    [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]
    >>> bubble_sort_recursive([1, 3.3, 5, 7.7, 2, 4.4, 6])
    [1, 2, 3.3, 4.4, 5, 6, 7.7]
+    >>> bubble_sort_recursive(['a', 'Z', 'B', 'C', 'A', 'c'])
+    ['A', 'B', 'C', 'Z', 'a', 'c']
    >>> import random
    >>> collection_arg = random.sample(range(-50, 50), 100)
    >>> bubble_sort_recursive(collection_arg) == sorted(collection_arg)
Author	SHA1	Message	Date
Lukas Olenyi	67a1b5e526	Merge `fe3a43c64b` into `6c92c5a539`	2025-01-28 16:16:28 +03:00
pre-commit-ci[bot]	6c92c5a539	[pre-commit.ci] pre-commit autoupdate (#12542 ) * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.9.2 → v0.9.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.9.2...v0.9.3) - [github.com/codespell-project/codespell: v2.3.0 → v2.4.0](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.0) * Update trifid_cipher.py * Update pyproject.toml * Update trifid_cipher.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>	2025-01-27 22:05:20 +01:00
Rachel Spears	13e4d3e76c	Fix error in avl_tree del_node function (#11510 ) * fixed error in del_node function * Update avl_tree.py --------- Co-authored-by: Maxim Smolskiy <mithridatus@mail.ru>	2025-01-24 08:59:36 +03:00
Vijayalaxmi Wakode	c666db3729	Add Doc test bubble sort (#12070 ) * The string manipulation - replace() * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update replace.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * updating DIRECTORY.md * Add doc test to bubble_sort * Update DIRECTORY.md * Delete strings/replace.py * Update bubble_sort.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: vijayalaxmi777 <vijayalaxmi777@users.noreply.github.com> Co-authored-by: Maxim Smolskiy <mithridatus@mail.ru>	2025-01-24 01:01:47 +03:00
Lukas Olenyi	fe3a43c64b	ruff fixes	2024-11-21 10:29:55 +01:00
pre-commit-ci[bot]	930c4d463f	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-11-21 09:08:39 +00:00
Lukas Olenyi	bad910e71c	fixed last issues with ruff	2024-11-21 10:07:01 +01:00
pre-commit-ci[bot]	4359762495	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-11-21 08:48:54 +00:00
Lukas Olenyi	435f4518c2	trying to pass ruff tests	2024-11-21 09:48:26 +01:00
Lukas Olenyi	653f8e4d4f	trying to make the code pass ruff auto review	2024-11-21 09:48:26 +01:00
pre-commit-ci[bot]	521d7a23fa	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-11-21 01:26:03 +00:00
Lukas Olenyi	2c279c492d	feat: Add PPM (Prediction by Partial Matching) algorithm implementation - Implemented the PPM algorithm for data compression and decompression. - Added methods for updating the model, encoding, and decoding symbols. - Included utility functions for reading from files and testing the algorithm. - Verified functionality with various datasets to ensure accuracy. This addition enhances the repository's collection of Python algorithms.	2024-11-21 02:16:20 +01:00
Lukas Olenyi	2f37ee9077	feat: Add PPM (Prediction by Partial Matching) algorithm implementation - Implemented the PPM algorithm for data compression and decompression. - Added methods for updating the model, encoding, and decoding symbols. - Included utility functions for reading from files and testing the algorithm. - Verified functionality with various datasets to ensure accuracy. This addition enhances the repository's collection of Python algorithms.	2024-11-21 02:10:53 +01:00