Compare commits

...

2 Commits

Author SHA1 Message Date
Andrey
b797e437ae
Add hashmap implementation (#7967)
2023-03-14 01:31:27 +01:00
pre-commit-ci[bot]
8959211100
[pre-commit.ci] pre-commit autoupdate (#8177)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/charliermarsh/ruff-pre-commit: v0.0.254 → v0.0.255](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.254...v0.0.255)
- [github.com/pre-commit/mirrors-mypy: v1.0.1 → v1.1.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.0.1...v1.1.1)
- [github.com/codespell-project/codespell: v2.2.2 → v2.2.4](https://github.com/codespell-project/codespell/compare/v2.2.2...v2.2.4)

* updating DIRECTORY.md

* Fixes for new version of codespell

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
2023-03-13 23:18:35 +01:00
6 changed files with 266 additions and 6 deletions

View File

@@ -44,7 +44,7 @@ repos:
           - --py311-plus
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.254
+    rev: v0.0.255
     hooks:
       - id: ruff
         args:
@@ -69,7 +69,7 @@ repos:
           *flake8-plugins
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.0.1
+    rev: v1.1.1
     hooks:
       - id: mypy
         args:
@@ -79,11 +79,11 @@ repos:
         additional_dependencies: [types-requests]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.2
+    rev: v2.2.4
     hooks:
       - id: codespell
         args:
-          - --ignore-words-list=ans,crate,damon,fo,followings,hist,iff,mater,secant,som,sur,tim,zar
+          - --ignore-words-list=3rt,ans,crate,damon,fo,followings,hist,iff,kwanza,mater,secant,som,sur,tim,zar
         exclude: |
           (?x)^(
             ciphers/prehistoric_men.txt |

View File

@@ -334,6 +334,7 @@
 ## Electronics
   * [Builtin Voltage](electronics/builtin_voltage.py)
   * [Carrier Concentration](electronics/carrier_concentration.py)
+  * [Circular Convolution](electronics/circular_convolution.py)
   * [Coulombs Law](electronics/coulombs_law.py)
   * [Electric Conductivity](electronics/electric_conductivity.py)
   * [Electric Power](electronics/electric_power.py)

View File

@@ -0,0 +1,162 @@
"""
Hash map with open addressing.

https://en.wikipedia.org/wiki/Hash_table

For another hash map implementation with a good explanation, see
"Modern Dictionaries" by Raymond Hettinger:
https://www.youtube.com/watch?v=p33CVV29OG8
"""
from collections.abc import Iterator, MutableMapping
from dataclasses import dataclass
from typing import Generic, TypeVar

KEY = TypeVar("KEY")
VAL = TypeVar("VAL")


@dataclass(frozen=True, slots=True)
class _Item(Generic[KEY, VAL]):
    key: KEY
    val: VAL


class _DeletedItem(_Item):
    def __init__(self) -> None:
        super().__init__(None, None)

    def __bool__(self) -> bool:
        return False


_deleted = _DeletedItem()


class HashMap(MutableMapping[KEY, VAL]):
    """
    Hash map with open addressing.
    """

    def __init__(
        self, initial_block_size: int = 8, capacity_factor: float = 0.75
    ) -> None:
        self._initial_block_size = initial_block_size
        self._buckets: list[_Item | None] = [None] * initial_block_size
        assert 0.0 < capacity_factor < 1.0
        self._capacity_factor = capacity_factor
        self._len = 0

    def _get_bucket_index(self, key: KEY) -> int:
        return hash(key) % len(self._buckets)

    def _get_next_ind(self, ind: int) -> int:
        """
        Get the next index.

        Implements linear open addressing.
        """
        return (ind + 1) % len(self._buckets)

    def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
        """
        Try to add the value to the bucket.

        If the bucket is empty or holds the same key, performs the insert
        and returns True.
        If the bucket holds a different key or a deleted placeholder,
        the next bucket has to be probed instead.
        """
        stored = self._buckets[ind]
        if not stored:
            self._buckets[ind] = _Item(key, val)
            self._len += 1
            return True
        elif stored.key == key:
            self._buckets[ind] = _Item(key, val)
            return True
        else:
            return False

    def _is_full(self) -> bool:
        """
        Return true if we have reached the safe capacity,
        i.e. the number of buckets must grow to avoid collisions.
        """
        limit = len(self._buckets) * self._capacity_factor
        return len(self) >= int(limit)

    def _is_sparse(self) -> bool:
        """Return true if half as many buckets as we have now would suffice."""
        if len(self._buckets) <= self._initial_block_size:
            return False
        limit = len(self._buckets) * self._capacity_factor / 2
        return len(self) < limit

    def _resize(self, new_size: int) -> None:
        old_buckets = self._buckets
        self._buckets = [None] * new_size
        self._len = 0
        for item in old_buckets:
            if item:
                self._add_item(item.key, item.val)

    def _size_up(self) -> None:
        self._resize(len(self._buckets) * 2)

    def _size_down(self) -> None:
        self._resize(len(self._buckets) // 2)

    def _iterate_buckets(self, key: KEY) -> Iterator[int]:
        ind = self._get_bucket_index(key)
        for _ in range(len(self._buckets)):
            yield ind
            ind = self._get_next_ind(ind)

    def _add_item(self, key: KEY, val: VAL) -> None:
        for ind in self._iterate_buckets(key):
            if self._try_set(ind, key, val):
                break

    def __setitem__(self, key: KEY, val: VAL) -> None:
        if self._is_full():
            self._size_up()
        self._add_item(key, val)

    def __delitem__(self, key: KEY) -> None:
        for ind in self._iterate_buckets(key):
            item = self._buckets[ind]
            if item is None:
                raise KeyError(key)
            if item is _deleted:
                continue
            if item.key == key:
                self._buckets[ind] = _deleted
                self._len -= 1
                break
        if self._is_sparse():
            self._size_down()

    def __getitem__(self, key: KEY) -> VAL:
        for ind in self._iterate_buckets(key):
            item = self._buckets[ind]
            if item is None:
                break
            if item is _deleted:
                continue
            if item.key == key:
                return item.val
        raise KeyError(key)

    def __len__(self) -> int:
        return self._len

    def __iter__(self) -> Iterator[KEY]:
        yield from (item.key for item in self._buckets if item)

    def __repr__(self) -> str:
        val_string = ", ".join(
            f"{item.key}: {item.val}" for item in self._buckets if item
        )
        return f"HashMap({val_string})"

View File

@@ -0,0 +1,97 @@
from operator import delitem, getitem, setitem

import pytest

from data_structures.hashing.hash_map import HashMap


def _get(k):
    return getitem, k


def _set(k, v):
    return setitem, k, v


def _del(k):
    return delitem, k


def _run_operation(obj, fun, *args):
    try:
        return fun(obj, *args), None
    except Exception as e:
        return None, e


_add_items = (
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
)

_overwrite_items = [
    _set("key_a", "val_a"),
    _set("key_a", "val_b"),
]

_delete_items = [
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
    _del("key_a"),
    _del("key_b"),
    _set("key_a", "val_a"),
    _del("key_a"),
]

_access_absent_items = [
    _get("key_a"),
    _del("key_a"),
    _set("key_a", "val_a"),
    _del("key_a"),
    _del("key_a"),
    _get("key_a"),
]

_add_with_resize_up = [
    *[_set(x, x) for x in range(5)],  # guaranteed upsize
]

_add_with_resize_down = [
    *[_set(x, x) for x in range(5)],  # guaranteed upsize
    *[_del(x) for x in range(5)],
    _set("key_a", "val_b"),
]


@pytest.mark.parametrize(
    "operations",
    (
        pytest.param(_add_items, id="add items"),
        pytest.param(_overwrite_items, id="overwrite items"),
        pytest.param(_delete_items, id="delete items"),
        pytest.param(_access_absent_items, id="access absent items"),
        pytest.param(_add_with_resize_up, id="add with resize up"),
        pytest.param(_add_with_resize_down, id="add with resize down"),
    ),
)
def test_hash_map_is_the_same_as_dict(operations):
    my = HashMap(initial_block_size=4)
    py = {}
    for fun, *args in operations:
        my_res, my_exc = _run_operation(my, fun, *args)
        py_res, py_exc = _run_operation(py, fun, *args)
        assert my_res == py_res
        assert str(my_exc) == str(py_exc)
        assert set(py) == set(my)
        assert len(py) == len(my)
        assert set(my.items()) == set(py.items())


def test_no_new_methods_was_added_to_api():
    def is_public(name: str) -> bool:
        return not name.startswith("_")

    dict_public_names = {name for name in dir({}) if is_public(name)}
    hash_public_names = {name for name in dir(HashMap()) if is_public(name)}

    assert dict_public_names > hash_public_names
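
The same differential idea extends to randomized operations; a sketch (an illustration, not part of the commit) that drives HashMap and dict with identical random inserts and deletes and checks they never disagree:

# Randomized differential check (sketch only, not in the commit).
import random

from data_structures.hashing.hash_map import HashMap

def fuzz(steps: int = 1000, seed: int = 0) -> None:
    rng = random.Random(seed)
    my, py = HashMap(initial_block_size=4), {}
    for _ in range(steps):
        key = rng.randrange(32)
        if rng.random() < 0.5:
            my[key] = py[key] = rng.randrange(100)
        else:
            my.pop(key, None)  # pop() is inherited from MutableMapping
            py.pop(key, None)
        assert sorted(my.items()) == sorted(py.items())

fuzz()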

View File

@@ -569,7 +569,7 @@ def plot_partition_boundary(
     """
     We can not get the optimum w of our kernel svm model which is different from linear
     svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our tained model. Then we
+    prediced values of these points are calculated by using our trained model. Then we
     could use this prediced values to draw contour map.
     And this contour map can represent svm's partition boundary.
     """

View File

@@ -2,7 +2,7 @@
 Lorentz transformations describe the transition between two inertial reference
 frames F and F', each of which is moving in some direction with respect to the
 other. This code only calculates Lorentz transformations for movement in the x
-direction with no spacial rotation (i.e., a Lorentz boost in the x direction).
+direction with no spatial rotation (i.e., a Lorentz boost in the x direction).
 The Lorentz transformations are calculated here as linear transformations of
 four-vectors [ct, x, y, z] described by Minkowski space. Note that t (time) is
 multiplied by c (the speed of light) in the first entry of each four-vector.
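
To make the description concrete, a small sketch (not the repo file itself) of an x-direction boost acting on a four-vector [ct, x, y, z], with beta = v/c and gamma = 1 / sqrt(1 - beta^2):

# Sketch of a Lorentz boost along x (illustration, not the repo file).
import numpy as np

def boost_x(beta: float) -> np.ndarray:
    # The boost as a 4x4 linear transformation of [ct, x, y, z]:
    # ct' = gamma * (ct - beta * x), x' = gamma * (x - beta * ct)
    gamma = 1 / np.sqrt(1 - beta**2)
    return np.array(
        [
            [gamma, -gamma * beta, 0.0, 0.0],
            [-gamma * beta, gamma, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0],
        ]
    )

event = np.array([1.0, 0.0, 0.0, 0.0])  # ct = 1 at the spatial origin
print(boost_x(0.5) @ event)  # [1.1547, -0.5774, 0, 0]: ct' = gamma, x' = -gamma*beta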