Compare commits

...

2 Commits

Author SHA1 Message Date
Andrey
b797e437ae
Add hashmap implementation (#7967)
2023-03-14 01:31:27 +01:00
pre-commit-ci[bot]
8959211100
[pre-commit.ci] pre-commit autoupdate (#8177)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/charliermarsh/ruff-pre-commit: v0.0.254 → v0.0.255](https://github.com/charliermarsh/ruff-pre-commit/compare/v0.0.254...v0.0.255)
- [github.com/pre-commit/mirrors-mypy: v1.0.1 → v1.1.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.0.1...v1.1.1)
- [github.com/codespell-project/codespell: v2.2.2 → v2.2.4](https://github.com/codespell-project/codespell/compare/v2.2.2...v2.2.4)

* updating DIRECTORY.md

* Fixes for new version of codespell

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
2023-03-13 23:18:35 +01:00
6 changed files with 266 additions and 6 deletions

View File

@@ -44,7 +44,7 @@ repos:
           - --py311-plus
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.254
+    rev: v0.0.255
     hooks:
       - id: ruff
         args:
@@ -69,7 +69,7 @@ repos:
           *flake8-plugins
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.0.1
+    rev: v1.1.1
     hooks:
       - id: mypy
         args:
@@ -79,11 +79,11 @@ repos:
         additional_dependencies: [types-requests]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.2
+    rev: v2.2.4
     hooks:
       - id: codespell
         args:
-          - --ignore-words-list=ans,crate,damon,fo,followings,hist,iff,mater,secant,som,sur,tim,zar
+          - --ignore-words-list=3rt,ans,crate,damon,fo,followings,hist,iff,kwanza,mater,secant,som,sur,tim,zar
         exclude: |
           (?x)^(
             ciphers/prehistoric_men.txt |

View File

@@ -334,6 +334,7 @@
 ## Electronics
   * [Builtin Voltage](electronics/builtin_voltage.py)
   * [Carrier Concentration](electronics/carrier_concentration.py)
+  * [Circular Convolution](electronics/circular_convolution.py)
   * [Coulombs Law](electronics/coulombs_law.py)
   * [Electric Conductivity](electronics/electric_conductivity.py)
   * [Electric Power](electronics/electric_power.py)

View File

@@ -0,0 +1,162 @@
"""
Hash map with open addressing.

https://en.wikipedia.org/wiki/Hash_table

For another hash map implementation with a good explanation, see
"Modern Dictionaries" by Raymond Hettinger:
https://www.youtube.com/watch?v=p33CVV29OG8
"""
from collections.abc import Iterator, MutableMapping
from dataclasses import dataclass
from typing import Generic, TypeVar

KEY = TypeVar("KEY")
VAL = TypeVar("VAL")


@dataclass(frozen=True, slots=True)
class _Item(Generic[KEY, VAL]):
    key: KEY
    val: VAL


class _DeletedItem(_Item):
    def __init__(self) -> None:
        super().__init__(None, None)

    def __bool__(self) -> bool:
        return False


_deleted = _DeletedItem()


class HashMap(MutableMapping[KEY, VAL]):
    """
    Hash map with open addressing.
    """

    def __init__(
        self, initial_block_size: int = 8, capacity_factor: float = 0.75
    ) -> None:
        self._initial_block_size = initial_block_size
        self._buckets: list[_Item | None] = [None] * initial_block_size
        assert 0.0 < capacity_factor < 1.0
        self._capacity_factor = capacity_factor
        self._len = 0

    def _get_bucket_index(self, key: KEY) -> int:
        return hash(key) % len(self._buckets)

    def _get_next_ind(self, ind: int) -> int:
        """
        Get the next index.

        Implements linear open addressing.
        """
        return (ind + 1) % len(self._buckets)

    def _try_set(self, ind: int, key: KEY, val: VAL) -> bool:
        """
        Try to add the value to the bucket.

        If the bucket is empty or holds the same key, performs the insert
        and returns True.
        If the bucket holds a different key or a deleted placeholder,
        the next bucket has to be probed instead.
        """
        stored = self._buckets[ind]
        if not stored:
            self._buckets[ind] = _Item(key, val)
            self._len += 1
            return True
        elif stored.key == key:
            self._buckets[ind] = _Item(key, val)
            return True
        else:
            return False

    def _is_full(self) -> bool:
        """
        Return true if we have reached the safe capacity,
        i.e. the number of buckets must grow to avoid collisions.
        """
        limit = len(self._buckets) * self._capacity_factor
        return len(self) >= int(limit)

    def _is_sparse(self) -> bool:
        """Return true if half as many buckets as we have now would suffice."""
        if len(self._buckets) <= self._initial_block_size:
            return False
        limit = len(self._buckets) * self._capacity_factor / 2
        return len(self) < limit

    def _resize(self, new_size: int) -> None:
        old_buckets = self._buckets
        self._buckets = [None] * new_size
        self._len = 0
        for item in old_buckets:
            if item:
                self._add_item(item.key, item.val)

    def _size_up(self) -> None:
        self._resize(len(self._buckets) * 2)

    def _size_down(self) -> None:
        self._resize(len(self._buckets) // 2)

    def _iterate_buckets(self, key: KEY) -> Iterator[int]:
        ind = self._get_bucket_index(key)
        for _ in range(len(self._buckets)):
            yield ind
            ind = self._get_next_ind(ind)

    def _add_item(self, key: KEY, val: VAL) -> None:
        for ind in self._iterate_buckets(key):
            if self._try_set(ind, key, val):
                break

    def __setitem__(self, key: KEY, val: VAL) -> None:
        if self._is_full():
            self._size_up()
        self._add_item(key, val)

    def __delitem__(self, key: KEY) -> None:
        for ind in self._iterate_buckets(key):
            item = self._buckets[ind]
            if item is None:
                raise KeyError(key)
            if item is _deleted:
                continue
            if item.key == key:
                self._buckets[ind] = _deleted
                self._len -= 1
                break
        if self._is_sparse():
            self._size_down()

    def __getitem__(self, key: KEY) -> VAL:
        for ind in self._iterate_buckets(key):
            item = self._buckets[ind]
            if item is None:
                break
            if item is _deleted:
                continue
            if item.key == key:
                return item.val
        raise KeyError(key)

    def __len__(self) -> int:
        return self._len

    def __iter__(self) -> Iterator[KEY]:
        yield from (item.key for item in self._buckets if item)

    def __repr__(self) -> str:
        val_string = ", ".join(
            f"{item.key}: {item.val}" for item in self._buckets if item
        )
        return f"HashMap({val_string})"

View File

@@ -0,0 +1,97 @@
from operator import delitem, getitem, setitem

import pytest

from data_structures.hashing.hash_map import HashMap


def _get(k):
    return getitem, k


def _set(k, v):
    return setitem, k, v


def _del(k):
    return delitem, k


def _run_operation(obj, fun, *args):
    try:
        return fun(obj, *args), None
    except Exception as e:
        return None, e


_add_items = (
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
)

_overwrite_items = [
    _set("key_a", "val_a"),
    _set("key_a", "val_b"),
]

_delete_items = [
    _set("key_a", "val_a"),
    _set("key_b", "val_b"),
    _del("key_a"),
    _del("key_b"),
    _set("key_a", "val_a"),
    _del("key_a"),
]

_access_absent_items = [
    _get("key_a"),
    _del("key_a"),
    _set("key_a", "val_a"),
    _del("key_a"),
    _del("key_a"),
    _get("key_a"),
]

_add_with_resize_up = [
    *[_set(x, x) for x in range(5)],  # guaranteed upsize
]

_add_with_resize_down = [
    *[_set(x, x) for x in range(5)],  # guaranteed upsize
    *[_del(x) for x in range(5)],
    _set("key_a", "val_b"),
]


@pytest.mark.parametrize(
    "operations",
    (
        pytest.param(_add_items, id="add items"),
        pytest.param(_overwrite_items, id="overwrite items"),
        pytest.param(_delete_items, id="delete items"),
        pytest.param(_access_absent_items, id="access absent items"),
        pytest.param(_add_with_resize_up, id="add with resize up"),
        pytest.param(_add_with_resize_down, id="add with resize down"),
    ),
)
def test_hash_map_is_the_same_as_dict(operations):
    my = HashMap(initial_block_size=4)
    py = {}
    for fun, *args in operations:
        my_res, my_exc = _run_operation(my, fun, *args)
        py_res, py_exc = _run_operation(py, fun, *args)
        assert my_res == py_res
        assert str(my_exc) == str(py_exc)
        assert set(py) == set(my)
        assert len(py) == len(my)
        assert set(my.items()) == set(py.items())


def test_no_new_methods_was_added_to_api():
    def is_public(name: str) -> bool:
        return not name.startswith("_")

    dict_public_names = {name for name in dir({}) if is_public(name)}
    hash_public_names = {name for name in dir(HashMap()) if is_public(name)}

    assert dict_public_names > hash_public_names
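
The same differential idea extends to randomized operations; a sketch (an illustration, not part of the commit) that drives HashMap and dict with identical random inserts and deletes and checks they never disagree:

# Randomized differential check (sketch only, not in the commit).
import random

from data_structures.hashing.hash_map import HashMap

def fuzz(steps: int = 1000, seed: int = 0) -> None:
    rng = random.Random(seed)
    my, py = HashMap(initial_block_size=4), {}
    for _ in range(steps):
        key = rng.randrange(32)
        if rng.random() < 0.5:
            my[key] = py[key] = rng.randrange(100)
        else:
            my.pop(key, None)  # pop() is inherited from MutableMapping
            py.pop(key, None)
        assert sorted(my.items()) == sorted(py.items())

fuzz()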

View File

@@ -569,7 +569,7 @@ def plot_partition_boundary(
     """
     We can not get the optimum w of our kernel svm model which is different from linear
     svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our tained model. Then we
+    prediced values of these points are calculated by using our trained model. Then we
     could use this prediced values to draw contour map.
     And this contour map can represent svm's partition boundary.
     """

View File

@@ -2,7 +2,7 @@
 Lorentz transformations describe the transition between two inertial reference
 frames F and F', each of which is moving in some direction with respect to the
 other. This code only calculates Lorentz transformations for movement in the x
-direction with no spacial rotation (i.e., a Lorentz boost in the x direction).
+direction with no spatial rotation (i.e., a Lorentz boost in the x direction).
 The Lorentz transformations are calculated here as linear transformations of
 four-vectors [ct, x, y, z] described by Minkowski space. Note that t (time) is
 multiplied by c (the speed of light) in the first entry of each four-vector.
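
To make the description concrete, a small sketch (not the repo file itself) of an x-direction boost acting on a four-vector [ct, x, y, z], with beta = v/c and gamma = 1 / sqrt(1 - beta^2):

# Sketch of a Lorentz boost along x (illustration, not the repo file).
import numpy as np

def boost_x(beta: float) -> np.ndarray:
    # The boost as a 4x4 linear transformation of [ct, x, y, z]:
    # ct' = gamma * (ct - beta * x), x' = gamma * (x - beta * ct)
    gamma = 1 / np.sqrt(1 - beta**2)
    return np.array(
        [
            [gamma, -gamma * beta, 0.0, 0.0],
            [-gamma * beta, gamma, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0],
        ]
    )

event = np.array([1.0, 0.0, 0.0, 0.0])  # ct = 1 at the spatial origin
print(boost_x(0.5) @ event)  # [1.1547, -0.5774, 0, 0]: ct' = gamma, x' = -gamma*beta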