From 1f368da06d361e3d1415a2ec7d8857068b746586 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 13:38:55 +0200 Subject: [PATCH 01/13] [pre-commit.ci] pre-commit autoupdate (#11402) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.3 → v0.4.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.3...v0.4.4) - [github.com/tox-dev/pyproject-fmt: 1.8.0 → 2.0.4](https://github.com/tox-dev/pyproject-fmt/compare/1.8.0...2.0.4) - [github.com/abravalheri/validate-pyproject: v0.16 → v0.17](https://github.com/abravalheri/validate-pyproject/compare/v0.16...v0.17) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 6 +- pyproject.toml | 182 +++++++++++++++++++++++----------------- 2 files changed, 106 insertions(+), 82 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 210b74940..521769096 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.4.4 hooks: - id: ruff - id: ruff-format @@ -29,7 +29,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: "1.8.0" + rev: "2.0.4" hooks: - id: pyproject-fmt @@ -42,7 +42,7 @@ repos: pass_filenames: false - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.16 + rev: v0.17 hooks: - id: validate-pyproject diff --git a/pyproject.toml b/pyproject.toml index c07bc9c48..89ed22bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,61 +1,61 @@ [tool.ruff] -lint.ignore = [ # `ruff rule S101` for a description of that rule - "B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME - "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME - "EM101", # Exception must not use a string literal, assign to variable first - "EXE001", # Shebang is present but file is not executable -- DO NOT FIX - "G004", # Logging statement uses f-string - "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey - "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX - "PLW2901", # PLW2901: Redefined loop variable -- FIX ME - "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception - "PT018", # Assertion should be broken down into multiple parts - "S101", # Use of `assert` detected -- DO NOT FIX - "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME - "SLF001", # Private member accessed: `_Iterator` -- FIX ME - "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX +lint.ignore = [ # `ruff rule S101` for a description of that rule + "B904", # Within an `except` clause, raise exceptions with `raise ... 
from err` -- FIX ME + "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME + "EM101", # Exception must not use a string literal, assign to variable first + "EXE001", # Shebang is present but file is not executable -- DO NOT FIX + "G004", # Logging statement uses f-string + "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey + "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX + "PLW2901", # PLW2901: Redefined loop variable -- FIX ME + "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception + "PT018", # Assertion should be broken down into multiple parts + "S101", # Use of `assert` detected -- DO NOT FIX + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME + "SLF001", # Private member accessed: `_Iterator` -- FIX ME + "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX ] -lint.select = [ # https://beta.ruff.rs/docs/rules - "A", # flake8-builtins - "ARG", # flake8-unused-arguments - "ASYNC", # flake8-async - "B", # flake8-bugbear - "BLE", # flake8-blind-except - "C4", # flake8-comprehensions - "C90", # McCabe cyclomatic complexity - "DJ", # flake8-django - "DTZ", # flake8-datetimez - "E", # pycodestyle - "EM", # flake8-errmsg - "EXE", # flake8-executable - "F", # Pyflakes - "FA", # flake8-future-annotations - "FLY", # flynt - "G", # flake8-logging-format - "I", # isort - "ICN", # flake8-import-conventions - "INP", # flake8-no-pep420 - "INT", # flake8-gettext - "ISC", # flake8-implicit-str-concat - "N", # pep8-naming - "NPY", # NumPy-specific rules - "PD", # pandas-vet - "PGH", # pygrep-hooks - "PIE", # flake8-pie - "PL", # Pylint - "PT", # flake8-pytest-style - "PYI", # flake8-pyi - "RSE", # flake8-raise - "RUF", # Ruff-specific rules - "S", # flake8-bandit - "SIM", # flake8-simplify - "SLF", # flake8-self - "T10", # flake8-debugger - "TD", # flake8-todos - "TID", # flake8-tidy-imports - "UP", # pyupgrade - "W", # pycodestyle - "YTT", # flake8-2020 +lint.select = [ # https://beta.ruff.rs/docs/rules + "A", # flake8-builtins + "ARG", # flake8-unused-arguments + "ASYNC", # flake8-async + "B", # flake8-bugbear + "BLE", # flake8-blind-except + "C4", # flake8-comprehensions + "C90", # McCabe cyclomatic complexity + "DJ", # flake8-django + "DTZ", # flake8-datetimez + "E", # pycodestyle + "EM", # flake8-errmsg + "EXE", # flake8-executable + "F", # Pyflakes + "FA", # flake8-future-annotations + "FLY", # flynt + "G", # flake8-logging-format + "I", # isort + "ICN", # flake8-import-conventions + "INP", # flake8-no-pep420 + "INT", # flake8-gettext + "ISC", # flake8-implicit-str-concat + "N", # pep8-naming + "NPY", # NumPy-specific rules + "PD", # pandas-vet + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # Pylint + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RSE", # flake8-raise + "RUF", # Ruff-specific rules + "S", # flake8-bandit + "SIM", # flake8-simplify + "SLF", # flake8-self + "T10", # flake8-debugger + "TD", # flake8-todos + "TID", # flake8-tidy-imports + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 # "ANN", # flake8-annotations # FIX ME? # "COM", # flake8-commas # "D", # pydocstyle -- FIX ME? 
@@ -71,27 +71,51 @@ lint.select = [ # https://beta.ruff.rs/docs/rules output-format = "full" target-version = "py312" -[tool.ruff.lint.mccabe] # DO NOT INCREASE THIS VALUE -max-complexity = 17 # default: 10 +[tool.ruff.lint.mccabe] # DO NOT INCREASE THIS VALUE +max-complexity = 17 # default: 10 [tool.ruff.lint.per-file-ignores] -"arithmetic_analysis/newton_raphson.py" = ["PGH001"] -"data_structures/binary_tree/binary_search_tree_recursive.py" = ["BLE001"] -"data_structures/hashing/tests/test_hash_map.py" = ["BLE001"] -"hashes/enigma_machine.py" = ["BLE001"] -"machine_learning/sequential_minimum_optimization.py" = ["SIM115"] -"matrix/sherman_morrison.py" = ["SIM103"] -"other/l*u_cache.py" = ["RUF012"] -"physics/newtons_second_law_of_motion.py" = ["BLE001"] -"project_euler/problem_099/sol1.py" = ["SIM115"] -"sorts/external_sort.py" = ["SIM115"] +"arithmetic_analysis/newton_raphson.py" = [ + "PGH001", +] +"data_structures/binary_tree/binary_search_tree_recursive.py" = [ + "BLE001", +] +"data_structures/hashing/tests/test_hash_map.py" = [ + "BLE001", +] +"hashes/enigma_machine.py" = [ + "BLE001", +] +"machine_learning/sequential_minimum_optimization.py" = [ + "SIM115", +] +"matrix/sherman_morrison.py" = [ + "SIM103", +] +"other/l*u_cache.py" = [ + "RUF012", +] +"physics/newtons_second_law_of_motion.py" = [ + "BLE001", +] +"project_euler/problem_099/sol1.py" = [ + "SIM115", +] +"sorts/external_sort.py" = [ + "SIM115", +] -[tool.ruff.lint.pylint] # DO NOT INCREASE THESE VALUES -allow-magic-value-types = ["float", "int", "str"] -max-args = 10 # default: 5 -max-branches = 20 # default: 12 -max-returns = 8 # default: 6 -max-statements = 88 # default: 50 +[tool.ruff.lint.pylint] # DO NOT INCREASE THESE VALUES +allow-magic-value-types = [ + "float", + "int", + "str", +] +max-args = 10 # default: 5 +max-branches = 20 # default: 12 +max-returns = 8 # default: 6 +max-statements = 88 # default: 50 [tool.codespell] ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar" @@ -99,17 +123,17 @@ skip = "./.*,*.json,ciphers/prehistoric_men.txt,project_euler/problem_022/p022_n [tool.pytest.ini_options] markers = [ - "mat_ops: mark a test as utilizing matrix operations.", + "mat_ops: mark a test as utilizing matrix operations.", ] addopts = [ - "--durations=10", - "--doctest-modules", - "--showlocals", + "--durations=10", + "--doctest-modules", + "--showlocals", ] [tool.coverage.report] omit = [ ".env/*", - "project_euler/*" + "project_euler/*", ] sort = "Cover" From 0139143abb286027bd3954f3862aab4558642019 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 May 2024 22:44:57 +0200 Subject: [PATCH 02/13] [pre-commit.ci] pre-commit autoupdate (#11408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/tox-dev/pyproject-fmt: 2.0.4 → 2.1.1](https://github.com/tox-dev/pyproject-fmt/compare/2.0.4...2.1.1) - [github.com/abravalheri/validate-pyproject: v0.17 → v0.18](https://github.com/abravalheri/validate-pyproject/compare/v0.17...v0.18) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 +-- pyproject.toml | 79 ++++++++++++++++++++--------------------- 2 files changed, 40 insertions(+), 43 deletions(-) diff 
--git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 521769096..b63457ca8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.0.4" + rev: "2.1.1" hooks: - id: pyproject-fmt @@ -42,7 +42,7 @@ repos: pass_filenames: false - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.17 + rev: v0.18 hooks: - id: validate-pyproject diff --git a/pyproject.toml b/pyproject.toml index 89ed22bc6..5b8ce4e72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,21 +1,9 @@ [tool.ruff] -lint.ignore = [ # `ruff rule S101` for a description of that rule - "B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME - "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME - "EM101", # Exception must not use a string literal, assign to variable first - "EXE001", # Shebang is present but file is not executable -- DO NOT FIX - "G004", # Logging statement uses f-string - "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey - "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX - "PLW2901", # PLW2901: Redefined loop variable -- FIX ME - "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception - "PT018", # Assertion should be broken down into multiple parts - "S101", # Use of `assert` detected -- DO NOT FIX - "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME - "SLF001", # Private member accessed: `_Iterator` -- FIX ME - "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX -] -lint.select = [ # https://beta.ruff.rs/docs/rules +target-version = "py312" + +output-format = "full" +lint.select = [ + # https://beta.ruff.rs/docs/rules "A", # flake8-builtins "ARG", # flake8-unused-arguments "ASYNC", # flake8-async @@ -68,54 +56,63 @@ lint.select = [ # https://beta.ruff.rs/docs/rules # "TCH", # flake8-type-checking # "TRY", # tryceratops ] -output-format = "full" -target-version = "py312" - -[tool.ruff.lint.mccabe] # DO NOT INCREASE THIS VALUE -max-complexity = 17 # default: 10 - -[tool.ruff.lint.per-file-ignores] -"arithmetic_analysis/newton_raphson.py" = [ +lint.per-file-ignores."arithmetic_analysis/newton_raphson.py" = [ "PGH001", ] -"data_structures/binary_tree/binary_search_tree_recursive.py" = [ +lint.per-file-ignores."data_structures/binary_tree/binary_search_tree_recursive.py" = [ "BLE001", ] -"data_structures/hashing/tests/test_hash_map.py" = [ +lint.per-file-ignores."data_structures/hashing/tests/test_hash_map.py" = [ "BLE001", ] -"hashes/enigma_machine.py" = [ +lint.per-file-ignores."hashes/enigma_machine.py" = [ "BLE001", ] -"machine_learning/sequential_minimum_optimization.py" = [ +lint.per-file-ignores."machine_learning/sequential_minimum_optimization.py" = [ "SIM115", ] -"matrix/sherman_morrison.py" = [ +lint.per-file-ignores."matrix/sherman_morrison.py" = [ "SIM103", ] -"other/l*u_cache.py" = [ +lint.per-file-ignores."other/l*u_cache.py" = [ "RUF012", ] -"physics/newtons_second_law_of_motion.py" = [ +lint.per-file-ignores."physics/newtons_second_law_of_motion.py" = [ "BLE001", ] -"project_euler/problem_099/sol1.py" = [ +lint.per-file-ignores."project_euler/problem_099/sol1.py" = [ "SIM115", ] -"sorts/external_sort.py" = [ +lint.per-file-ignores."sorts/external_sort.py" = [ "SIM115", ] - -[tool.ruff.lint.pylint] # DO NOT INCREASE THESE VALUES -allow-magic-value-types = [ 
+lint.mccabe.max-complexity = 17 # default: 10 +lint.pylint.allow-magic-value-types = [ "float", "int", "str", ] -max-args = 10 # default: 5 -max-branches = 20 # default: 12 -max-returns = 8 # default: 6 -max-statements = 88 # default: 50 +lint.pylint.max-args = 10 # default: 5 +lint.pylint.max-branches = 20 # default: 12 +lint.pylint.max-returns = 8 # default: 6 +lint.pylint.max-statements = 88 # default: 50 +lint.ignore = [ + # `ruff rule S101` for a description of that rule + "B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME + "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME + "EM101", # Exception must not use a string literal, assign to variable first + "EXE001", # Shebang is present but file is not executable -- DO NOT FIX + "G004", # Logging statement uses f-string + "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey + "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX + "PLW2901", # PLW2901: Redefined loop variable -- FIX ME + "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception + "PT018", # Assertion should be broken down into multiple parts + "S101", # Use of `assert` detected -- DO NOT FIX + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME + "SLF001", # Private member accessed: `_Iterator` -- FIX ME + "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX +] [tool.codespell] ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar" From 82aa909db7736d8022532bee4dc381072d8c5b1f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 21:56:48 -0400 Subject: [PATCH 03/13] [pre-commit.ci] pre-commit autoupdate (#11417) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.4 → v0.4.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.4...v0.4.5) - [github.com/codespell-project/codespell: v2.2.6 → v2.3.0](https://github.com/codespell-project/codespell/compare/v2.2.6...v2.3.0) - [github.com/tox-dev/pyproject-fmt: 2.1.1 → 2.1.3](https://github.com/tox-dev/pyproject-fmt/compare/2.1.1...2.1.3) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * iterable * at most --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss --- .pre-commit-config.yaml | 6 +++--- graphs/dijkstra_algorithm.py | 2 +- project_euler/problem_047/sol1.py | 2 +- pyproject.toml | 35 ++++++++++++++++--------------- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b63457ca8..43bf547de 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,20 +16,20 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.4 + rev: v0.4.5 hooks: - id: ruff - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.2.6 + rev: v2.3.0 hooks: - id: codespell additional_dependencies: - tomli - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.1.1" + rev: "2.1.3" hooks: - id: pyproject-fmt diff --git a/graphs/dijkstra_algorithm.py b/graphs/dijkstra_algorithm.py index 
2efa2cb63..51412b790 100644 --- a/graphs/dijkstra_algorithm.py +++ b/graphs/dijkstra_algorithm.py @@ -215,7 +215,7 @@ class PriorityQueue: [(5, 'A'), (15, 'B')] """ idx = self.pos[tup[1]] - # assuming the new_d is atmost old_d + # assuming the new_d is at most old_d self.array[idx] = (new_d, tup[1]) while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]: self.swap(idx, self.par(idx)) diff --git a/project_euler/problem_047/sol1.py b/project_euler/problem_047/sol1.py index c9c44a983..4ecd4f4b4 100644 --- a/project_euler/problem_047/sol1.py +++ b/project_euler/problem_047/sol1.py @@ -58,7 +58,7 @@ def upf_len(num: int) -> int: def equality(iterable: list) -> bool: """ - Check equality of ALL elements in an interable. + Check equality of ALL elements in an iterable >>> equality([1, 2, 3, 4]) False >>> equality([2, 2, 2, 2]) diff --git a/pyproject.toml b/pyproject.toml index 5b8ce4e72..429f4fab9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,24 @@ lint.select = [ # "TCH", # flake8-type-checking # "TRY", # tryceratops ] +lint.ignore = [ + # `ruff rule S101` for a description of that rule + "B904", # Within an `except` clause, raise exceptions with `raise ... from err` -- FIX ME + "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME + "EM101", # Exception must not use a string literal, assign to variable first + "EXE001", # Shebang is present but file is not executable -- DO NOT FIX + "G004", # Logging statement uses f-string + "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey + "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX + "PLW2901", # PLW2901: Redefined loop variable -- FIX ME + "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception + "PT018", # Assertion should be broken down into multiple parts + "S101", # Use of `assert` detected -- DO NOT FIX + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME + "SLF001", # Private member accessed: `_Iterator` -- FIX ME + "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX +] + lint.per-file-ignores."arithmetic_analysis/newton_raphson.py" = [ "PGH001", ] @@ -96,23 +114,6 @@ lint.pylint.max-args = 10 # default: 5 lint.pylint.max-branches = 20 # default: 12 lint.pylint.max-returns = 8 # default: 6 lint.pylint.max-statements = 88 # default: 50 -lint.ignore = [ - # `ruff rule S101` for a description of that rule - "B904", # Within an `except` clause, raise exceptions with `raise ... 
from err` -- FIX ME - "B905", # `zip()` without an explicit `strict=` parameter -- FIX ME - "EM101", # Exception must not use a string literal, assign to variable first - "EXE001", # Shebang is present but file is not executable -- DO NOT FIX - "G004", # Logging statement uses f-string - "PLC1901", # `{}` can be simplified to `{}` as an empty string is falsey - "PLW060", # Using global for `{name}` but no assignment is done -- DO NOT FIX - "PLW2901", # PLW2901: Redefined loop variable -- FIX ME - "PT011", # `pytest.raises(Exception)` is too broad, set the `match` parameter or use a more specific exception - "PT018", # Assertion should be broken down into multiple parts - "S101", # Use of `assert` detected -- DO NOT FIX - "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes -- FIX ME - "SLF001", # Private member accessed: `_Iterator` -- FIX ME - "UP038", # Use `X | Y` in `{}` call instead of `(X, Y)` -- DO NOT FIX -] [tool.codespell] ignore-words-list = "3rt,ans,bitap,crate,damon,fo,followings,hist,iff,kwanza,manuel,mater,secant,som,sur,tim,toi,zar" From b8afb214f8c8d185dc42dafb9676becf512ca7fa Mon Sep 17 00:00:00 2001 From: Marco-campione-github <80974790+Marco-campione-github@users.noreply.github.com> Date: Fri, 31 May 2024 10:11:09 +0200 Subject: [PATCH 04/13] Changed the N to self.N in show_data in segment_tree.py (#11276) --- data_structures/binary_tree/segment_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_structures/binary_tree/segment_tree.py b/data_structures/binary_tree/segment_tree.py index c7069b3f6..084fcf849 100644 --- a/data_structures/binary_tree/segment_tree.py +++ b/data_structures/binary_tree/segment_tree.py @@ -98,7 +98,7 @@ class SegmentTree: def show_data(self): show_list = [] - for i in range(1, N + 1): + for i in range(1, self.N + 1): show_list += [self.query(i, i)] print(show_list) From 70bd06db4642a2323ff397b041d40bc95ed6a5bf Mon Sep 17 00:00:00 2001 From: Pedram_Mohajer <48964282+pedram-mohajer@users.noreply.github.com> Date: Sat, 1 Jun 2024 05:09:03 -0400 Subject: [PATCH 05/13] add doctest/document to actual_power and document to power (#11187) * Update power.py * Update divide_and_conquer/power.py --------- Co-authored-by: Tianyi Zheng --- divide_and_conquer/power.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/divide_and_conquer/power.py b/divide_and_conquer/power.py index f2e023afd..faf6a3476 100644 --- a/divide_and_conquer/power.py +++ b/divide_and_conquer/power.py @@ -2,6 +2,20 @@ def actual_power(a: int, b: int): """ Function using divide and conquer to calculate a^b. It only works for integer a,b. + + :param a: The base of the power operation, an integer. + :param b: The exponent of the power operation, a non-negative integer. + :return: The result of a^b. + + Examples: + >>> actual_power(3, 2) + 9 + >>> actual_power(5, 3) + 125 + >>> actual_power(2, 5) + 32 + >>> actual_power(7, 0) + 1 """ if b == 0: return 1 @@ -13,6 +27,10 @@ def actual_power(a: int, b: int): def power(a: int, b: int) -> float: """ + :param a: The base (integer). + :param b: The exponent (integer). + :return: The result of a^b, as a float for negative exponents. 
+ >>> power(4,6) 4096 >>> power(2,3) From 723cf9c42839c47e9e6fb83362a7391177355505 Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Sat, 1 Jun 2024 02:17:07 -0700 Subject: [PATCH 06/13] Remove duplicate implementation of median of two arrays algorithm (#11420) * Remove duplicate implementation of median of two arrays algorithm Remove maths/median_of_two_arrays.py because the repo has two implementations of this algorithm, with data_structures/arrays/median_two_array.py being the other. Even though maths/median_of_two_arrays.py is the older implementation, the newer implementation is better documented, has better error handling, and is already located in a more appropriate directory. * updating DIRECTORY.md --------- Co-authored-by: tianyizheng02 --- DIRECTORY.md | 1 - maths/median_of_two_arrays.py | 33 --------------------------------- 2 files changed, 34 deletions(-) delete mode 100644 maths/median_of_two_arrays.py diff --git a/DIRECTORY.md b/DIRECTORY.md index 4a053a3f1..2094fc3a9 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -661,7 +661,6 @@ * [Manhattan Distance](maths/manhattan_distance.py) * [Matrix Exponentiation](maths/matrix_exponentiation.py) * [Max Sum Sliding Window](maths/max_sum_sliding_window.py) - * [Median Of Two Arrays](maths/median_of_two_arrays.py) * [Minkowski Distance](maths/minkowski_distance.py) * [Mobius Function](maths/mobius_function.py) * [Modular Division](maths/modular_division.py) diff --git a/maths/median_of_two_arrays.py b/maths/median_of_two_arrays.py deleted file mode 100644 index 55aa587a9..000000000 --- a/maths/median_of_two_arrays.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - - -def median_of_two_arrays(nums1: list[float], nums2: list[float]) -> float: - """ - >>> median_of_two_arrays([1, 2], [3]) - 2 - >>> median_of_two_arrays([0, -1.1], [2.5, 1]) - 0.5 - >>> median_of_two_arrays([], [2.5, 1]) - 1.75 - >>> median_of_two_arrays([], [0]) - 0 - >>> median_of_two_arrays([], []) - Traceback (most recent call last): - ... - IndexError: list index out of range - """ - all_numbers = sorted(nums1 + nums2) - div, mod = divmod(len(all_numbers), 2) - if mod == 1: - return all_numbers[div] - else: - return (all_numbers[div] + all_numbers[div - 1]) / 2 - - -if __name__ == "__main__": - import doctest - - doctest.testmod() - array_1 = [float(x) for x in input("Enter the elements of first array: ").split()] - array_2 = [float(x) for x in input("Enter the elements of second array: ").split()] - print(f"The median of two arrays is: {median_of_two_arrays(array_1, array_2)}") From edee8e644b09a21a1f70d3a59d57feed51c74004 Mon Sep 17 00:00:00 2001 From: Vishal Kumar Gupta Date: Sun, 2 Jun 2024 02:41:40 +0100 Subject: [PATCH 07/13] use format to remove '0b' (#11307) * use format to remove '0b' * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix: error message for float input --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- bit_manipulation/binary_and_operator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bit_manipulation/binary_and_operator.py b/bit_manipulation/binary_and_operator.py index 36f6c668d..f33b8b1c0 100644 --- a/bit_manipulation/binary_and_operator.py +++ b/bit_manipulation/binary_and_operator.py @@ -26,7 +26,7 @@ def binary_and(a: int, b: int) -> str: >>> binary_and(0, 1.1) Traceback (most recent call last): ... 
- TypeError: 'float' object cannot be interpreted as an integer + ValueError: Unknown format code 'b' for object of type 'float' >>> binary_and("0", "1") Traceback (most recent call last): ... @@ -35,8 +35,8 @@ def binary_and(a: int, b: int) -> str: if a < 0 or b < 0: raise ValueError("the value of both inputs must be positive") - a_binary = str(bin(a))[2:] # remove the leading "0b" - b_binary = str(bin(b))[2:] # remove the leading "0b" + a_binary = format(a, "b") + b_binary = format(b, "b") max_len = max(len(a_binary), len(b_binary)) From 2f1704dae579295ea2f47584ef80b4b321a284d7 Mon Sep 17 00:00:00 2001 From: Mandeep Singh <135956602+MannCode@users.noreply.github.com> Date: Sun, 2 Jun 2024 18:27:35 -0700 Subject: [PATCH 08/13] issue #11150 Ensure explicit column selection and data type setting in data reading process. (#11302) * issue #11150 Ensure explicit column selection and data type setting in data reading process. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- machine_learning/sequential_minimum_optimization.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py index 3abdd6ccb..2ebdeb764 100644 --- a/machine_learning/sequential_minimum_optimization.py +++ b/machine_learning/sequential_minimum_optimization.py @@ -463,7 +463,11 @@ def test_cancel_data(): with open(r"cancel_data.csv", "w") as f: f.write(content) - data = pd.read_csv(r"cancel_data.csv", header=None) + data = pd.read_csv( + "cancel_data.csv", + header=None, + dtype={0: str}, # Assuming the first column contains string data + ) # 1: pre-processing data del data[data.columns.tolist()[0]] From ffaa976f6c5a5de30e284ae2fc8122f40cd3fa6a Mon Sep 17 00:00:00 2001 From: Harsh buddhdev Date: Sun, 2 Jun 2024 23:00:26 -0400 Subject: [PATCH 09/13] Fixes #9943 (#10252) * added doctest for all_permutations.py * added doctest for all_subsequences.py * added doctest for all_subsequences.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * doctest added * updated * Update backtracking/all_subsequences.py --------- Co-authored-by: Harsh Buddhdev Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Tianyi Zheng --- backtracking/all_permutations.py | 36 ++++++++++++++++++++++ backtracking/all_subsequences.py | 52 +++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/backtracking/all_permutations.py b/backtracking/all_permutations.py index c483cd62c..f376e6fa0 100644 --- a/backtracking/all_permutations.py +++ b/backtracking/all_permutations.py @@ -23,6 +23,42 @@ def create_state_space_tree( Creates a state space tree to iterate through each branch using DFS. We know that each state has exactly len(sequence) - index children. It terminates when it reaches the end of the given sequence. + + :param sequence: The input sequence for which permutations are generated. + :param current_sequence: The current permutation being built. + :param index: The current index in the sequence. + :param index_used: list to track which elements are used in permutation. 
+ + Example 1: + >>> sequence = [1, 2, 3] + >>> current_sequence = [] + >>> index_used = [False, False, False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + [1, 2, 3] + [1, 3, 2] + [2, 1, 3] + [2, 3, 1] + [3, 1, 2] + [3, 2, 1] + + Example 2: + >>> sequence = ["A", "B", "C"] + >>> current_sequence = [] + >>> index_used = [False, False, False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + ['A', 'B', 'C'] + ['A', 'C', 'B'] + ['B', 'A', 'C'] + ['B', 'C', 'A'] + ['C', 'A', 'B'] + ['C', 'B', 'A'] + + Example 3: + >>> sequence = [1] + >>> current_sequence = [] + >>> index_used = [False] + >>> create_state_space_tree(sequence, current_sequence, 0, index_used) + [1] """ if index == len(sequence): diff --git a/backtracking/all_subsequences.py b/backtracking/all_subsequences.py index 7844a829d..18696054e 100644 --- a/backtracking/all_subsequences.py +++ b/backtracking/all_subsequences.py @@ -22,6 +22,56 @@ def create_state_space_tree( Creates a state space tree to iterate through each branch using DFS. We know that each state has exactly two children. It terminates when it reaches the end of the given sequence. + + :param sequence: The input sequence for which subsequences are generated. + :param current_subsequence: The current subsequence being built. + :param index: The current index in the sequence. + + Example: + >>> sequence = [3, 2, 1] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + [1] + [2] + [2, 1] + [3] + [3, 1] + [3, 2] + [3, 2, 1] + + >>> sequence = ["A", "B"] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + ['B'] + ['A'] + ['A', 'B'] + + >>> sequence = [] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + + >>> sequence = [1, 2, 3, 4] + >>> current_subsequence = [] + >>> create_state_space_tree(sequence, current_subsequence, 0) + [] + [4] + [3] + [3, 4] + [2] + [2, 4] + [2, 3] + [2, 3, 4] + [1] + [1, 4] + [1, 3] + [1, 3, 4] + [1, 2] + [1, 2, 4] + [1, 2, 3] + [1, 2, 3, 4] """ if index == len(sequence): @@ -35,7 +85,7 @@ def create_state_space_tree( if __name__ == "__main__": - seq: list[Any] = [3, 1, 2, 4] + seq: list[Any] = [1, 2, 3] generate_all_subsequences(seq) seq.clear() From c919579869ae9f57d6878336af6de6bc9a001c61 Mon Sep 17 00:00:00 2001 From: AtomicVar Date: Mon, 3 Jun 2024 11:15:01 +0800 Subject: [PATCH 10/13] Add KL divergence loss algorithm (#11238) * Add KL divergence loss algorithm * Apply suggestions from code review --------- Co-authored-by: Tianyi Zheng --- machine_learning/loss_functions.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 16e5a3278..150035661 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -629,6 +629,40 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) -> return np.mean(loss) +def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float: + """ + Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels + and predicted probabilities. + + KL divergence loss quantifies dissimilarity between true labels and predicted + probabilities. It's often used in training generative models. 
+ + KL = Σ(y_true * ln(y_true / y_pred)) + + Reference: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence + + Parameters: + - y_true: True class probabilities + - y_pred: Predicted class probabilities + + >>> true_labels = np.array([0.2, 0.3, 0.5]) + >>> predicted_probs = np.array([0.3, 0.3, 0.4]) + >>> kullback_leibler_divergence(true_labels, predicted_probs) + 0.030478754035472025 + >>> true_labels = np.array([0.2, 0.3, 0.5]) + >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5]) + >>> kullback_leibler_divergence(true_labels, predicted_probs) + Traceback (most recent call last): + ... + ValueError: Input arrays must have the same length. + """ + if len(y_true) != len(y_pred): + raise ValueError("Input arrays must have the same length.") + + kl_loss = y_true * np.log(y_true / y_pred) + return np.sum(kl_loss) + + if __name__ == "__main__": import doctest From 5827aac79a36f0d43e9bd9f1c9ca11da07b2d623 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 18:21:27 -0300 Subject: [PATCH 11/13] [pre-commit.ci] pre-commit autoupdate (#11430) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.5 → v0.4.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.5...v0.4.7) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 43bf547de..a04f4f8b2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.5 + rev: v0.4.7 hooks: - id: ruff - id: ruff-format From 41a1cdf38d9cb1a14c9149d2d815efa2259679ef Mon Sep 17 00:00:00 2001 From: Yuri Batista Ishizawa Date: Tue, 11 Jun 2024 06:45:00 -0300 Subject: [PATCH 12/13] Add rainfall intensity calculation function (#11432) * Add rainfall intensity calculation function * chore: improve function and coefficient documentation * Update physics/rainfall_intensity.py --------- Co-authored-by: Tianyi Zheng --- physics/rainfall_intensity.py | 143 ++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 physics/rainfall_intensity.py diff --git a/physics/rainfall_intensity.py b/physics/rainfall_intensity.py new file mode 100644 index 000000000..cee8d50dd --- /dev/null +++ b/physics/rainfall_intensity.py @@ -0,0 +1,143 @@ +""" +Rainfall Intensity +================== +This module contains functions to calculate the intensity of +a rainfall event for a given duration and return period. + +This function uses the Sherman intensity-duration-frequency curve. + +References +---------- +- Aparicio, F. (1997): Fundamentos de Hidrología de Superficie. + Balderas, México, Limusa. 303 p. +- https://en.wikipedia.org/wiki/Intensity-duration-frequency_curve +""" + + +def rainfall_intensity( + coefficient_k: float, + coefficient_a: float, + coefficient_b: float, + coefficient_c: float, + return_period: float, + duration: float, +) -> float: + """ + Calculate the intensity of a rainfall event for a given duration and return period.
+ It's based on the Sherman intensity-duration-frequency curve: + + I = k * T^a / (D + b)^c + + where: + I = Intensity of the rainfall event [mm/h] + k, a, b, c = Coefficients obtained through statistical distribution adjust + T = Return period in years + D = Rainfall event duration in minutes + + Parameters + ---------- + coefficient_k : float + Coefficient obtained through statistical distribution adjust. + coefficient_a : float + Coefficient obtained through statistical distribution adjust. + coefficient_b : float + Coefficient obtained through statistical distribution adjust. + coefficient_c : float + Coefficient obtained through statistical distribution adjust. + return_period : float + Return period in years. + duration : float + Rainfall event duration in minutes. + + Returns + ------- + intensity : float + Intensity of the rainfall event in mm/h. + + Raises + ------ + ValueError + If any of the parameters are not positive. + + Examples + -------- + + >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 60) + 49.83339231138578 + + >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 30) + 77.36319588106228 + + >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 5, 60) + 43.382487747633625 + + >>> rainfall_intensity(0, 0.2, 11.6, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, -0.2, 11.6, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, -11.6, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, 11.6, -0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0, 11.6, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, 0, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, 11.6, 0, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(0, 0.2, 11.6, 0.81, 10, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 0, 60) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. + + >>> rainfall_intensity(1000, 0.2, 11.6, 0.81, 10, 0) + Traceback (most recent call last): + ... + ValueError: All parameters must be positive. 
+ + """ + if ( + coefficient_k <= 0 + or coefficient_a <= 0 + or coefficient_b <= 0 + or coefficient_c <= 0 + or return_period <= 0 + or duration <= 0 + ): + raise ValueError("All parameters must be positive.") + intensity = (coefficient_k * (return_period**coefficient_a)) / ( + (duration + coefficient_b) ** coefficient_c + ) + return intensity + + +if __name__ == "__main__": + import doctest + + doctest.testmod() From 446742387e83f94f3d54ce640cb07004180130ee Mon Sep 17 00:00:00 2001 From: Tianyi Zheng Date: Thu, 13 Jun 2024 14:47:29 -0700 Subject: [PATCH 13/13] Fix grammar and spelling mistakes in sequential_minimum_optimization.py (#11427) --- .../sequential_minimum_optimization.py | 135 +++++++++--------- 1 file changed, 66 insertions(+), 69 deletions(-) diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py index 2ebdeb764..625fc28fe 100644 --- a/machine_learning/sequential_minimum_optimization.py +++ b/machine_learning/sequential_minimum_optimization.py @@ -1,11 +1,9 @@ """ - Implementation of sequential minimal optimization (SMO) for support vector machines - (SVM). +Sequential minimal optimization (SMO) for support vector machines (SVM) - Sequential minimal optimization (SMO) is an algorithm for solving the quadratic - programming (QP) problem that arises during the training of support vector - machines. - It was invented by John Platt in 1998. +Sequential minimal optimization (SMO) is an algorithm for solving the quadratic +programming (QP) problem that arises during the training of SVMs. It was invented by +John Platt in 1998. Input: 0: type: numpy.ndarray. @@ -124,8 +122,7 @@ class SmoSVM: b_old = self._b self._b = b - # 4: update error value,here we only calculate those non-bound samples' - # error + # 4: update error, here we only calculate the error for non-bound samples self._unbound = [i for i in self._all_samples if self._is_unbound(i)] for s in self.unbound: if s in (i1, i2): @@ -136,7 +133,7 @@ class SmoSVM: + (self._b - b_old) ) - # if i1 or i2 is non-bound,update there error value to zero + # if i1 or i2 is non-bound, update their error value to zero if self._is_unbound(i1): self._error[i1] = 0 if self._is_unbound(i2): @@ -161,7 +158,7 @@ class SmoSVM: results.append(result) return np.array(results) - # Check if alpha violate KKT condition + # Check if alpha violates the KKT condition def _check_obey_kkt(self, index): alphas = self.alphas tol = self._tol @@ -172,20 +169,19 @@ class SmoSVM: # Get value calculated from kernel function def _k(self, i1, i2): - # for test samples,use Kernel function + # for test samples, use kernel function if isinstance(i2, np.ndarray): return self.Kernel(self.samples[i1], i2) - # for train samples,Kernel values have been saved in matrix + # for training samples, kernel values have been saved in matrix else: return self._K_matrix[i1, i2] - # Get sample's error + # Get error for sample def _e(self, index): """ Two cases: - 1:Sample[index] is non-bound,Fetch error from list: _error - 2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi - + 1: Sample[index] is non-bound, fetch error from list: _error + 2: sample[index] is bound, use predicted value minus true value: g(xi) - yi """ # get from error data if self._is_unbound(index): @@ -196,7 +192,7 @@ class SmoSVM: yi = self.tags[index] return gx - yi - # Calculate Kernel matrix of all possible i1,i2 ,saving time + # Calculate kernel matrix of all possible i1, i2, saving time def _calculate_k_matrix(self): 
k_matrix = np.zeros([self.length, self.length]) for i in self._all_samples: @@ -206,7 +202,7 @@ class SmoSVM: ) return k_matrix - # Predict test sample's tag + # Predict tag for test sample def _predict(self, sample): k = self._k predicted_value = ( @@ -222,30 +218,31 @@ class SmoSVM: # Choose alpha1 and alpha2 def _choose_alphas(self): - locis = yield from self._choose_a1() - if not locis: + loci = yield from self._choose_a1() + if not loci: return None - return locis + return loci def _choose_a1(self): """ - Choose first alpha ;steps: - 1:First loop over all sample - 2:Second loop over all non-bound samples till all non-bound samples does not - voilate kkt condition. - 3:Repeat this two process endlessly,till all samples does not voilate kkt - condition samples after first loop. + Choose first alpha + Steps: + 1: First loop over all samples + 2: Second loop over all non-bound samples until no non-bound samples violate + the KKT condition. + 3: Repeat these two processes until no samples violate the KKT condition + after the first loop. """ while True: all_not_obey = True # all sample - print("scanning all sample!") + print("Scanning all samples!") for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]: all_not_obey = False yield from self._choose_a2(i1) # non-bound sample - print("scanning non-bound sample!") + print("Scanning non-bound samples!") while True: not_obey = True for i1 in [ @@ -256,20 +253,21 @@ class SmoSVM: not_obey = False yield from self._choose_a2(i1) if not_obey: - print("all non-bound samples fit the KKT condition!") + print("All non-bound samples satisfy the KKT condition!") break if all_not_obey: - print("all samples fit the KKT condition! Optimization done!") + print("All samples satisfy the KKT condition!") break return False def _choose_a2(self, i1): """ - Choose the second alpha by using heuristic algorithm ;steps: - 1: Choose alpha2 which gets the maximum step size (|E1 - E2|). - 2: Start in a random point,loop over all non-bound samples till alpha1 and + Choose the second alpha using a heuristic algorithm + Steps: + 1: Choose alpha2 that maximizes the step size (|E1 - E2|). + 2: Start in a random point, loop over all non-bound samples till alpha1 and alpha2 are optimized. - 3: Start in a random point,loop over all samples till alpha1 and alpha2 are + 3: Start in a random point, loop over all samples till alpha1 and alpha2 are optimized. 
""" self._unbound = [i for i in self._all_samples if self._is_unbound(i)] @@ -306,7 +304,7 @@ class SmoSVM: if i1 == i2: return None, None - # calculate L and H which bound the new alpha2 + # calculate L and H which bound the new alpha2 s = y1 * y2 if s == -1: l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1) # noqa: E741 @@ -320,7 +318,7 @@ class SmoSVM: k22 = k(i2, i2) k12 = k(i1, i2) - # select the new alpha2 which could get the minimal objectives + # select the new alpha2 which could achieve the minimal objectives if (eta := k11 + k22 - 2.0 * k12) > 0.0: a2_new_unc = a2 + (y2 * (e1 - e2)) / eta # a2_new has a boundary @@ -335,7 +333,7 @@ class SmoSVM: l1 = a1 + s * (a2 - l) h1 = a1 + s * (a2 - h) - # way 1 + # Method 1 f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2) f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2) ol = ( @@ -353,9 +351,8 @@ class SmoSVM: + s * h * h1 * k(i1, i2) ) """ - # way 2 - Use objective function check which alpha2 new could get the minimal - objectives + Method 2: Use objective function to check which alpha2_new could achieve the + minimal objectives """ if ol < (oh - self._eps): a2_new = l @@ -375,7 +372,7 @@ class SmoSVM: return a1_new, a2_new - # Normalise data using min_max way + # Normalize data using min-max method def _norm(self, data): if self._init: self._min = np.min(data, axis=0) @@ -424,7 +421,7 @@ class Kernel: def _check(self): if self._kernel == self._rbf and self.gamma < 0: - raise ValueError("gamma value must greater than 0") + raise ValueError("gamma value must be non-negative") def _get_kernel(self, kernel_name): maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf} @@ -444,27 +441,27 @@ def count_time(func): start_time = time.time() func(*args, **kwargs) end_time = time.time() - print(f"smo algorithm cost {end_time - start_time} seconds") + print(f"SMO algorithm cost {end_time - start_time} seconds") return call_func @count_time -def test_cancel_data(): - print("Hello!\nStart test svm by smo algorithm!") +def test_cancer_data(): + print("Hello!\nStart test SVM using the SMO algorithm!") # 0: download dataset and load into pandas' dataframe - if not os.path.exists(r"cancel_data.csv"): + if not os.path.exists(r"cancer_data.csv"): request = urllib.request.Request( # noqa: S310 CANCER_DATASET_URL, headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"}, ) response = urllib.request.urlopen(request) # noqa: S310 content = response.read().decode("utf-8") - with open(r"cancel_data.csv", "w") as f: + with open(r"cancer_data.csv", "w") as f: f.write(content) data = pd.read_csv( - "cancel_data.csv", + "cancer_data.csv", header=None, dtype={0: str}, # Assuming the first column contains string data ) @@ -479,14 +476,14 @@ def test_cancel_data(): train_data, test_data = samples[:328, :], samples[328:, :] test_tags, test_samples = test_data[:, 0], test_data[:, 1:] - # 3: choose kernel function,and set initial alphas to zero(optional) - mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) + # 3: choose kernel function, and set initial alphas to zero (optional) + my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) al = np.zeros(train_data.shape[0]) # 4: calculating best alphas using SMO algorithm and predict test_data samples mysvm = SmoSVM( train=train_data, - kernel_func=mykernel, + kernel_func=my_kernel, alpha_list=al, cost=0.4, b=0.0, @@ -501,30 +498,30 @@ def test_cancel_data(): for i in range(test_tags.shape[0]): if test_tags[i] == predict[i]: score += 1 - print(f"\nall: 
{test_num}\nright: {score}\nfalse: {test_num - score}") + print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}") print(f"Rough Accuracy: {score / test_tags.shape[0]}") def test_demonstration(): # change stdout - print("\nStart plot,please wait!!!") + print("\nStarting plot, please wait!") sys.stdout = open(os.devnull, "w") ax1 = plt.subplot2grid((2, 2), (0, 0)) ax2 = plt.subplot2grid((2, 2), (0, 1)) ax3 = plt.subplot2grid((2, 2), (1, 0)) ax4 = plt.subplot2grid((2, 2), (1, 1)) - ax1.set_title("linear svm,cost:0.1") + ax1.set_title("Linear SVM, cost = 0.1") test_linear_kernel(ax1, cost=0.1) - ax2.set_title("linear svm,cost:500") + ax2.set_title("Linear SVM, cost = 500") test_linear_kernel(ax2, cost=500) - ax3.set_title("rbf kernel svm,cost:0.1") + ax3.set_title("RBF kernel SVM, cost = 0.1") test_rbf_kernel(ax3, cost=0.1) - ax4.set_title("rbf kernel svm,cost:500") + ax4.set_title("RBF kernel SVM, cost = 500") test_rbf_kernel(ax4, cost=500) sys.stdout = sys.__stdout__ - print("Plot done!!!") + print("Plot done!") def test_linear_kernel(ax, cost): @@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost): scaler = StandardScaler() train_x_scaled = scaler.fit_transform(train_x, train_y) train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled)) - mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5) + my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5) mysvm = SmoSVM( train=train_data, - kernel_func=mykernel, + kernel_func=my_kernel, cost=cost, tolerance=0.001, auto_norm=False, @@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost): scaler = StandardScaler() train_x_scaled = scaler.fit_transform(train_x, train_y) train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled)) - mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) + my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) mysvm = SmoSVM( train=train_data, - kernel_func=mykernel, + kernel_func=my_kernel, cost=cost, tolerance=0.001, auto_norm=False, @@ -571,11 +568,11 @@ def plot_partition_boundary( model, train_data, ax, resolution=100, colors=("b", "k", "r") ): """ - We can not get the optimum w of our kernel svm model which is different from linear - svm. For this reason, we generate randomly distributed points with high desity and - prediced values of these points are calculated by using our trained model. Then we - could use this prediced values to draw contour map. - And this contour map can represent svm's partition boundary. + We cannot get the optimal w of our kernel SVM model, which is different from a + linear SVM. For this reason, we generate randomly distributed points with high + density, and predicted values of these points are calculated using our trained + model. Then we could use this predicted values to draw contour map, and this contour + map represents the SVM's partition boundary. """ train_data_x = train_data[:, 1] train_data_y = train_data[:, 2] @@ -620,6 +617,6 @@ def plot_partition_boundary( if __name__ == "__main__": - test_cancel_data() + test_cancer_data() test_demonstration() plt.show()
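
A note on rule B904, which the pyproject.toml hunks in patches 01-03 keep in lint.ignore with a FIX ME marker: the rule requires that an exception raised inside an except clause chain the original error, so the traceback records its cause. A minimal sketch of the compliant pattern (parse_port is a hypothetical helper, not a function from this repository):

    def parse_port(raw: str) -> int:
        try:
            return int(raw)
        except ValueError as err:
            # B904-compliant: `from err` records the original exception as __cause__
            raise ValueError(f"invalid port number: {raw!r}") from err

Without `from err`, Python still attaches the original traceback as implicit context, but explicit chaining marks it as the direct cause, which is what the rule checks for.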
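Patch 07's switch from bin(a)[2:] to format(a, "b") also explains the updated doctest in binary_and_operator.py: the "b" format code produces the binary digits without the "0b" prefix, and format() rejects that code for floats with a ValueError, whereas bin() raises a TypeError. A short interpreter session showing both behaviors:

    >>> bin(10)
    '0b1010'
    >>> format(10, "b")
    '1010'
    >>> bin(1.1)
    Traceback (most recent call last):
        ...
    TypeError: 'float' object cannot be interpreted as an integer
    >>> format(1.1, "b")
    Traceback (most recent call last):
        ...
    ValueError: Unknown format code 'b' for object of type 'float'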
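Patch 08's dtype={0: str} argument guards against pandas inferring a numeric dtype for the first column; as the in-diff comment notes, treating that column as strings is an assumption about the dataset. The practical difference shows up with ID-like values, where numeric inference drops leading zeros. A self-contained sketch (the sample IDs are made up for illustration):

    import io

    import pandas as pd

    csv = io.StringIO("0012345,1\n0099887,0\n")
    print(pd.read_csv(csv, header=None)[0].tolist())
    # [12345, 99887] -- column 0 inferred as int, leading zeros lost
    csv.seek(0)  # rewind the buffer so it can be read again
    print(pd.read_csv(csv, header=None, dtype={0: str})[0].tolist())
    # ['0012345', '0099887'] -- preserved verbatim as strings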
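The value asserted in patch 10's KL divergence doctest can be reproduced directly from the formula in its docstring, KL = Σ(y_true * ln(y_true / y_pred)); a minimal NumPy check, independent of the patched module:

    import numpy as np

    y_true = np.array([0.2, 0.3, 0.5])
    y_pred = np.array([0.3, 0.3, 0.4])
    # elementwise y_true * ln(y_true / y_pred), then summed over the distribution
    kl = np.sum(y_true * np.log(y_true / y_pred))
    print(kl)  # 0.030478754035472025, matching the doctest

The formula assumes strictly positive probabilities: a zero in y_pred yields an infinite term (with a NumPy runtime warning), a case the patched function's length check does not cover.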