Mirror of https://github.com/TheAlgorithms/Python.git, synced 2024-11-27 15:01:08 +00:00

Comparing 39 commits: c1fe8c761f ... 632f4845aa
Commits:
632f4845aa, e3bd7721c8, e3f3d668be, 3e9ca92ca9, d7a0b9d245, 79648e21ff,
91f03956ec, bfb8167811, b00284fa1f, d0291190f9, 50d5bb1af3, 5c2d1fe725,
0c04372ebc, 1459adf8e4, 1ff79750a8, 0ea341a18b, dcf47d4821, 97eb853842,
254b9bf87e, f614b2efee, 1918aac31e, 83d7252b3a, 59d3ceba27, d8c0b7c7b3,
b0255a87fe, d5963b2da7, 544a38b016, c76784e708, 38764378d4, 1713cbe7c2,
2eeb450e2d, b1353dddd4, 7484cda516, 21fe32fcbe, 6fc134d96c, a84d209c08,
d4fc2bf852, b72320b402, 1cb79bc72a
.pre-commit-config.yaml:

@@ -16,7 +16,7 @@ repos:
       - id: auto-walrus

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.1
+    rev: v0.7.3
     hooks:
       - id: ruff
       - id: ruff-format

@@ -29,7 +29,7 @@ repos:
           - tomli

   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.4.3"
+    rev: "v2.5.0"
     hooks:
       - id: pyproject-fmt

@@ -42,7 +42,7 @@ repos:
         pass_filenames: false

   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.22
+    rev: v0.23
     hooks:
       - id: validate-pyproject
data_structures/arrays/kadanes_algorithm.py (new file, 42 lines)

@@ -0,0 +1,42 @@
# Kadane's algorithm


def kadanes_algorithm(arr: list[int]) -> int:
    """
    Function to find the maximum sum of a contiguous subarray using Kadane's algorithm

    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    6

    >>> kadanes_algorithm([-1, -2, -3, -4])
    -1

    >>> kadanes_algorithm([5, 4, -1, 7, 8])
    23

    >>> kadanes_algorithm([1])
    1

    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
    5
    """
    # initializing variables
    max_current = arr[0]  # store the current max sum
    max_global = arr[0]  # store the global max sum

    # looping through the array starting at the second element
    for i in range(1, len(arr)):
        # update current max sum by choosing the maximum between
        # current element alone or current element plus previous max
        max_current = max(arr[i], max_current + arr[i])

        # update global max sum if current max is larger
        max_global = max(max_current, max_global)

    return max_global


if __name__ == "__main__":
    import doctest

    doctest.testmod()
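Aside: the recurrence above (max_current = max(arr[i], max_current + arr[i])) can also recover which subarray attains the maximum sum. A minimal sketch of that extension, assuming the same input convention; kadanes_with_indices is a hypothetical helper, not part of the committed file:

def kadanes_with_indices(arr: list[int]) -> tuple[int, int, int]:
    """Return (max_sum, start, end) of the best contiguous subarray.

    >>> kadanes_with_indices([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    (6, 3, 6)
    """
    max_current = max_global = arr[0]
    start = best_start = best_end = 0
    for i in range(1, len(arr)):
        if arr[i] > max_current + arr[i]:
            max_current = arr[i]  # restarting the window at i beats extending it
            start = i
        else:
            max_current += arr[i]
        if max_current > max_global:  # new global best: record the window bounds
            max_global = max_current
            best_start, best_end = start, i
    return max_global, best_start, best_end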
@@ -172,7 +172,7 @@ def solved(values):


 def from_file(filename, sep="\n"):
     "Parse a file into a list of strings, separated by sep."
-    return open(filename).read().strip().split(sep)  # noqa: SIM115
+    return open(filename).read().strip().split(sep)


 def random_puzzle(assignments=17):
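For context: SIM115 is the Ruff (flake8-simplify) rule that flags open() calls made outside a context manager, so this change only drops a suppression comment. A resource-safe alternative would close the file deterministically; a minimal sketch, assuming pathlib is acceptable here (from_file_safe is hypothetical, the repository keeps the one-liner above):

from pathlib import Path

def from_file_safe(filename, sep="\n"):
    "Parse a file into a list of strings, separated by sep."
    # read_text() opens and closes the file itself, so SIM115 does not apply
    return Path(filename).read_text().strip().split(sep)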
data_structures/stacks/largest_rectangle_histogram.py (new file, 39 lines)

@@ -0,0 +1,39 @@
def largest_rectangle_area(heights: list[int]) -> int:
    """
    Inputs an array of integers representing the heights of bars,
    and returns the area of the largest rectangle that can be formed

    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
    10

    >>> largest_rectangle_area([2, 4])
    4

    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
    12

    >>> largest_rectangle_area([1])
    1
    """
    stack: list[int] = []
    max_area = 0
    heights = [*heights, 0]  # make a new list by appending the sentinel 0
    n = len(heights)

    for i in range(n):
        # make sure the stack remains in increasing order
        while stack and heights[i] < heights[stack[-1]]:
            h = heights[stack.pop()]  # height of the bar
            # if stack is empty, it means entire width can be taken from index 0 to i-1
            w = i if not stack else i - stack[-1] - 1  # calculate width
            max_area = max(max_area, h * w)

        stack.append(i)

    return max_area


if __name__ == "__main__":
    import doctest

    doctest.testmod()
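The invariant above (the stack holds indices of bars in increasing height order, and the appended sentinel 0 flushes everything at the end) is easy to get subtly wrong, so a quadratic reference implementation is useful for cross-checking. A minimal sketch; largest_rectangle_bruteforce is a hypothetical helper, not part of the commit:

def largest_rectangle_bruteforce(heights: list[int]) -> int:
    # for every window [i, j], the rectangle height is the shortest bar inside it
    max_area = 0
    for i in range(len(heights)):
        min_h = heights[i]
        for j in range(i, len(heights)):
            min_h = min(min_h, heights[j])
            max_area = max(max_area, min_h * (j - i + 1))
    return max_area

# e.g. both implementations give 10 for [2, 1, 5, 6, 2, 3]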
@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
         ascend_tree(leaf_node.parent, prefix_path)


-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.
@@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     return np.mean(loss)


-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
     and predicted probabilities.

-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.

     KL = Σ(y_true * ln(y_true / y_pred))

@@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     >>> predicted_probs = np.array([0.3, 0.3, 0.4])
     >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
     0.030478754035472025

     >>> true_labels = np.array([0.2, 0.3, 0.5])
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)

@@ -659,7 +662,13 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")

-    kl_loss = y_true * np.log(y_true / y_pred)
+    # negligible epsilon to avoid issues with log(0) or division by zero
+    epsilon = 1e-10
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)

     return np.sum(kl_loss)
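The point of the new guard: with the old one-liner, any entry where y_true is 0 evaluates 0 * np.log(0 / y_pred), i.e. 0 * -inf, which is nan and poisons the whole sum, and a 0 in y_pred divides by zero. The np.clip/np.where pair instead defines such terms as 0, matching the convention that x * ln(x / q) -> 0 as x -> 0. (Note that the new body still assigns epsilon = 1e-10 locally, which shadows the newly added epsilon parameter.) A minimal sketch of the difference, assuming only NumPy:

import numpy as np

y_true = np.array([0.0, 0.5, 0.5])
y_pred = np.array([0.1, 0.5, 0.4])

# old form: 0 * log(0 / 0.1) -> nan (NumPy warns and the sum becomes nan)
old = np.sum(y_true * np.log(y_true / y_pred))

# new form: zero-probability terms contribute exactly 0
clipped = np.clip(y_pred, 1e-10, None)
new = np.sum(np.where(y_true != 0, y_true * np.log(y_true / clipped), 0.0))

print(old, new)  # nan vs 0.5 * ln(0.5 / 0.4), roughly 0.1116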
machine_learning/ridge_regression/ADRvsRating.csv (new file, 1001 lines; file diff suppressed because it is too large)

machine_learning/ridge_regression/__init__.py (new file, empty)

machine_learning/ridge_regression/ridge_regression.py (new file, 82 lines)

@@ -0,0 +1,82 @@
import numpy as np
import pandas as pd


class RidgeRegression:
    def __init__(
        self,
        alpha: float = 0.001,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        self.alpha: float = alpha
        self.regularization_param: float = regularization_param
        self.num_iterations: int = num_iterations
        self.theta: np.ndarray = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)

        # avoid division by zero for constant features (std = 0)
        std[std == 0] = 1  # set std=1 for constant features to avoid NaN

        features_scaled = (features - mean) / std
        return features_scaled, mean, std

    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
        features_scaled, mean, std = self.feature_scaling(features)
        m, n = features_scaled.shape
        self.theta = np.zeros(n)  # initializing weights to zeros

        for _ in range(self.num_iterations):
            predictions = features_scaled.dot(self.theta)
            error = predictions - target

            # computing gradient with L2 regularization
            gradient = (
                features_scaled.T.dot(error) + self.regularization_param * self.theta
            ) / m
            self.theta -= self.alpha * gradient  # updating weights

    def predict(self, features: np.ndarray) -> np.ndarray:
        features_scaled, _, _ = self.feature_scaling(features)
        return features_scaled.dot(self.theta)

    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
        features_scaled, _, _ = self.feature_scaling(features)
        m = len(target)

        predictions = features_scaled.dot(self.theta)
        cost = (1 / (2 * m)) * np.sum((predictions - target) ** 2) + (
            self.regularization_param / (2 * m)
        ) * np.sum(self.theta**2)
        return cost

    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
        return np.mean(np.abs(target - predictions))


# Example usage
if __name__ == "__main__":
    data = pd.read_csv("ADRvsRating.csv")
    features_matrix = data[["Rating"]].to_numpy()
    target = data["ADR"].to_numpy()
    target = (target - np.mean(target)) / np.std(target)

    # added bias term to the feature matrix
    x = np.c_[np.ones(features_matrix.shape[0]), features_matrix]

    # initialize and train the ridge regression model
    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
    model.fit(features_matrix, target)

    # predictions
    predictions = model.predict(features_matrix)

    # results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(features_matrix, target))
    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))
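The fit() loop above is batch gradient descent on the L2-regularized least-squares cost, and its fixed point is the classical closed-form ridge solution theta = (X^T X + lam I)^(-1) X^T y (the 1/m factor in the gradient rescales the step, not the fixed point). A minimal cross-check sketch, assuming the same scaled feature matrix; ridge_closed_form is a hypothetical helper, not part of the commit:

import numpy as np

def ridge_closed_form(x: np.ndarray, y: np.ndarray, lam: float) -> np.ndarray:
    # solve (X^T X + lam I) theta = X^T y; solve() is more stable than inv()
    identity = np.eye(x.shape[1])
    return np.linalg.solve(x.T @ x + lam * identity, x.T @ y)

# with enough iterations, model.theta should approach
# ridge_closed_form(features_scaled, target, model.regularization_param)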
machine_learning/ridge_regression/test_ridge_regression.py (new file, 100 lines)

@@ -0,0 +1,100 @@
"""
Doctest for RidgeRegression class

Tests include:
- feature_scaling
- fit
- predict
- mean_absolute_error

To run these tests, use the following command:
python -m doctest test_ridge_regression.py -v
"""

import numpy as np  # noqa: F401

from machine_learning.ridge_regression.ridge_regression import (
    RidgeRegression,  # noqa: F401
)


def test_feature_scaling():
    """
    Tests the feature_scaling function of RidgeRegression.
    --------
    >>> model = RidgeRegression()
    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
    >>> features_scaled, mean, std = model.feature_scaling(features)
    >>> np.round(features_scaled, 2)
    array([[-1.22, -1.22],
           [ 0.  ,  0.  ],
           [ 1.22,  1.22]])
    >>> np.round(mean, 2)
    array([2., 3.])
    >>> np.round(std, 2)
    array([0.82, 0.82])
    """


def test_fit():
    """
    Tests the fit function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Check if the weights have been updated
    >>> np.round(model.theta, decimals=2)
    array([0.  , 0.79])
    """


def test_predict():
    """
    Tests the predict function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Predict with the model
    >>> predictions = model.predict(features)
    >>> np.round(predictions, decimals=2)
    array([-0.97,  0.  ,  0.97])
    """


def test_mean_absolute_error():
    """
    Tests the mean_absolute_error function of RidgeRegression
    --------
    >>> model = RidgeRegression()
    >>> target = np.array([2, 3, 4])
    >>> predictions = np.array([2.1, 3.0, 3.9])
    >>> mae = model.mean_absolute_error(target, predictions)
    >>> float(np.round(mae, 2))
    0.07
    """


if __name__ == "__main__":
    import doctest

    doctest.testmod()
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!python
 import os

 try: