Merge d7a0b9d245 into e3bd7721c8

validate_filenames.py Shebang python for Windows (#12371 )
[pre-commit.ci] pre-commit autoupdate (#12370 )
2024-11-23 21:11:08 +00:00 · 2024-11-19 00:03:39 +05:30 · 2024-11-15 14:59:14 +01:00 · 2024-11-11 21:05:50 +01:00 · 2024-11-04 21:09:03 +01:00 · 2024-10-24 22:55:44 +05:30
11 changed files with 1283 additions and 10 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -16,7 +16,7 @@ repos:
      - id: auto-walrus

  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.1
+    rev: v0.7.3
    hooks:
      - id: ruff
      - id: ruff-format
@ -29,7 +29,7 @@ repos:
          - tomli

  - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.4.3"
+    rev: "v2.5.0"
    hooks:
      - id: pyproject-fmt

@ -42,7 +42,7 @@ repos:
        pass_filenames: false

  - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.22
+    rev: v0.23
    hooks:
      - id: validate-pyproject

--- a/data_structures/arrays/kadanes_algorithm.py
+++ b/data_structures/arrays/kadanes_algorithm.py
@ -0,0 +1,42 @@
+# Kadane's algorithm
+
+
+def kadanes_algorithm(arr: list[int]) -> int:
+    """
+    Return the largest sum of any non-empty contiguous subarray of ``arr``.
+
+    Uses Kadane's algorithm: a single left-to-right pass over the values.
+    Raises ``ValueError`` if ``arr`` is empty, since no subarray exists.
+
+    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
+    6
+    >>> kadanes_algorithm([-1, -2, -3, -4])
+    -1
+    >>> kadanes_algorithm([5, 4, -1, 7, 8])
+    23
+    >>> kadanes_algorithm([1])
+    1
+    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
+    5
+    >>> kadanes_algorithm([])
+    Traceback (most recent call last):
+        ...
+    ValueError: arr must contain at least one element
+    """
+    if not arr:
+        raise ValueError("arr must contain at least one element")
+
+    # best sum of a subarray ending at the current element / best seen overall
+    max_current = max_global = arr[0]
+    for value in arr[1:]:
+        # either extend the running subarray or restart it at this element
+        max_current = max(value, max_current + value)
+        max_global = max(max_global, max_current)
+
+    return max_global
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()  # run the examples embedded in this module's docstrings
--- a/data_structures/arrays/sudoku_solver.py
+++ b/data_structures/arrays/sudoku_solver.py
@ -172,7 +172,7 @@ def solved(values):

 def from_file(filename, sep="\n"):
    "Parse a file into a list of strings, separated by sep."
-    return open(filename).read().strip().split(sep)  # noqa: SIM115
+    return open(filename).read().strip().split(sep)


 def random_puzzle(assignments=17):
--- a/data_structures/stacks/largest_rectangle_histogram.py
+++ b/data_structures/stacks/largest_rectangle_histogram.py
@ -0,0 +1,39 @@
+def largest_rectangle_area(heights: list[int]) -> int:
+    """
+    Compute the area of the largest rectangle that fits inside a histogram
+    whose bars are one index wide and have the given heights.
+
+    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
+    10
+
+    >>> largest_rectangle_area([2, 4])
+    4
+
+    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
+    12
+
+    >>> largest_rectangle_area([1])
+    1
+    """
+    # a trailing zero-height bar forces every pending bar to be popped
+    bars = [*heights, 0]
+    pending: list[int] = []  # indices of bars with non-decreasing heights
+    best = 0
+
+    for right, current in enumerate(bars):
+        # close every rectangle whose bar is taller than the current one
+        while pending and bars[pending[-1]] > current:
+            top = bars[pending.pop()]
+            # with nothing left on the stack the rectangle starts at index 0
+            width = right - pending[-1] - 1 if pending else right
+            best = max(best, top * width)
+
+        pending.append(right)
+
+    return best
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()  # run the examples embedded in this module's docstrings
--- a/machine_learning/frequent_pattern_growth.py
+++ b/machine_learning/frequent_pattern_growth.py
@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
        ascend_tree(leaf_node.parent, prefix_path)


-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
    """
    Find the conditional pattern base for a given base pattern.

--- a/machine_learning/loss_functions.py
+++ b/machine_learning/loss_functions.py
@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
    return np.mean(loss)


-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
    """
    Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
    and predicted probabilities.

-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.

    KL = Σ(y_true * ln(y_true / y_pred))

@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
    >>> predicted_probs = np.array([0.3, 0.3, 0.4])
    >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
    0.030478754035472025
+
    >>> true_labels = np.array([0.2, 0.3, 0.5])
    >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
    >>> kullback_leibler_divergence(true_labels, predicted_probs)
@ -659,7 +662,13 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
    if len(y_true) != len(y_pred):
        raise ValueError("Input arrays must have the same length.")

-    kl_loss = y_true * np.log(y_true / y_pred)
+    # negligible epsilon to avoid issues with log(0) or division by zero
+    epsilon = 1e-10
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)
+
    return np.sum(kl_loss)


--- a/machine_learning/ridge_regression/ADRvsRating.csv
+++ b/machine_learning/ridge_regression/ADRvsRating.csv
--- a/machine_learning/ridge_regression/init.py
+++ b/machine_learning/ridge_regression/init.py
--- a/machine_learning/ridge_regression/ridge_regression.py
+++ b/machine_learning/ridge_regression/ridge_regression.py
@ -0,0 +1,82 @@
+import numpy as np
+import pandas as pd
+
+
+class RidgeRegression:
+    """
+    Linear regression with an L2 (ridge) penalty, trained by batch gradient descent.
+
+    Features are standardized column-wise before training. The statistics
+    computed in ``fit`` are reused by ``predict`` and ``compute_cost`` so that new
+    samples are mapped into the same scaled space as the training data.
+
+    NOTE: a constant column (for example a bias column of ones) has zero variance,
+    so standardization turns it into all zeros and its weight stays at 0.
+    """
+
+    def __init__(
+        self,
+        alpha: float = 0.001,
+        regularization_param: float = 0.1,
+        num_iterations: int = 1000,
+    ) -> None:
+        """
+        :param alpha: learning rate (step size) of gradient descent
+        :param regularization_param: strength of the L2 penalty on the weights
+        :param num_iterations: number of gradient descent updates run by ``fit``
+        """
+        self.alpha: float = alpha
+        self.regularization_param: float = regularization_param
+        self.num_iterations: int = num_iterations
+        # learned weights and training-set scaling statistics; None until fit()
+        self.theta: np.ndarray | None = None
+        self._feature_mean: np.ndarray | None = None
+        self._feature_std: np.ndarray | None = None
+
+    def feature_scaling(
+        self, features: np.ndarray
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Standardize each column of ``features`` using its own mean and std.
+
+        :param features: array with one row per sample (statistics use axis 0)
+        :return: tuple ``(scaled_features, column_means, column_stds)``, where a
+            std of 0 is reported as 1
+        """
+        mean = np.mean(features, axis=0)
+        std = np.std(features, axis=0)
+
+        # a constant column has std == 0; divide by 1 instead to avoid NaN
+        std[std == 0] = 1
+
+        features_scaled = (features - mean) / std
+        return features_scaled, mean, std
+
+    def _scale(self, features: np.ndarray) -> np.ndarray:
+        """Scale ``features`` with the training statistics stored by ``fit``."""
+        if self._feature_mean is None or self._feature_std is None:
+            # not fitted yet: fall back to the statistics of ``features`` itself
+            return self.feature_scaling(features)[0]
+        return (features - self._feature_mean) / self._feature_std
+
+    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
+        """
+        Learn the weights by gradient descent on the ridge cost.
+
+        :param features: 2-D array of shape (samples, features)
+        :param target: array with one target value per sample
+        """
+        features_scaled, mean, std = self.feature_scaling(features)
+        # remember the training statistics so later inputs are scaled identically
+        self._feature_mean, self._feature_std = mean, std
+        m, n = features_scaled.shape
+        self.theta = np.zeros(n)  # start from all-zero weights
+
+        for _ in range(self.num_iterations):
+            error = features_scaled.dot(self.theta) - target
+
+            # gradient of the squared error plus the L2 penalty, divided by m
+            gradient = (
+                features_scaled.T.dot(error) + self.regularization_param * self.theta
+            ) / m
+            self.theta -= self.alpha * gradient
+
+    def predict(self, features: np.ndarray) -> np.ndarray:
+        """Return the model output for each row of ``features``."""
+        return self._scale(features).dot(self.theta)
+
+    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
+        """
+        Return the ridge cost: ``sum(error ** 2) / (2 * m)`` plus the penalty
+        ``regularization_param / (2 * m) * sum(theta ** 2)``.
+        """
+        m = len(target)
+        residuals = self._scale(features).dot(self.theta) - target
+        squared_error = (1 / (2 * m)) * np.sum(residuals**2)
+        penalty = (self.regularization_param / (2 * m)) * np.sum(self.theta**2)
+        return squared_error + penalty
+
+    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
+        """Return the mean of the absolute differences between the two arrays."""
+        return np.mean(np.abs(target - predictions))
+
+
+# Example usage
+if __name__ == "__main__":
+    from pathlib import Path
+
+    # load the data set that sits next to this module, independent of the cwd
+    data = pd.read_csv(Path(__file__).with_name("ADRvsRating.csv"))
+    features_matrix = data[["Rating"]].to_numpy()
+    target = data["ADR"].to_numpy()
+    # standardize the target so it is centred on zero; the model fits no
+    # intercept because feature scaling zeroes out any constant bias column
+    target = (target - np.mean(target)) / np.std(target)
+
+    # initialize and train the ridge regression model
+    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
+    model.fit(features_matrix, target)
+
+    # predictions
+    predictions = model.predict(features_matrix)
+
+    # results
+    print("Optimized Weights:", model.theta)
+    print("Cost:", model.compute_cost(features_matrix, target))
+    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))
--- a/machine_learning/ridge_regression/test_ridge_regression.py
+++ b/machine_learning/ridge_regression/test_ridge_regression.py
@ -0,0 +1,100 @@
+"""
+Doctest for RidgeRegression class
+
+Tests include:
+- feature_scaling
+- fit
+- predict
+- mean_absolute_error
+
+To run these tests, use the following command:
+    python -m doctest test_ridge_regression.py -v
+"""
+
+import numpy as np  # noqa: F401
+
+from machine_learning.ridge_regression.ridge_regression import (
+    RidgeRegression,  # noqa: F401
+)
+
+
+def test_feature_scaling():
+    """
+    Tests the feature_scaling function of RidgeRegression.
+
+    Each column is standardized independently, and the column means and
+    standard deviations used for the scaling are returned with the result.
+    --------
+    >>> model = RidgeRegression()
+    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
+    >>> features_scaled, mean, std = model.feature_scaling(features)
+    >>> np.round(features_scaled, 2)
+    array([[-1.22, -1.22],
+           [ 0.  ,  0.  ],
+           [ 1.22,  1.22]])
+    >>> np.round(mean, 2)
+    array([2., 3.])
+    >>> np.round(std, 2)
+    array([0.82, 0.82])
+    """
+
+
+def test_fit():
+    """
+    Tests the fit function of RidgeRegression
+
+    The first learned weight stays at 0 because the bias column of ones is
+    constant: feature_scaling subtracts its mean, leaving a column of zeros.
+    --------
+    >>> model = RidgeRegression(alpha=0.01,
+    ...                          regularization_param=0.1,
+    ...                          num_iterations=1000)
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
+
+    # Adding a bias term
+    >>> features = np.c_[np.ones(features.shape[0]), features]
+
+    # Fit the model
+    >>> model.fit(features, target)
+
+    # Check if the weights have been updated
+    >>> np.round(model.theta, decimals=2)
+    array([0.  , 0.79])
+    """
+
+
+def test_predict():
+    """
+    Tests the predict function of RidgeRegression
+
+    The bias column is zeroed by feature scaling, so the model has no
+    intercept and the predictions are centred on 0 rather than on the target.
+    --------
+    >>> model = RidgeRegression(alpha=0.01,
+    ...                          regularization_param=0.1,
+    ...                          num_iterations=1000)
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
+
+    # Adding a bias term
+    >>> features = np.c_[np.ones(features.shape[0]), features]
+
+    # Fit the model
+    >>> model.fit(features, target)
+
+    # Predict with the model
+    >>> predictions = model.predict(features)
+    >>> np.round(predictions, decimals=2)
+    array([-0.97,  0.  ,  0.97])
+    """
+
+
+def test_mean_absolute_error():
+    """
+    Tests the mean_absolute_error function of RidgeRegression
+
+    Expected value: (0.1 + 0.0 + 0.1) / 3, which rounds to 0.07.
+    --------
+    >>> model = RidgeRegression()
+    >>> target = np.array([2, 3, 4])
+    >>> predictions = np.array([2.1, 3.0, 3.9])
+    >>> mae = model.mean_absolute_error(target, predictions)
+    >>> float(np.round(mae, 2))
+    0.07
+    """
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()  # run the examples embedded in the test docstrings
--- a/scripts/validate_filenames.py
+++ b/scripts/validate_filenames.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!python
 import os

 try:
Author	SHA1	Message	Date
Ankana Pari	632f4845aa	Merge `d7a0b9d245` into `e3bd7721c8`	2024-11-19 00:03:39 +05:30
Christian Clauss	e3bd7721c8	`validate_filenames.py` Shebang `python` for Windows (#12371 )	2024-11-15 14:59:14 +01:00
pre-commit-ci[bot]	e3f3d668be	[pre-commit.ci] pre-commit autoupdate (#12370 ) * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.2 → v0.7.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.2...v0.7.3) - [github.com/abravalheri/validate-pyproject: v0.22 → v0.23](https://github.com/abravalheri/validate-pyproject/compare/v0.22...v0.23) * Update sudoku_solver.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Christian Clauss <cclauss@me.com>	2024-11-11 21:05:50 +01:00
pre-commit-ci[bot]	3e9ca92ca9	[pre-commit.ci] pre-commit autoupdate (#12349 ) updates: - [github.com/astral-sh/ruff-pre-commit: v0.7.1 → v0.7.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.1...v0.7.2) - [github.com/tox-dev/pyproject-fmt: v2.4.3 → v2.5.0](https://github.com/tox-dev/pyproject-fmt/compare/v2.4.3...v2.5.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>	2024-11-04 21:09:03 +01:00
Ankana Pari	d7a0b9d245	Merge pull request #4 from ankana2113/master Merge pull request #2 from ankana2113/kadane_algo	2024-10-24 22:55:44 +05:30
Ankana Pari	79648e21ff	Merge pull request #3 from ankana2113/kadane_algo Kadane algo	2024-10-24 22:54:39 +05:30
Ankana Pari	91f03956ec	Merge pull request #2 from ankana2113/kadane_algo Kadane algo	2024-10-24 22:53:37 +05:30
jbsch	bfb8167811	added kadane's algo	2024-10-24 22:39:56 +05:30
jbsch	b00284fa1f	Merge branch 'largest_rect'	2024-10-24 22:28:11 +05:30
jbsch	d0291190f9	Merge branch 'master' of https://github.com/ankana2113/Python	2024-10-24 22:27:54 +05:30
jbsch	50d5bb1af3	added largest rectangle histogram function	2024-10-24 22:13:23 +05:30
jbsch	5c2d1fe725	added largest rectangle histogram function	2024-10-24 22:06:42 +05:30
Ankana Pari	0c04372ebc	Merge pull request #1 from ankana2113/main fixes ruff check in loss_functions.py	2024-10-24 16:44:54 +05:30
jbsch	1459adf8e4	fixed pre-commit issues	2024-10-24 16:41:25 +05:30
jbsch	1ff79750a8	Fix ruff check in loss_functions.py	2024-10-24 16:31:38 +05:30
jbsch	0ea341a18b	descriptive names	2024-10-24 15:28:12 +05:30
jbsch	dcf47d4821	minor changes	2024-10-24 14:58:00 +05:30
jbsch	97eb853842	minor checks	2024-10-24 12:37:54 +05:30
jbsch	254b9bf87e	minor checks	2024-10-24 12:18:13 +05:30
jbsch	f614b2efee	minor chenges	2024-10-24 12:06:04 +05:30
jbsch	1918aac31e	Merge branch 'main' of https://github.com/ankana2113/Python	2024-10-24 12:04:09 +05:30
jbsch	83d7252b3a	ruff and minor checks	2024-10-24 12:03:41 +05:30
pre-commit-ci[bot]	59d3ceba27	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-24 05:24:49 +00:00
jbsch	d8c0b7c7b3	Merge branch 'main' of https://github.com/ankana2113/Python	2024-10-24 10:54:02 +05:30
jbsch	b0255a87fe	added doctests	2024-10-24 10:51:59 +05:30
pre-commit-ci[bot]	d5963b2da7	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-23 15:34:23 +00:00
jbsch	544a38b016	resolved conflicts	2024-10-23 21:01:03 +05:30
pre-commit-ci[bot]	c76784e708	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-23 15:25:54 +00:00
jbsch	38764378d4	resolved conflicts	2024-10-23 20:55:20 +05:30
jbsch	1713cbe7c2	resolved errors	2024-10-23 20:51:58 +05:30
pre-commit-ci[bot]	2eeb450e2d	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-23 15:15:27 +00:00
jbsch	b1353dddd4	ridge regression	2024-10-23 20:44:04 +05:30
jbsch	7484cda516	ridge regression	2024-10-23 20:40:28 +05:30
pre-commit-ci[bot]	21fe32fcbe	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-23 14:21:41 +00:00
jbsch	6fc134d96c	added ridge regression	2024-10-23 19:49:44 +05:30
jbsch	a84d209c08	added ridge regression	2024-10-23 19:47:34 +05:30
pre-commit-ci[bot]	d4fc2bf852	[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci	2024-10-23 14:10:48 +00:00
jbsch	b72320b402	added ridge regression	2024-10-23 19:37:10 +05:30
jbsch	1cb79bc72a	added ridge regression	2024-10-23 19:27:43 +05:30