Compare commits

...

39 Commits

Author SHA1 Message Date
Ankana Pari
632f4845aa
Merge d7a0b9d245 into e3bd7721c8 2024-11-19 00:03:39 +05:30
Christian Clauss
e3bd7721c8
validate_filenames.py Shebang python for Windows (#12371) 2024-11-15 14:59:14 +01:00
pre-commit-ci[bot]
e3f3d668be
[pre-commit.ci] pre-commit autoupdate (#12370)
* [pre-commit.ci] pre-commit autoupdate

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.7.2 → v0.7.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.2...v0.7.3)
- [github.com/abravalheri/validate-pyproject: v0.22 → v0.23](https://github.com/abravalheri/validate-pyproject/compare/v0.22...v0.23)

* Update sudoku_solver.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Christian Clauss <cclauss@me.com>
2024-11-11 21:05:50 +01:00
pre-commit-ci[bot]
3e9ca92ca9
[pre-commit.ci] pre-commit autoupdate (#12349)
updates:
- [github.com/astral-sh/ruff-pre-commit: v0.7.1 → v0.7.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.7.1...v0.7.2)
- [github.com/tox-dev/pyproject-fmt: v2.4.3 → v2.5.0](https://github.com/tox-dev/pyproject-fmt/compare/v2.4.3...v2.5.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 21:09:03 +01:00
Ankana Pari
d7a0b9d245
Merge pull request #4 from ankana2113/master
Merge pull request #2 from ankana2113/kadane_algo
2024-10-24 22:55:44 +05:30
Ankana Pari
79648e21ff
Merge pull request #3 from ankana2113/kadane_algo
Kadane algo
2024-10-24 22:54:39 +05:30
Ankana Pari
91f03956ec
Merge pull request #2 from ankana2113/kadane_algo
Kadane algo
2024-10-24 22:53:37 +05:30
jbsch
bfb8167811 added kadane's algo 2024-10-24 22:39:56 +05:30
jbsch
b00284fa1f Merge branch 'largest_rect' 2024-10-24 22:28:11 +05:30
jbsch
d0291190f9 Merge branch 'master' of https://github.com/ankana2113/Python 2024-10-24 22:27:54 +05:30
jbsch
50d5bb1af3 added largest rectangle histogram function 2024-10-24 22:13:23 +05:30
jbsch
5c2d1fe725 added largest rectangle histogram function 2024-10-24 22:06:42 +05:30
Ankana Pari
0c04372ebc
Merge pull request #1 from ankana2113/main
fixes ruff check in loss_functions.py
2024-10-24 16:44:54 +05:30
jbsch
1459adf8e4 fixed pre-commit issues 2024-10-24 16:41:25 +05:30
jbsch
1ff79750a8 Fix ruff check in loss_functions.py 2024-10-24 16:31:38 +05:30
jbsch
0ea341a18b descriptive names 2024-10-24 15:28:12 +05:30
jbsch
dcf47d4821 minor changes 2024-10-24 14:58:00 +05:30
jbsch
97eb853842 minor checks 2024-10-24 12:37:54 +05:30
jbsch
254b9bf87e minor checks 2024-10-24 12:18:13 +05:30
jbsch
f614b2efee minor changes 2024-10-24 12:06:04 +05:30
jbsch
1918aac31e Merge branch 'main' of https://github.com/ankana2113/Python 2024-10-24 12:04:09 +05:30
jbsch
83d7252b3a ruff and minor checks 2024-10-24 12:03:41 +05:30
pre-commit-ci[bot]
59d3ceba27 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-24 05:24:49 +00:00
jbsch
d8c0b7c7b3 Merge branch 'main' of https://github.com/ankana2113/Python 2024-10-24 10:54:02 +05:30
jbsch
b0255a87fe added doctests 2024-10-24 10:51:59 +05:30
pre-commit-ci[bot]
d5963b2da7 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-23 15:34:23 +00:00
jbsch
544a38b016 resolved conflicts 2024-10-23 21:01:03 +05:30
pre-commit-ci[bot]
c76784e708 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-23 15:25:54 +00:00
jbsch
38764378d4 resolved conflicts 2024-10-23 20:55:20 +05:30
jbsch
1713cbe7c2 resolved errors 2024-10-23 20:51:58 +05:30
pre-commit-ci[bot]
2eeb450e2d [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-23 15:15:27 +00:00
jbsch
b1353dddd4 ridge regression 2024-10-23 20:44:04 +05:30
jbsch
7484cda516 ridge regression 2024-10-23 20:40:28 +05:30
pre-commit-ci[bot]
21fe32fcbe [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-23 14:21:41 +00:00
jbsch
6fc134d96c added ridge regression 2024-10-23 19:49:44 +05:30
jbsch
a84d209c08 added ridge regression 2024-10-23 19:47:34 +05:30
pre-commit-ci[bot]
d4fc2bf852 [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
2024-10-23 14:10:48 +00:00
jbsch
b72320b402 added ridge regression 2024-10-23 19:37:10 +05:30
jbsch
1cb79bc72a added ridge regression 2024-10-23 19:27:43 +05:30
11 changed files with 1283 additions and 10 deletions

View File

@@ -16,7 +16,7 @@ repos:
       - id: auto-walrus

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.1
+    rev: v0.7.3
     hooks:
       - id: ruff
       - id: ruff-format
@@ -29,7 +29,7 @@ repos:
           - tomli

   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.4.3"
+    rev: "v2.5.0"
     hooks:
       - id: pyproject-fmt
@@ -42,7 +42,7 @@ repos:
         pass_filenames: false

   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.22
+    rev: v0.23
     hooks:
       - id: validate-pyproject

View File

@@ -0,0 +1,42 @@
# Kadane's algorithm


def kadanes_algorithm(arr: list[int]) -> int:
    """
    Function to find the maximum sum of a contiguous subarray using Kadane's algorithm

    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    6

    >>> kadanes_algorithm([-1, -2, -3, -4])
    -1

    >>> kadanes_algorithm([5, 4, -1, 7, 8])
    23

    >>> kadanes_algorithm([1])
    1

    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
    5
    """
    # initializing variables
    max_current = arr[0]  # store the current max sum
    max_global = arr[0]  # store the global max sum

    # looping through the array starting at the second element
    for i in range(1, len(arr)):
        # update current max sum by choosing the maximum between
        # current element alone or current element plus previous max
        max_current = max(arr[i], max_current + arr[i])

        # update global max sum if current max is larger
        max_global = max(max_current, max_global)

    return max_global


if __name__ == "__main__":
    import doctest

    doctest.testmod()
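
The file above implements the standard O(n) dynamic-programming recurrence: at each index, the best subarray ending there is either the current element alone or the current element plus the best subarray ending at the previous index. A minimal usage sketch (illustrative only; the import path is hypothetical and depends on where the file lands in the repository):

# Hypothetical import path -- adjust to the file's actual location.
from kadanes_algorithm import kadanes_algorithm

# The best contiguous subarray of the first doctest is [4, -1, 2, 1], summing to 6.
print(kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4]))  # 6
# With all-negative input, the answer is the single largest element.
print(kadanes_algorithm([-1, -2, -3, -4]))  # -1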

View File

@@ -172,7 +172,7 @@ def solved(values):
 def from_file(filename, sep="\n"):
     "Parse a file into a list of strings, separated by sep."
-    return open(filename).read().strip().split(sep)  # noqa: SIM115
+    return open(filename).read().strip().split(sep)
 def random_puzzle(assignments=17):

View File

@@ -0,0 +1,39 @@
def largest_rectangle_area(heights: list[int]) -> int:
    """
    Inputs an array of integers representing the heights of bars,
    and returns the area of the largest rectangle that can be formed

    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
    10

    >>> largest_rectangle_area([2, 4])
    4

    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
    12

    >>> largest_rectangle_area([1])
    1
    """
    stack: list[int] = []
    max_area = 0
    heights = [*heights, 0]  # make a new list by appending the sentinel 0
    n = len(heights)

    for i in range(n):
        # make sure the stack remains in increasing order
        while stack and heights[i] < heights[stack[-1]]:
            h = heights[stack.pop()]  # height of the bar
            # if stack is empty, it means entire width can be taken from index 0 to i-1
            w = i if not stack else i - stack[-1] - 1  # calculate width
            max_area = max(max_area, h * w)
        stack.append(i)

    return max_area


if __name__ == "__main__":
    import doctest

    doctest.testmod()
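
The function above is the classic monotonic-stack solution: indices stay on the stack in increasing height order, the appended sentinel 0 forces every bar to be popped, and each pop computes the widest rectangle limited by the popped bar, so the whole pass is O(n). A brute-force cross-check for small inputs (illustrative only, not part of the changeset):

def largest_rectangle_area_naive(heights: list[int]) -> int:
    # O(n^2) reference: for every window, area = shortest bar * window width
    max_area = 0
    for i in range(len(heights)):
        min_height = heights[i]
        for j in range(i, len(heights)):
            min_height = min(min_height, heights[j])
            max_area = max(max_area, min_height * (j - i + 1))
    return max_area

assert largest_rectangle_area_naive([2, 1, 5, 6, 2, 3]) == 10
assert largest_rectangle_area_naive([6, 2, 5, 4, 5, 1, 6]) == 12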

View File

@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
     ascend_tree(leaf_node.parent, prefix_path)


-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.

View File

@@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) -> float:
     return np.mean(loss)


-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
     and predicted probabilities.

-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.

     KL = Σ(y_true * ln(y_true / y_pred))
@@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     >>> predicted_probs = np.array([0.3, 0.3, 0.4])
     >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
     0.030478754035472025
+
     >>> true_labels = np.array([0.2, 0.3, 0.5])
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)
@@ -659,7 +662,13 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")

-    kl_loss = y_true * np.log(y_true / y_pred)
+    # negligible epsilon to avoid issues with log(0) or division by zero
+    epsilon = 1e-10
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)

     return np.sum(kl_loss)
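
The new body guards the logarithm in two ways: predictions are clipped to a small epsilon so log(0) and division by zero cannot occur, and terms where y_true is zero are skipped, following the convention 0 * ln(0) = 0. A standalone sketch of the same computation on the doctest's arrays (illustrative only):

import numpy as np

y_true = np.array([0.2, 0.3, 0.5])
y_pred = np.array([0.3, 0.3, 0.4])

epsilon = 1e-10
y_pred = np.clip(y_pred, epsilon, None)  # guard against log(0) / division by zero
kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)
print(float(np.sum(kl_loss)))  # ~0.030478754035472025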

File diff suppressed because it is too large.

View File

@@ -0,0 +1,82 @@
import numpy as np
import pandas as pd


class RidgeRegression:
    def __init__(
        self,
        alpha: float = 0.001,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        self.alpha: float = alpha
        self.regularization_param: float = regularization_param
        self.num_iterations: int = num_iterations
        self.theta: np.ndarray | None = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)

        # avoid division by zero for constant features (std = 0)
        std[std == 0] = 1  # set std=1 for constant features to avoid NaN

        features_scaled = (features - mean) / std
        return features_scaled, mean, std

    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
        features_scaled, mean, std = self.feature_scaling(features)
        m, n = features_scaled.shape
        self.theta = np.zeros(n)  # initializing weights to zeros

        for _ in range(self.num_iterations):
            predictions = features_scaled.dot(self.theta)
            error = predictions - target

            # computing gradient with L2 regularization
            gradient = (
                features_scaled.T.dot(error) + self.regularization_param * self.theta
            ) / m
            self.theta -= self.alpha * gradient  # updating weights

    def predict(self, features: np.ndarray) -> np.ndarray:
        features_scaled, _, _ = self.feature_scaling(features)
        return features_scaled.dot(self.theta)

    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
        features_scaled, _, _ = self.feature_scaling(features)
        m = len(target)

        predictions = features_scaled.dot(self.theta)
        cost = (1 / (2 * m)) * np.sum((predictions - target) ** 2) + (
            self.regularization_param / (2 * m)
        ) * np.sum(self.theta**2)
        return cost

    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
        return np.mean(np.abs(target - predictions))


# Example usage
if __name__ == "__main__":
    data = pd.read_csv("ADRvsRating.csv")
    features_matrix = data[["Rating"]].to_numpy()
    target = data["ADR"].to_numpy()
    target = (target - np.mean(target)) / np.std(target)

    # add a bias term to the feature matrix
    x = np.c_[np.ones(features_matrix.shape[0]), features_matrix]

    # initialize and train the ridge regression model
    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
    model.fit(x, target)

    # predictions
    predictions = model.predict(x)

    # results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(x, target))
    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))

View File

@@ -0,0 +1,100 @@
"""
Doctest for RidgeRegression class

Tests include:
- feature_scaling
- fit
- predict
- mean_absolute_error

To run these tests, use the following command:
python -m doctest test_ridge_regression.py -v
"""

import numpy as np  # noqa: F401

from machine_learning.ridge_regression.ridge_regression import (
    RidgeRegression,  # noqa: F401
)


def test_feature_scaling():
    """
    Tests the feature_scaling function of RidgeRegression.
    --------
    >>> model = RidgeRegression()
    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
    >>> features_scaled, mean, std = model.feature_scaling(features)
    >>> np.round(features_scaled, 2)
    array([[-1.22, -1.22],
           [ 0.  ,  0.  ],
           [ 1.22,  1.22]])
    >>> np.round(mean, 2)
    array([2., 3.])
    >>> np.round(std, 2)
    array([0.82, 0.82])
    """


def test_fit():
    """
    Tests the fit function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Check if the weights have been updated
    >>> np.round(model.theta, decimals=2)
    array([0.  , 0.79])
    """


def test_predict():
    """
    Tests the predict function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Predict with the model
    >>> predictions = model.predict(features)
    >>> np.round(predictions, decimals=2)
    array([-0.97,  0.  ,  0.97])
    """


def test_mean_absolute_error():
    """
    Tests the mean_absolute_error function of RidgeRegression
    --------
    >>> model = RidgeRegression()
    >>> target = np.array([2, 3, 4])
    >>> predictions = np.array([2.1, 3.0, 3.9])
    >>> mae = model.mean_absolute_error(target, predictions)
    >>> float(np.round(mae, 2))
    0.07
    """


if __name__ == "__main__":
    import doctest

    doctest.testmod()

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!python
 import os

 try: