Mirror of https://github.com/TheAlgorithms/Python.git, synced 2024-11-27 15:01:08 +00:00

Comparing 39 commits: c1fe8c761f ... 632f4845aa
Commits:
632f4845aa, e3bd7721c8, e3f3d668be, 3e9ca92ca9, d7a0b9d245, 79648e21ff,
91f03956ec, bfb8167811, b00284fa1f, d0291190f9, 50d5bb1af3, 5c2d1fe725,
0c04372ebc, 1459adf8e4, 1ff79750a8, 0ea341a18b, dcf47d4821, 97eb853842,
254b9bf87e, f614b2efee, 1918aac31e, 83d7252b3a, 59d3ceba27, d8c0b7c7b3,
b0255a87fe, d5963b2da7, 544a38b016, c76784e708, 38764378d4, 1713cbe7c2,
2eeb450e2d, b1353dddd4, 7484cda516, 21fe32fcbe, 6fc134d96c, a84d209c08,
d4fc2bf852, b72320b402, 1cb79bc72a
.pre-commit-config.yaml:

@@ -16,7 +16,7 @@ repos:
       - id: auto-walrus

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.1
+    rev: v0.7.3
     hooks:
       - id: ruff
       - id: ruff-format

@@ -29,7 +29,7 @@ repos:
           - tomli

   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: "v2.4.3"
+    rev: "v2.5.0"
     hooks:
       - id: pyproject-fmt

@@ -42,7 +42,7 @@ repos:
         pass_filenames: false

   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.22
+    rev: v0.23
     hooks:
       - id: validate-pyproject
data_structures/arrays/kadanes_algorithm.py (new file, 42 lines)

@@ -0,0 +1,42 @@
# Kadane's algorithm


def kadanes_algorithm(arr: list[int]) -> int:
    """
    Function to find the maximum sum of a contiguous subarray using Kadane's algorithm

    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    6

    >>> kadanes_algorithm([-1, -2, -3, -4])
    -1

    >>> kadanes_algorithm([5, 4, -1, 7, 8])
    23

    >>> kadanes_algorithm([1])
    1

    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
    5
    """
    # initializing variables
    max_current = arr[0]  # store the current max sum
    max_global = arr[0]  # store the global max sum

    # looping through the array starting at the second element
    for i in range(1, len(arr)):
        # update current max sum by choosing the maximum between
        # current element alone or current element plus previous max
        max_current = max(arr[i], max_current + arr[i])

        # update global max sum if current max is larger
        max_global = max(max_current, max_global)

    return max_global


if __name__ == "__main__":
    import doctest

    doctest.testmod()
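Aside: the recurrence above (max_current = max(arr[i], max_current + arr[i])) can also recover which subarray attains the maximum sum. A minimal sketch of that extension, assuming the same input convention; kadanes_with_indices is a hypothetical helper, not part of the committed file:

def kadanes_with_indices(arr: list[int]) -> tuple[int, int, int]:
    """Return (max_sum, start, end) of the best contiguous subarray.

    >>> kadanes_with_indices([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    (6, 3, 6)
    """
    max_current = max_global = arr[0]
    start = best_start = best_end = 0
    for i in range(1, len(arr)):
        if arr[i] > max_current + arr[i]:
            max_current = arr[i]  # restarting the window at i beats extending it
            start = i
        else:
            max_current += arr[i]
        if max_current > max_global:  # new global best: record the window bounds
            max_global = max_current
            best_start, best_end = start, i
    return max_global, best_start, best_end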
@@ -172,7 +172,7 @@ def solved(values):


 def from_file(filename, sep="\n"):
     "Parse a file into a list of strings, separated by sep."
-    return open(filename).read().strip().split(sep)  # noqa: SIM115
+    return open(filename).read().strip().split(sep)


 def random_puzzle(assignments=17):
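For context: SIM115 is the Ruff (flake8-simplify) rule that flags open() calls made outside a context manager, so this change only drops a suppression comment. A resource-safe alternative would close the file deterministically; a minimal sketch, assuming pathlib is acceptable here (from_file_safe is hypothetical, the repository keeps the one-liner above):

from pathlib import Path

def from_file_safe(filename, sep="\n"):
    "Parse a file into a list of strings, separated by sep."
    # read_text() opens and closes the file itself, so SIM115 does not apply
    return Path(filename).read_text().strip().split(sep)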
data_structures/stacks/largest_rectangle_histogram.py (new file, 39 lines)

@@ -0,0 +1,39 @@
def largest_rectangle_area(heights: list[int]) -> int:
    """
    Inputs an array of integers representing the heights of bars,
    and returns the area of the largest rectangle that can be formed

    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
    10

    >>> largest_rectangle_area([2, 4])
    4

    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
    12

    >>> largest_rectangle_area([1])
    1
    """
    stack: list[int] = []
    max_area = 0
    heights = [*heights, 0]  # make a new list by appending the sentinel 0
    n = len(heights)

    for i in range(n):
        # make sure the stack remains in increasing order
        while stack and heights[i] < heights[stack[-1]]:
            h = heights[stack.pop()]  # height of the bar
            # if stack is empty, it means entire width can be taken from index 0 to i-1
            w = i if not stack else i - stack[-1] - 1  # calculate width
            max_area = max(max_area, h * w)

        stack.append(i)

    return max_area


if __name__ == "__main__":
    import doctest

    doctest.testmod()
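The invariant above (the stack holds indices of bars in increasing height order, and the appended sentinel 0 flushes everything at the end) is easy to get subtly wrong, so a quadratic reference implementation is useful for cross-checking. A minimal sketch; largest_rectangle_bruteforce is a hypothetical helper, not part of the commit:

def largest_rectangle_bruteforce(heights: list[int]) -> int:
    # for every window [i, j], the rectangle height is the shortest bar inside it
    max_area = 0
    for i in range(len(heights)):
        min_h = heights[i]
        for j in range(i, len(heights)):
            min_h = min(min_h, heights[j])
            max_area = max(max_area, min_h * (j - i + 1))
    return max_area

# e.g. both implementations give 10 for [2, 1, 5, 6, 2, 3]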
@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
         ascend_tree(leaf_node.parent, prefix_path)


-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.
@@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     return np.mean(loss)


-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
     and predicted probabilities.

-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.

     KL = Σ(y_true * ln(y_true / y_pred))

@@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     >>> predicted_probs = np.array([0.3, 0.3, 0.4])
     >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
     0.030478754035472025

     >>> true_labels = np.array([0.2, 0.3, 0.5])
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)

@@ -659,7 +662,13 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")

-    kl_loss = y_true * np.log(y_true / y_pred)
+    # negligible epsilon to avoid issues with log(0) or division by zero
+    epsilon = 1e-10
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)

     return np.sum(kl_loss)
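The point of the new guard: with the old one-liner, any entry where y_true is 0 evaluates 0 * np.log(0 / y_pred), i.e. 0 * -inf, which is nan and poisons the whole sum, and a 0 in y_pred divides by zero. The np.clip/np.where pair instead defines such terms as 0, matching the convention that x * ln(x / q) -> 0 as x -> 0. (Note that the new body still assigns epsilon = 1e-10 locally, which shadows the newly added epsilon parameter.) A minimal sketch of the difference, assuming only NumPy:

import numpy as np

y_true = np.array([0.0, 0.5, 0.5])
y_pred = np.array([0.1, 0.5, 0.4])

# old form: 0 * log(0 / 0.1) -> nan (NumPy warns and the sum becomes nan)
old = np.sum(y_true * np.log(y_true / y_pred))

# new form: zero-probability terms contribute exactly 0
clipped = np.clip(y_pred, 1e-10, None)
new = np.sum(np.where(y_true != 0, y_true * np.log(y_true / clipped), 0.0))

print(old, new)  # nan vs 0.5 * ln(0.5 / 0.4), roughly 0.1116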
machine_learning/ridge_regression/ADRvsRating.csv (new file, 1001 lines; file diff suppressed because it is too large)

machine_learning/ridge_regression/__init__.py (new file, empty)

machine_learning/ridge_regression/ridge_regression.py (new file, 82 lines)

@@ -0,0 +1,82 @@
import numpy as np
import pandas as pd


class RidgeRegression:
    def __init__(
        self,
        alpha: float = 0.001,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        self.alpha: float = alpha
        self.regularization_param: float = regularization_param
        self.num_iterations: int = num_iterations
        self.theta: np.ndarray = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)

        # avoid division by zero for constant features (std = 0)
        std[std == 0] = 1  # set std=1 for constant features to avoid NaN

        features_scaled = (features - mean) / std
        return features_scaled, mean, std

    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
        features_scaled, mean, std = self.feature_scaling(features)
        m, n = features_scaled.shape
        self.theta = np.zeros(n)  # initializing weights to zeros

        for _ in range(self.num_iterations):
            predictions = features_scaled.dot(self.theta)
            error = predictions - target

            # computing gradient with L2 regularization
            gradient = (
                features_scaled.T.dot(error) + self.regularization_param * self.theta
            ) / m
            self.theta -= self.alpha * gradient  # updating weights

    def predict(self, features: np.ndarray) -> np.ndarray:
        features_scaled, _, _ = self.feature_scaling(features)
        return features_scaled.dot(self.theta)

    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
        features_scaled, _, _ = self.feature_scaling(features)
        m = len(target)

        predictions = features_scaled.dot(self.theta)
        cost = (1 / (2 * m)) * np.sum((predictions - target) ** 2) + (
            self.regularization_param / (2 * m)
        ) * np.sum(self.theta**2)
        return cost

    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
        return np.mean(np.abs(target - predictions))


# Example usage
if __name__ == "__main__":
    data = pd.read_csv("ADRvsRating.csv")
    features_matrix = data[["Rating"]].to_numpy()
    target = data["ADR"].to_numpy()
    target = (target - np.mean(target)) / np.std(target)

    # added bias term to the feature matrix
    x = np.c_[np.ones(features_matrix.shape[0]), features_matrix]

    # initialize and train the ridge regression model
    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
    model.fit(features_matrix, target)

    # predictions
    predictions = model.predict(features_matrix)

    # results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(features_matrix, target))
    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))
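The fit() loop above is batch gradient descent on the L2-regularized least-squares cost, and its fixed point is the classical closed-form ridge solution theta = (X^T X + lam I)^(-1) X^T y (the 1/m factor in the gradient rescales the step, not the fixed point). A minimal cross-check sketch, assuming the same scaled feature matrix; ridge_closed_form is a hypothetical helper, not part of the commit:

import numpy as np

def ridge_closed_form(x: np.ndarray, y: np.ndarray, lam: float) -> np.ndarray:
    # solve (X^T X + lam I) theta = X^T y; solve() is more stable than inv()
    identity = np.eye(x.shape[1])
    return np.linalg.solve(x.T @ x + lam * identity, x.T @ y)

# with enough iterations, model.theta should approach
# ridge_closed_form(features_scaled, target, model.regularization_param)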
machine_learning/ridge_regression/test_ridge_regression.py (new file, 100 lines)

@@ -0,0 +1,100 @@
"""
Doctest for RidgeRegression class

Tests include:
- feature_scaling
- fit
- predict
- mean_absolute_error

To run these tests, use the following command:
python -m doctest test_ridge_regression.py -v
"""

import numpy as np  # noqa: F401

from machine_learning.ridge_regression.ridge_regression import (
    RidgeRegression,  # noqa: F401
)


def test_feature_scaling():
    """
    Tests the feature_scaling function of RidgeRegression.
    --------
    >>> model = RidgeRegression()
    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
    >>> features_scaled, mean, std = model.feature_scaling(features)
    >>> np.round(features_scaled, 2)
    array([[-1.22, -1.22],
           [ 0.  ,  0.  ],
           [ 1.22,  1.22]])
    >>> np.round(mean, 2)
    array([2., 3.])
    >>> np.round(std, 2)
    array([0.82, 0.82])
    """


def test_fit():
    """
    Tests the fit function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Check if the weights have been updated
    >>> np.round(model.theta, decimals=2)
    array([0.  , 0.79])
    """


def test_predict():
    """
    Tests the predict function of RidgeRegression
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    # Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    # Fit the model
    >>> model.fit(features, target)

    # Predict with the model
    >>> predictions = model.predict(features)
    >>> np.round(predictions, decimals=2)
    array([-0.97,  0.  ,  0.97])
    """


def test_mean_absolute_error():
    """
    Tests the mean_absolute_error function of RidgeRegression
    --------
    >>> model = RidgeRegression()
    >>> target = np.array([2, 3, 4])
    >>> predictions = np.array([2.1, 3.0, 3.9])
    >>> mae = model.mean_absolute_error(target, predictions)
    >>> float(np.round(mae, 2))
    0.07
    """


if __name__ == "__main__":
    import doctest

    doctest.testmod()
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!python
 import os

 try: