This commit is contained in:
Ankana Pari 2024-11-19 00:03:39 +05:30 committed by GitHub
commit 632f4845aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1278 additions and 5 deletions

View File

@ -0,0 +1,42 @@
# Kadane's algorithm
def kadanes_algorithm(arr: list[int]) -> int:
    """
    Return the maximum sum of a non-empty contiguous subarray (Kadane's algorithm).

    The array is scanned once. At every position the best sum of a subarray
    *ending* there is either the element alone or the element appended to the
    best subarray ending at the previous position.

    :param arr: non-empty sequence of numbers
    :return: the largest sum obtainable from any contiguous, non-empty slice
    :raises ValueError: if ``arr`` is empty (no subarray exists)

    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
    6
    >>> kadanes_algorithm([-1, -2, -3, -4])
    -1
    >>> kadanes_algorithm([5, 4, -1, 7, 8])
    23
    >>> kadanes_algorithm([1])
    1
    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
    5
    >>> kadanes_algorithm([])
    Traceback (most recent call last):
        ...
    ValueError: arr must contain at least one element
    """
    # len() rather than truthiness so array-like inputs are accepted too
    if len(arr) == 0:
        raise ValueError("arr must contain at least one element")

    # both running values start from the first element so that an
    # all-negative input yields its largest element rather than 0
    max_current = arr[0]  # best sum of a subarray ending at the current element
    max_global = arr[0]  # best sum seen anywhere so far

    for value in arr[1:]:
        # either start a new subarray here or extend the previous one
        max_current = max(value, max_current + value)
        max_global = max(max_current, max_global)

    return max_global
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    from doctest import testmod

    testmod()

View File

@ -0,0 +1,39 @@
def largest_rectangle_area(heights: list[int]) -> int:
    """
    Return the area of the largest axis-aligned rectangle that fits inside a
    histogram whose bars all have width 1 and the given heights.

    A stack of bar indices with increasing heights is maintained. When a lower
    bar arrives, every taller bar on the stack is closed and the widest
    rectangle using that bar's full height is evaluated.

    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
    10
    >>> largest_rectangle_area([2, 4])
    4
    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
    12
    >>> largest_rectangle_area([1])
    1
    """
    # a trailing zero-height bar forces every remaining bar to be closed
    bars = [*heights, 0]
    open_bars: list[int] = []  # indices of bars not yet closed
    best = 0

    for right, current in enumerate(bars):
        # close every bar that is taller than the one just reached
        while open_bars and bars[open_bars[-1]] > current:
            closed_height = bars[open_bars.pop()]
            # the rectangle spans from just after the previous open bar
            # (or from index 0 when none remains) up to index right - 1
            left = open_bars[-1] if open_bars else -1
            best = max(best, closed_height * (right - left - 1))
        open_bars.append(right)

    return best
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    from doctest import testmod

    testmod()

View File

@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
ascend_tree(leaf_node.parent, prefix_path)
def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict: # noqa: ARG001
def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
"""
Find the conditional pattern base for a given base pattern.

View File

@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
return np.mean(loss)
def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
def kullback_leibler_divergence(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
) -> float:
"""
Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
and predicted probabilities.
KL divergence loss quantifies dissimilarity between true labels and predicted
probabilities. It's often used in training generative models.
KL divergence loss quantifies the dissimilarity between true labels and predicted
probabilities. It is often used in training generative models.
KL = Σ(y_true * ln(y_true / y_pred))
@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
>>> predicted_probs = np.array([0.3, 0.3, 0.4])
>>> float(kullback_leibler_divergence(true_labels, predicted_probs))
0.030478754035472025
>>> true_labels = np.array([0.2, 0.3, 0.5])
>>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
>>> kullback_leibler_divergence(true_labels, predicted_probs)
@ -659,7 +662,13 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
kl_loss = y_true * np.log(y_true / y_pred)
# negligible epsilon to avoid issues with log(0) or division by zero
epsilon = 1e-10
y_pred = np.clip(y_pred, epsilon, None)
# calculate KL divergence only where y_true is not zero
kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)
return np.sum(kl_loss)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,82 @@
import numpy as np
import pandas as pd
class RidgeRegression:
    """
    Linear regression with an L2 (ridge) penalty, trained by batch gradient descent.

    Every feature column is standardised before training. The mean and standard
    deviation measured in ``fit`` are stored on the instance and reused by
    ``predict`` and ``compute_cost`` so that later data is scaled exactly like
    the training data.

    No intercept is learned: a constant (bias) column has zero deviation from
    its mean, so it is scaled to all zeros and its weight stays at 0.
    """

    def __init__(
        self,
        alpha: float = 0.001,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        """
        :param alpha: learning rate of the gradient-descent update
        :param regularization_param: strength of the L2 penalty
        :param num_iterations: number of gradient-descent steps run by ``fit``
        """
        self.alpha: float = alpha
        self.regularization_param: float = regularization_param
        self.num_iterations: int = num_iterations
        # learned weights; None until fit() has been called
        self.theta: np.ndarray | None = None
        # per-column statistics of the training features; None until fit()
        self.feature_mean: np.ndarray | None = None
        self.feature_std: np.ndarray | None = None

    def feature_scaling(
        self, features: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Standardise ``features`` column-wise using its own mean and std.

        :param features: 2-D array with one row per sample
        :return: ``(scaled_features, mean, std)``; ``std`` has zeros replaced by 1
        """
        mean = np.mean(features, axis=0)
        std = np.std(features, axis=0)
        # constant columns have std == 0; use 1 instead so the division
        # yields 0 for them rather than NaN
        std = np.where(std == 0, 1.0, std)
        features_scaled = (features - mean) / std
        return features_scaled, mean, std

    def _scale_like_training(self, features: np.ndarray) -> np.ndarray:
        """Scale ``features`` with the statistics stored by ``fit``."""
        if self.theta is None:
            raise ValueError("Model is not fitted yet; call fit() first.")
        if self.feature_mean is None or self.feature_std is None:
            # weights were assigned by hand without fit(): no training
            # statistics exist, so fall back to scaling the batch by itself
            return self.feature_scaling(features)[0]
        return (features - self.feature_mean) / self.feature_std

    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
        """
        Learn ``theta`` by gradient descent on the standardised features.

        :param features: 2-D array of shape (samples, n_features)
        :param target: 1-D array with one value per sample
        """
        features_scaled, self.feature_mean, self.feature_std = self.feature_scaling(
            features
        )
        m, n = features_scaled.shape
        self.theta = np.zeros(n)  # initializing weights to zeros

        for _ in range(self.num_iterations):
            error = features_scaled.dot(self.theta) - target
            # gradient of the squared error plus the L2 penalty term
            gradient = (
                features_scaled.T.dot(error) + self.regularization_param * self.theta
            ) / m
            self.theta -= self.alpha * gradient  # updating weights

    def predict(self, features: np.ndarray) -> np.ndarray:
        """
        Predict targets for ``features`` using the training-time scaling.

        :raises ValueError: if the model has not been fitted
        """
        return self._scale_like_training(features).dot(self.theta)

    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
        """
        Return the ridge cost: halved mean squared error plus the L2 penalty.

        :raises ValueError: if the model has not been fitted
        """
        features_scaled = self._scale_like_training(features)
        m = len(target)
        predictions = features_scaled.dot(self.theta)
        squared_error = np.sum((predictions - target) ** 2)
        penalty = self.regularization_param * np.sum(self.theta**2)
        return (1 / (2 * m)) * squared_error + penalty / (2 * m)

    def mean_absolute_error(self, target: np.ndarray, predictions: np.ndarray) -> float:
        """Return the mean of ``|target - predictions|``."""
        return np.mean(np.abs(target - predictions))
# Example usage
if __name__ == "__main__":
    # Demo: fit ridge regression of ADR on Rating using the CSV file that is
    # expected in the current working directory.
    data = pd.read_csv("ADRvsRating.csv")
    features_matrix = data[["Rating"]].to_numpy()
    target = data["ADR"].to_numpy()
    # standardise the target as well
    target = (target - np.mean(target)) / np.std(target)

    # No bias column is added: the model standardises its features, which
    # would turn a constant column into all zeros.

    # initialize and train the ridge regression model
    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
    model.fit(features_matrix, target)

    # predictions
    predictions = model.predict(features_matrix)

    # results
    print("Optimized Weights:", model.theta)
    print("Cost:", model.compute_cost(features_matrix, target))
    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))

View File

@ -0,0 +1,100 @@
"""
Doctest for RidgeRegression class
Tests include:
- feature_scaling
- fit
- predict
- mean_absolute_error
To run these tests, use the following command:
python -m doctest test_ridge_regression.py -v
"""
import numpy as np # noqa: F401
from machine_learning.ridge_regression.ridge_regression import (
RidgeRegression, # noqa: F401
)
def test_feature_scaling() -> None:
    """
    Tests the feature_scaling function of RidgeRegression.

    Each column is standardised with its own mean and (population) standard
    deviation, and the statistics are returned alongside the scaled data.
    The expected outputs use NumPy's exact repr spacing, because doctest
    compares output text character by character.
    --------
    >>> model = RidgeRegression()
    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
    >>> features_scaled, mean, std = model.feature_scaling(features)
    >>> np.round(features_scaled, 2)
    array([[-1.22, -1.22],
           [ 0.  ,  0.  ],
           [ 1.22,  1.22]])
    >>> np.round(mean, 2)
    array([2., 3.])
    >>> np.round(std, 2)
    array([0.82, 0.82])
    """
def test_fit() -> None:
    """
    Tests the fit function of RidgeRegression

    Explanatory notes are separated from the preceding example by a blank
    line; otherwise doctest would read them as that example's expected output.
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    Fit the model
    >>> model.fit(features, target)

    Check if the weights have been updated
    >>> np.round(model.theta, decimals=2)
    array([0.  , 0.79])
    """
def test_predict() -> None:
    """
    Tests the predict function of RidgeRegression

    Explanatory notes are separated from the preceding example by a blank
    line; otherwise doctest would read them as that example's expected output.
    --------
    >>> model = RidgeRegression(alpha=0.01,
    ...                         regularization_param=0.1,
    ...                         num_iterations=1000)
    >>> features = np.array([[1], [2], [3]])
    >>> target = np.array([2, 3, 4])

    Adding a bias term
    >>> features = np.c_[np.ones(features.shape[0]), features]

    Fit the model
    >>> model.fit(features, target)

    Predict with the model
    >>> predictions = model.predict(features)
    >>> np.round(predictions, decimals=2)
    array([-0.97,  0.  ,  0.97])
    """
def test_mean_absolute_error() -> None:
    """
    Tests the mean_absolute_error function of RidgeRegression

    The absolute errors in the example are 0.1, 0.0 and 0.1, so their mean
    rounds to 0.07. The result is wrapped in float() so the expected output
    is a plain number.
    --------
    >>> model = RidgeRegression()
    >>> target = np.array([2, 3, 4])
    >>> predictions = np.array([2.1, 3.0, 3.9])
    >>> mae = model.mean_absolute_error(target, predictions)
    >>> float(np.round(mae, 2))
    0.07
    """
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    from doctest import testmod

    testmod()