diff --git a/data_structures/arrays/kadanes_algorithm.py b/data_structures/arrays/kadanes_algorithm.py
new file mode 100644
index 000000000..5ab2b1fd1
--- /dev/null
+++ b/data_structures/arrays/kadanes_algorithm.py
@@ -0,0 +1,42 @@
+# Kadane's algorithm
+
+
+def kadanes_algorithm(arr: list[int]) -> int:
+    """
+    Function to find the maximum sum of a contiguous subarray using Kadane's algorithm
+
+    >>> kadanes_algorithm([-2, 1, -3, 4, -1, 2, 1, -5, 4])
+    6
+
+    >>> kadanes_algorithm([-1, -2, -3, -4])
+    -1
+
+    >>> kadanes_algorithm([5, 4, -1, 7, 8])
+    23
+
+    >>> kadanes_algorithm([1])
+    1
+
+    >>> kadanes_algorithm([-1, 2, 3, -5, 4])
+    5
+    """
+    # initializing variables
+    max_current = arr[0]  # store the current max sum
+    max_global = arr[0]  # store the global max sum
+
+    # looping through the array starting at the second element
+    for i in range(1, len(arr)):
+        # update current max sum by choosing the maximum between
+        # current element alone or current element plus previous max
+        max_current = max(arr[i], max_current + arr[i])
+
+        # update global max sum if current max is larger
+        max_global = max(max_current, max_global)
+
+    return max_global
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
diff --git a/data_structures/stacks/largest_rectangle_histogram.py b/data_structures/stacks/largest_rectangle_histogram.py
new file mode 100644
index 000000000..7575bd9f6
--- /dev/null
+++ b/data_structures/stacks/largest_rectangle_histogram.py
@@ -0,0 +1,39 @@
+def largest_rectangle_area(heights: list[int]) -> int:
+    """
+    Inputs an array of integers representing the heights of bars,
+    and returns the area of the largest rectangle that can be formed
+
+    >>> largest_rectangle_area([2, 1, 5, 6, 2, 3])
+    10
+
+    >>> largest_rectangle_area([2, 4])
+    4
+
+    >>> largest_rectangle_area([6, 2, 5, 4, 5, 1, 6])
+    12
+
+    >>> largest_rectangle_area([1])
+    1
+    """
+    stack: list[int] = []
+    max_area = 0
+    heights = [*heights, 0]  # make a new list by appending the sentinel 0
+    n = len(heights)
+
+    for i in range(n):
+        # make sure the stack remains in increasing order
+        while stack and heights[i] < heights[stack[-1]]:
+            h = heights[stack.pop()]  # height of the bar
+            # if stack is empty, it means entire width can be taken from index 0 to i-1
+            w = i if not stack else i - stack[-1] - 1  # calculate width
+            max_area = max(max_area, h * w)
+
+        stack.append(i)
+
+    return max_area
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
diff --git a/machine_learning/frequent_pattern_growth.py b/machine_learning/frequent_pattern_growth.py
index 947f8692f..5d743a250 100644
--- a/machine_learning/frequent_pattern_growth.py
+++ b/machine_learning/frequent_pattern_growth.py
@@ -240,7 +240,7 @@ def ascend_tree(leaf_node: TreeNode, prefix_path: list[str]) -> None:
         ascend_tree(leaf_node.parent, prefix_path)
 
 
-def find_prefix_path(base_pat: frozenset, tree_node: TreeNode | None) -> dict:  # noqa: ARG001
+def find_prefix_path(_: frozenset, tree_node: TreeNode | None) -> dict:
     """
     Find the conditional pattern base for a given base pattern.
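A quick local cross-check for the two new data-structure modules above: both already carry doctests, and because each algorithm has an obvious brute-force counterpart, a randomized comparison is a cheap extra safety net. This is a sketch, assuming it is run from the repository root so the new module paths are importable; the brute-force baselines are ad-hoc references written only for this check, not repo code.

```python
# Randomized cross-check sketch for the two modules added above.
import random

from data_structures.arrays.kadanes_algorithm import kadanes_algorithm
from data_structures.stacks.largest_rectangle_histogram import largest_rectangle_area


def max_subarray_brute_force(arr: list[int]) -> int:
    # O(n^2) reference: maximum over every non-empty subarray sum
    return max(
        sum(arr[i : j + 1]) for i in range(len(arr)) for j in range(i, len(arr))
    )


def histogram_brute_force(heights: list[int]) -> int:
    # O(n^2) reference: window width times the minimum bar height in the window
    return max(
        (j - i + 1) * min(heights[i : j + 1])
        for i in range(len(heights))
        for j in range(i, len(heights))
    )


random.seed(42)
for _ in range(200):
    arr = [random.randint(-10, 10) for _ in range(random.randint(1, 12))]
    assert kadanes_algorithm(arr) == max_subarray_brute_force(arr)
    bars = [random.randint(0, 10) for _ in range(random.randint(1, 12))]
    assert largest_rectangle_area(bars) == histogram_brute_force(bars)
print("cross-checks passed")
```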
diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
index 0bd9aa8b5..8308d3684 100644
--- a/machine_learning/loss_functions.py
+++ b/machine_learning/loss_functions.py
@@ -629,13 +629,15 @@ def smooth_l1_loss(y_true: np.ndarray, y_pred: np.ndarray, beta: float = 1.0) ->
     return np.mean(loss)
 
 
-def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+def kullback_leibler_divergence(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-10
+) -> float:
     """
     Calculate the Kullback-Leibler divergence (KL divergence) loss between true labels
     and predicted probabilities.
 
-    KL divergence loss quantifies dissimilarity between true labels and predicted
-    probabilities. It's often used in training generative models.
+    KL divergence loss quantifies the dissimilarity between true labels and predicted
+    probabilities. It is often used in training generative models.
 
     KL = Σ(y_true * ln(y_true / y_pred))
 
@@ -649,6 +651,7 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     >>> predicted_probs = np.array([0.3, 0.3, 0.4])
     >>> float(kullback_leibler_divergence(true_labels, predicted_probs))
     0.030478754035472025
+
     >>> true_labels = np.array([0.2, 0.3, 0.5])
     >>> predicted_probs = np.array([0.3, 0.3, 0.4, 0.5])
     >>> kullback_leibler_divergence(true_labels, predicted_probs)
@@ -659,7 +662,12 @@ def kullback_leibler_divergence(y_true: np.ndarray, y_pred: np.ndarray) -> float
     if len(y_true) != len(y_pred):
         raise ValueError("Input arrays must have the same length.")
 
-    kl_loss = y_true * np.log(y_true / y_pred)
+    # clip predictions with epsilon to avoid log(0) or division by zero
+    y_pred = np.clip(y_pred, epsilon, None)
+
+    # calculate KL divergence only where y_true is not zero
+    kl_loss = np.where(y_true != 0, y_true * np.log(y_true / y_pred), 0.0)
+
     return np.sum(kl_loss)
 
 
diff --git a/machine_learning/ridge_regression/ADRvsRating.csv b/machine_learning/ridge_regression/ADRvsRating.csv
new file mode 100644
index 000000000..dc1bdde8e
--- /dev/null
+++ b/machine_learning/ridge_regression/ADRvsRating.csv
@@ -0,0 +1,1001 @@
+ADR,Rating
+85.0,117.0
+83.5,110.00000000000001
+78.2,102.0
+68.1,97.0
+66.3,86.0
+99.8,135.0
+78.2,120.0
+85.6,103.0
+66.6,100.0
+57.5,87.0
+118.6,182.0
+114.8,167.0
+90.0,155.0
+59.2,122.0
+68.9,107.0
+85.0,101.0
+65.8,81.0
+44.7,55.00000000000001
+56.2,52.0
+54.2,50.0
+113.7,142.0
+86.1,128.0
+77.9,114.99999999999999
+62.5,81.0
+43.7,69.0
+100.4,123.0
+73.1,110.00000000000001
+73.9,108.0
+59.6,99.0
+55.0,72.0
+86.6,112.00000000000001
+92.6,100.0
+66.6,90.0
+46.7,65.0
+53.1,62.0
+117.0,170.0
+88.3,133.0
+86.2,132.0
+56.7,109.00000000000001
+56.6,72.0
+77.4,133.0
+85.7,121.0
+78.1,93.0
+69.8,92.0
+62.3,87.0
+94.9,132.0
+88.1,118.0
+66.9,105.0
+76.6,102.0
+48.8,65.0
+93.7,117.0
+72.1,107.0
+77.9,106.0
+78.5,105.0
+58.7,77.0
+108.3,159.0
+94.5,115.99999999999999
+76.8,112.00000000000001
+64.9,104.0
+53.6,69.0
+75.0,99.0
+61.2,98.0
+80.8,97.0
+67.2,90.0
+67.0,79.0
+126.1,204.99999999999997
+100.0,146.0
+78.3,123.0
+56.6,76.0
+57.8,73.0
+91.1,109.00000000000001
+64.3,90.0
+85.5,87.0
+54.3,72.0
+57.1,72.0
+123.7,186.0
+84.2,123.0
+82.2,120.0
+73.3,120.0
+64.8,105.0
+87.9,119.0
+84.6,111.00000000000001
+80.5,99.0
+66.9,85.0
+54.8,65.0
+113.5,148.0
+93.6,131.0
+81.5,117.0
+69.5,111.00000000000001
+41.1,65.0
+95.7,150.0
+81.4,149.0
+94.7,137.0
+69.1,112.99999999999999
+49.0,67.0
+97.7,143.0
+69.7,99.0
+69.8,91.0
+62.2,75.0
+40.3,50.0
+136.5,220.00000000000003
+110.2,166.0
+75.4,154.0
+87.7,145.0 +80.0,134.0 +53.2,61.0 +63.1,60.0 +70.1,57.99999999999999 +52.0,48.0 +36.4,27.0 +71.6,78.0 +60.7,67.0 +59.5,60.0 +72.4,50.0 +45.1,41.0 +126.1,204.99999999999997 +100.2,158.0 +88.3,155.0 +92.4,134.0 +79.3,122.0 +104.3,146.0 +89.7,130.0 +79.0,127.0 +66.3,96.0 +53.2,85.0 +85.5,107.0 +72.7,95.0 +65.5,94.0 +66.9,87.0 +57.0,84.0 +100.7,138.0 +89.2,137.0 +79.8,137.0 +62.5,110.00000000000001 +73.4,105.0 +101.5,112.99999999999999 +83.0,106.0 +62.1,93.0 +41.7,57.99999999999999 +45.6,56.00000000000001 +110.0,165.0 +93.6,142.0 +83.8,125.0 +70.2,107.0 +42.6,82.0 +74.9,101.0 +73.0,97.0 +65.8,88.0 +69.9,77.0 +68.9,73.0 +89.7,139.0 +77.4,124.0 +77.5,120.0 +82.3,110.00000000000001 +55.5,93.0 +82.1,111.00000000000001 +72.7,92.0 +67.1,90.0 +70.8,88.0 +56.8,68.0 +84.2,112.99999999999999 +78.5,100.0 +62.3,86.0 +73.2,86.0 +68.3,72.0 +108.9,153.0 +98.6,144.0 +81.2,130.0 +73.1,120.0 +66.8,97.0 +108.4,150.0 +92.3,140.0 +75.0,111.00000000000001 +74.8,100.0 +35.5,49.0 +88.7,122.0 +72.5,100.0 +68.8,96.0 +67.8,89.0 +69.8,85.0 +135.3,202.99999999999997 +91.8,137.0 +79.3,131.0 +68.7,112.99999999999999 +44.3,75.0 +102.0,140.0 +67.1,67.0 +50.1,63.0 +37.7,47.0 +44.3,47.0 +105.3,137.0 +72.6,130.0 +97.7,127.0 +55.6,89.0 +62.1,82.0 +95.5,131.0 +72.4,112.00000000000001 +86.7,107.0 +63.4,94.0 +65.1,76.0 +55.4,85.0 +71.7,83.0 +70.8,77.0 +52.0,69.0 +59.6,68.0 +114.1,185.0 +95.6,143.0 +92.0,131.0 +71.2,121.0 +62.9,110.00000000000001 +119.1,211.0 +96.5,163.0 +98.1,154.0 +92.2,148.0 +85.2,145.0 +84.9,101.0 +58.9,61.0 +38.2,41.0 +38.8,35.0 +19.7,5.0 +116.4,168.0 +78.5,148.0 +98.5,136.0 +69.4,131.0 +64.1,126.0 +76.5,101.0 +69.3,77.0 +53.9,69.0 +51.0,61.0 +64.5,60.0 +96.4,141.0 +77.4,134.0 +74.4,121.0 +75.1,107.0 +82.0,99.0 +76.3,109.00000000000001 +84.6,107.0 +67.8,77.0 +68.4,76.0 +57.2,68.0 +115.3,196.0 +89.9,127.0 +81.3,118.0 +60.7,102.0 +66.8,96.0 +64.5,92.0 +77.9,91.0 +72.6,85.0 +69.7,84.0 +63.7,82.0 +113.0,176.0 +78.8,127.0 +67.1,118.0 +81.5,115.99999999999999 +80.2,112.99999999999999 +76.5,103.0 +73.9,103.0 +81.0,92.0 +65.1,80.0 +60.8,74.0 +76.1,90.0 +60.1,76.0 +66.3,71.0 +45.0,60.0 +63.7,56.99999999999999 +127.8,219.0 +129.7,176.0 +99.7,168.0 +76.2,100.0 +33.4,78.0 +96.0,112.99999999999999 +71.5,105.0 +74.1,89.0 +60.7,69.0 +49.0,69.0 +125.2,171.0 +80.4,124.0 +87.3,114.99999999999999 +71.4,110.00000000000001 +64.5,107.0 +100.0,141.0 +94.5,127.0 +88.5,120.0 +62.9,107.0 +71.7,97.0 +95.1,129.0 +72.2,99.0 +75.5,96.0 +58.8,73.0 +55.3,68.0 +100.1,169.0 +95.3,145.0 +82.7,127.0 +75.0,119.0 +77.7,104.0 +81.2,101.0 +71.6,90.0 +82.3,83.0 +54.0,81.0 +44.5,35.0 +89.0,91.0 +75.0,72.0 +55.0,59.0 +45.1,54.0 +48.8,51.0 +124.9,179.0 +74.1,139.0 +86.6,139.0 +63.5,119.0 +69.6,118.0 +82.8,112.99999999999999 +83.3,111.00000000000001 +76.0,83.0 +57.6,78.0 +57.1,73.0 +99.3,155.0 +95.8,136.0 +91.4,135.0 +60.3,106.0 +83.1,106.0 +96.4,157.0 +103.2,151.0 +91.1,130.0 +87.1,126.0 +65.1,113.99999999999999 +76.0,93.0 +73.6,90.0 +67.7,83.0 +73.3,78.0 +36.6,40.0 +70.2,82.0 +55.6,59.0 +52.2,53.0 +58.9,51.0 +50.0,51.0 +123.7,204.0 +127.1,186.0 +78.6,141.0 +67.9,113.99999999999999 +60.2,110.00000000000001 +75.2,98.0 +69.9,90.0 +72.8,84.0 +58.2,73.0 +57.5,69.0 +105.3,191.0 +89.8,152.0 +91.1,125.0 +74.4,106.0 +66.0,94.0 +109.2,119.0 +83.9,105.0 +69.1,91.0 +47.9,62.0 +50.3,56.99999999999999 +101.8,147.0 +82.7,134.0 +90.8,133.0 +85.0,127.0 +56.6,81.0 +78.6,96.0 +79.5,93.0 +69.1,69.0 +62.8,68.0 +47.9,59.0 +110.2,183.0 +102.0,151.0 +88.9,150.0 +71.0,114.99999999999999 +46.3,77.0 +92.0,135.0 +100.2,126.0 +76.4,112.99999999999999 +71.1,108.0 +65.8,89.0 
+79.3,118.0 +85.9,118.0 +59.9,83.0 +72.3,82.0 +39.7,57.99999999999999 +76.3,114.99999999999999 +84.9,100.0 +77.3,96.0 +67.7,92.0 +73.1,91.0 +96.5,135.0 +99.6,134.0 +74.5,107.0 +72.1,102.0 +46.0,71.0 +86.8,141.0 +98.6,141.0 +101.7,132.0 +59.9,98.0 +59.0,78.0 +96.2,124.0 +72.6,112.00000000000001 +75.7,111.00000000000001 +68.3,93.0 +65.3,75.0 +97.5,137.0 +82.1,124.0 +89.8,113.99999999999999 +72.8,112.00000000000001 +61.6,75.0 +119.3,166.0 +75.7,102.0 +65.2,93.0 +56.4,88.0 +58.7,81.0 +93.2,120.0 +93.4,113.99999999999999 +74.7,112.00000000000001 +75.6,108.0 +58.4,79.0 +98.6,140.0 +85.8,119.0 +67.9,102.0 +67.3,94.0 +61.5,78.0 +92.4,118.0 +75.6,92.0 +68.8,91.0 +75.1,86.0 +44.3,64.0 +110.1,178.0 +98.0,138.0 +76.2,112.99999999999999 +56.2,93.0 +61.0,92.0 +54.5,100.0 +68.7,87.0 +78.2,87.0 +59.8,74.0 +59.5,68.0 +127.0,190.0 +98.6,132.0 +85.5,127.0 +55.9,106.0 +36.1,86.0 +94.1,171.0 +84.5,152.0 +76.0,110.00000000000001 +69.6,108.0 +46.9,78.0 +69.1,91.0 +49.3,91.0 +71.9,90.0 +66.7,77.0 +62.0,68.0 +105.0,160.0 +102.7,142.0 +51.9,106.0 +73.9,89.0 +63.1,82.0 +105.8,158.0 +80.2,93.0 +56.5,91.0 +62.4,89.0 +62.3,83.0 +73.0,88.0 +70.0,80.0 +68.2,74.0 +55.8,64.0 +27.9,22.0 +95.0,178.0 +97.8,144.0 +82.7,139.0 +73.9,136.0 +117.6,135.0 +93.3,142.0 +76.7,119.0 +78.0,117.0 +67.6,109.00000000000001 +68.3,81.0 +69.3,118.0 +80.5,105.0 +81.0,93.0 +54.9,73.0 +61.4,72.0 +101.9,114.99999999999999 +79.9,97.0 +50.3,60.0 +65.1,56.99999999999999 +54.5,55.00000000000001 +104.2,180.0 +111.9,178.0 +89.6,163.0 +72.5,134.0 +62.7,85.0 +89.5,106.0 +85.9,97.0 +65.5,87.0 +61.0,74.0 +50.7,54.0 +135.4,197.0 +97.0,161.0 +78.1,123.0 +70.0,101.0 +55.8,68.0 +101.9,117.0 +82.5,115.99999999999999 +73.6,101.0 +68.9,100.0 +44.8,76.0 +92.1,138.0 +83.6,126.0 +67.3,98.0 +75.4,96.0 +54.2,74.0 +103.7,182.0 +101.4,165.0 +82.9,141.0 +58.4,115.99999999999999 +85.1,108.0 +83.2,98.0 +70.2,78.0 +72.6,78.0 +47.7,74.0 +55.9,56.00000000000001 +88.5,112.00000000000001 +76.7,100.0 +66.6,87.0 +53.2,82.0 +60.3,81.0 +95.8,148.0 +93.3,139.0 +77.2,124.0 +76.0,107.0 +45.1,80.0 +123.3,206.99999999999997 +93.9,163.0 +86.4,152.0 +78.3,152.0 +69.1,92.0 +80.5,88.0 +80.7,86.0 +65.5,50.0 +51.3,40.0 +33.7,32.0 +115.5,162.0 +62.6,70.0 +46.0,66.0 +53.6,61.0 +56.9,50.0 +105.7,177.0 +90.2,136.0 +85.8,133.0 +84.2,119.0 +47.5,96.0 +115.2,182.0 +94.2,134.0 +83.0,112.00000000000001 +80.1,107.0 +61.6,83.0 +105.7,146.0 +75.2,106.0 +56.7,83.0 +50.3,63.0 +54.1,59.0 +104.1,156.0 +97.8,151.0 +98.4,143.0 +47.9,93.0 +42.1,74.0 +89.6,138.0 +87.6,130.0 +85.7,101.0 +55.8,88.0 +30.6,39.0 +96.1,144.0 +83.1,119.0 +75.9,114.99999999999999 +78.1,112.99999999999999 +69.9,110.00000000000001 +85.5,109.00000000000001 +71.0,99.0 +71.0,87.0 +67.2,86.0 +59.1,71.0 +120.3,188.0 +64.6,104.0 +78.9,104.0 +75.0,101.0 +56.4,68.0 +100.2,135.0 +75.3,104.0 +71.1,102.0 +68.9,94.0 +63.5,84.0 +66.3,89.0 +55.3,88.0 +80.9,84.0 +71.9,72.0 +42.1,41.0 +114.7,173.0 +88.0,160.0 +74.5,151.0 +74.8,127.0 +65.2,99.0 +115.3,163.0 +92.4,122.0 +58.3,89.0 +61.6,85.0 +42.6,69.0 +94.1,144.0 +87.6,129.0 +59.7,106.0 +79.0,99.0 +46.1,65.0 +75.5,89.0 +80.8,72.0 +60.2,67.0 +51.9,54.0 +33.9,27.0 +134.1,235.0 +98.2,160.0 +86.3,146.0 +72.5,140.0 +58.0,100.0 +78.8,112.99999999999999 +77.0,105.0 +75.2,81.0 +59.0,60.0 +51.0,56.99999999999999 +104.2,164.0 +94.8,157.0 +86.6,137.0 +81.7,115.99999999999999 +69.7,102.0 +92.7,154.0 +79.4,151.0 +84.7,145.0 +70.0,125.0 +92.4,112.00000000000001 +83.1,95.0 +67.8,78.0 +63.6,77.0 +51.9,50.0 +33.2,30.0 +113.9,162.0 +83.9,126.0 +75.7,118.0 +69.4,107.0 +78.1,96.0 +88.6,117.0 +95.3,111.00000000000001 
+71.8,106.0 +72.0,92.0 +67.4,67.0 +87.2,127.0 +85.2,111.00000000000001 +58.7,86.0 +51.2,81.0 +53.6,73.0 +106.0,165.0 +80.5,125.0 +76.8,119.0 +80.3,107.0 +46.9,99.0 +117.8,204.0 +92.4,151.0 +89.2,136.0 +89.6,128.0 +69.2,119.0 +100.7,93.0 +54.5,67.0 +59.5,61.0 +59.4,60.0 +49.1,50.0 +109.8,147.0 +92.4,143.0 +86.0,124.0 +61.3,97.0 +46.6,85.0 +84.1,102.0 +70.8,89.0 +61.7,85.0 +62.7,80.0 +62.8,77.0 +115.2,211.0 +92.3,124.0 +71.0,112.00000000000001 +58.5,101.0 +70.3,82.0 +83.4,103.0 +99.1,99.0 +69.8,75.0 +55.1,74.0 +45.9,55.00000000000001 +113.6,172.0 +98.4,170.0 +84.8,146.0 +51.3,104.0 +68.6,90.0 +82.5,102.0 +66.8,89.0 +63.3,68.0 +50.8,56.99999999999999 +44.6,48.0 +113.9,152.0 +85.5,137.0 +78.9,127.0 +80.6,127.0 +92.1,122.0 +88.9,110.00000000000001 +78.4,103.0 +69.4,77.0 +59.3,69.0 +34.2,40.0 +77.1,95.0 +56.8,83.0 +68.7,82.0 +53.2,73.0 +52.9,55.00000000000001 +104.8,173.0 +124.5,170.0 +76.2,137.0 +51.4,98.0 +64.8,91.0 +91.2,137.0 +84.8,108.0 +70.3,98.0 +58.4,74.0 +55.2,67.0 +109.7,163.0 +98.7,141.0 +65.9,99.0 +67.4,74.0 +48.1,57.99999999999999 +91.7,137.0 +82.3,112.00000000000001 +91.1,109.00000000000001 +57.4,87.0 +75.7,82.0 +104.5,125.0 +70.6,113.99999999999999 +68.8,108.0 +73.6,103.0 +79.2,93.0 +79.4,91.0 +74.5,82.0 +65.1,78.0 +68.6,67.0 +34.1,48.0 +134.0,198.0 +104.6,172.0 +83.9,150.0 +66.0,91.0 +60.3,84.0 +95.6,112.99999999999999 +83.3,101.0 +71.7,98.0 +74.0,92.0 +44.9,70.0 +88.0,133.0 +95.7,132.0 +69.3,121.0 +77.2,118.0 +58.6,81.0 +63.0,78.0 +67.6,72.0 +68.0,56.99999999999999 +51.5,56.99999999999999 +52.4,54.0 +99.5,184.0 +93.5,172.0 +108.0,159.0 +88.1,150.0 +75.1,100.0 +87.1,126.0 +85.4,115.99999999999999 +73.7,107.0 +77.7,97.0 +41.5,73.0 +102.1,150.0 +71.2,102.0 +61.4,96.0 +65.5,88.0 +65.6,85.0 +90.7,142.0 +85.6,129.0 +81.0,113.99999999999999 +73.5,98.0 +62.7,81.0 +72.3,115.99999999999999 +75.8,99.0 +81.8,98.0 +67.8,87.0 +69.5,81.0 +114.5,133.0 +76.9,97.0 +67.0,93.0 +60.4,84.0 +48.9,56.00000000000001 +104.0,148.0 +92.3,138.0 +95.5,132.0 +74.3,108.0 +47.7,76.0 +68.1,77.0 +56.0,74.0 +69.6,63.0 +65.4,43.0 +46.0,42.0 +151.8,239.0 +75.8,147.0 +88.4,143.0 +79.6,131.0 +83.1,125.0 +83.3,107.0 +83.0,96.0 +74.0,90.0 +57.9,61.0 +54.9,53.0 +107.7,167.0 +100.7,148.0 +101.4,148.0 +77.5,132.0 +76.0,111.00000000000001 +78.2,109.00000000000001 +78.9,100.0 +83.7,84.0 +46.4,60.0 +58.1,54.0 +114.9,163.0 +103.5,157.0 +74.6,134.0 +73.3,129.0 +74.0,104.0 +66.0,96.0 +61.8,78.0 +68.8,69.0 +62.6,62.0 +59.3,60.0 +100.6,157.0 +82.3,138.0 +96.2,137.0 +94.6,125.0 +70.1,115.99999999999999 +100.8,120.0 +70.2,74.0 +49.6,69.0 +44.0,56.99999999999999 +30.9,34.0 +97.5,159.0 +98.6,156.0 +107.1,148.0 +87.7,112.00000000000001 +59.4,101.0 +86.8,129.0 +89.2,122.0 +81.5,114.99999999999999 +68.3,101.0 +46.6,69.0 +73.6,101.0 +65.6,99.0 +81.0,99.0 +64.1,99.0 +64.0,94.0 +77.5,126.0 +79.2,119.0 +86.6,119.0 +66.0,100.0 +78.1,95.0 +88.3,120.0 +67.2,104.0 +78.6,99.0 +66.0,85.0 +59.3,79.0 +80.7,105.0 +78.2,105.0 +89.2,93.0 +74.0,86.0 +65.1,78.0 +111.0,190.0 +99.2,154.0 +75.8,93.0 +60.2,84.0 +60.5,77.0 +93.2,112.00000000000001 +81.3,89.0 +46.7,65.0 +47.3,55.00000000000001 +44.3,54.0 +89.4,137.0 +66.2,135.0 +91.6,135.0 +90.0,129.0 +65.2,112.00000000000001 +76.1,112.99999999999999 +77.7,107.0 +78.2,107.0 +78.2,103.0 +73.3,81.0 +100.7,151.0 +84.3,133.0 +75.1,103.0 +60.5,84.0 +72.7,82.0 +121.4,196.0 +101.3,141.0 +74.4,121.0 +73.4,117.0 +62.8,100.0 +86.1,123.0 +67.5,82.0 +69.2,79.0 +53.8,65.0 +43.7,65.0 +103.4,134.0 +83.9,126.0 +68.0,101.0 +64.4,89.0 +62.8,86.0 +75.6,108.0 +78.4,107.0 +80.7,107.0 +73.3,102.0 +69.7,91.0 +74.5,103.0 +75.5,102.0 
+64.8,98.0
+79.4,92.0
+71.6,83.0
+92.8,134.0
+83.6,125.0
+80.6,121.0
+88.2,114.99999999999999
+57.6,79.0
+102.4,133.0
+76.3,105.0
+59.7,93.0
+61.3,80.0
+55.4,67.0
+89.4,134.0
+94.5,127.0
+83.6,122.0
+69.8,88.0
+65.3,86.0
+82.8,122.0
+74.7,107.0
+80.9,102.0
+67.2,86.0
+48.5,55.00000000000001
+91.9,150.0
+82.8,130.0
+71.1,122.0
+97.2,112.99999999999999
+62.5,99.0
+75.2,109.00000000000001
+77.3,101.0
+71.1,99.0
+74.6,99.0
+61.9,87.0
+91.9,140.0
+83.5,119.0
+74.8,107.0
+60.0,96.0
+69.3,89.0
+76.2,96.0
+77.2,89.0
+67.4,71.0
+44.8,65.0
+55.6,62.0
+122.1,189.0
+117.6,185.0
+90.6,131.0
+71.8,123.0
+52.9,85.0
+80.5,112.00000000000001
+77.0,109.00000000000001
+64.8,107.0
+66.4,93.0
+57.2,77.0
+106.7,140.0
+78.4,114.99999999999999
+80.9,110.00000000000001
+62.1,105.0
+72.5,103.0
+97.4,140.0
+88.7,131.0
+89.8,112.00000000000001
+56.4,95.0
+57.0,89.0
+81.1,129.0
+89.3,120.0
+64.0,87.0
+59.7,73.0
+52.9,63.0
+101.4,179.0
+113.7,178.0
+97.7,153.0
+81.4,125.0
+59.4,95.0
+81.6,105.0
+57.6,80.0
+58.6,63.0
+61.8,62.0
+44.7,51.0
diff --git a/machine_learning/ridge_regression/__init__.py b/machine_learning/ridge_regression/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/machine_learning/ridge_regression/ridge_regression.py b/machine_learning/ridge_regression/ridge_regression.py
new file mode 100644
index 000000000..1c2c13fa9
--- /dev/null
+++ b/machine_learning/ridge_regression/ridge_regression.py
@@ -0,0 +1,84 @@
+import numpy as np
+import pandas as pd
+
+
+class RidgeRegression:
+    def __init__(
+        self,
+        alpha: float = 0.001,
+        regularization_param: float = 0.1,
+        num_iterations: int = 1000,
+    ) -> None:
+        self.alpha: float = alpha
+        self.regularization_param: float = regularization_param
+        self.num_iterations: int = num_iterations
+        self.theta: np.ndarray | None = None
+
+    def feature_scaling(
+        self, features: np.ndarray
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+        mean = np.mean(features, axis=0)
+        std = np.std(features, axis=0)
+
+        # avoid division by zero for constant features (std = 0)
+        std[std == 0] = 1  # set std=1 for constant features to avoid NaN
+
+        features_scaled = (features - mean) / std
+        return features_scaled, mean, std
+
+    def fit(self, features: np.ndarray, target: np.ndarray) -> None:
+        features_scaled, _, _ = self.feature_scaling(features)
+        m, n = features_scaled.shape
+        self.theta = np.zeros(n)  # initializing weights to zeros
+
+        for _ in range(self.num_iterations):
+            predictions = features_scaled.dot(self.theta)
+            error = predictions - target
+
+            # computing gradient with L2 regularization
+            gradient = (
+                features_scaled.T.dot(error) + self.regularization_param * self.theta
+            ) / m
+            self.theta -= self.alpha * gradient  # updating weights
+
+    def predict(self, features: np.ndarray) -> np.ndarray:
+        features_scaled, _, _ = self.feature_scaling(features)
+        return features_scaled.dot(self.theta)
+
+    def compute_cost(self, features: np.ndarray, target: np.ndarray) -> float:
+        features_scaled, _, _ = self.feature_scaling(features)
+        m = len(target)
+
+        predictions = features_scaled.dot(self.theta)
+        cost = (1 / (2 * m)) * np.sum((predictions - target) ** 2) + (
+            self.regularization_param / (2 * m)
+        ) * np.sum(self.theta**2)
+        return cost
+
+    def mean_absolute_error(
+        self, target: np.ndarray, predictions: np.ndarray
+    ) -> float:
+        return np.mean(np.abs(target - predictions))
+
+
+# Example usage
+if __name__ == "__main__":
+    data = pd.read_csv("ADRvsRating.csv")
+    features_matrix = data[["Rating"]].to_numpy()
+    target = data["ADR"].to_numpy()
+    target = (target - np.mean(target)) / np.std(target)
+
+    # added bias term to the feature matrix
+    x = np.c_[np.ones(features_matrix.shape[0]), features_matrix]
+
+    # initialize and train the ridge regression model
+    model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000)
+    model.fit(x, target)
+
+    # predictions
+    predictions = model.predict(x)
+
+    # results
+    print("Optimized Weights:", model.theta)
+    print("Cost:", model.compute_cost(x, target))
+    print("Mean Absolute Error:", model.mean_absolute_error(target, predictions))
diff --git a/machine_learning/ridge_regression/test_ridge_regression.py b/machine_learning/ridge_regression/test_ridge_regression.py
new file mode 100644
index 000000000..6bf6d6024
--- /dev/null
+++ b/machine_learning/ridge_regression/test_ridge_regression.py
@@ -0,0 +1,100 @@
+"""
+Doctest for RidgeRegression class
+
+Tests include:
+- feature_scaling
+- fit
+- predict
+- mean_absolute_error
+
+To run these tests, use the following command:
+    python -m doctest test_ridge_regression.py -v
+"""
+
+import numpy as np  # noqa: F401
+
+from machine_learning.ridge_regression.ridge_regression import (
+    RidgeRegression,  # noqa: F401
+)
+
+
+def test_feature_scaling():
+    """
+    Tests the feature_scaling function of RidgeRegression.
+    --------
+    >>> model = RidgeRegression()
+    >>> features = np.array([[1, 2], [2, 3], [3, 4]])
+    >>> features_scaled, mean, std = model.feature_scaling(features)
+    >>> np.round(features_scaled, 2)
+    array([[-1.22, -1.22],
+           [ 0.  ,  0.  ],
+           [ 1.22,  1.22]])
+    >>> np.round(mean, 2)
+    array([2., 3.])
+    >>> np.round(std, 2)
+    array([0.82, 0.82])
+    """
+
+
+def test_fit():
+    """
+    Tests the fit function of RidgeRegression
+    --------
+    >>> model = RidgeRegression(alpha=0.01,
+    ...                         regularization_param=0.1,
+    ...                         num_iterations=1000)
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
+
+    # Adding a bias term
+    >>> features = np.c_[np.ones(features.shape[0]), features]
+
+    # Fit the model
+    >>> model.fit(features, target)
+
+    # Check if the weights have been updated
+    >>> np.round(model.theta, decimals=2)
+    array([0.  , 0.79])
+    """
+
+
+def test_predict():
+    """
+    Tests the predict function of RidgeRegression
+    --------
+    >>> model = RidgeRegression(alpha=0.01,
+    ...                         regularization_param=0.1,
+    ...                         num_iterations=1000)
+    >>> features = np.array([[1], [2], [3]])
+    >>> target = np.array([2, 3, 4])
+
+    # Adding a bias term
+    >>> features = np.c_[np.ones(features.shape[0]), features]
+
+    # Fit the model
+    >>> model.fit(features, target)
+
+    # Predict with the model
+    >>> predictions = model.predict(features)
+    >>> np.round(predictions, decimals=2)
+    array([-0.97,  0.  ,  0.97])
+    """
+
+
+def test_mean_absolute_error():
+    """
+    Tests the mean_absolute_error function of RidgeRegression
+    --------
+    >>> model = RidgeRegression()
+    >>> target = np.array([2, 3, 4])
+    >>> predictions = np.array([2.1, 3.0, 3.9])
+    >>> mae = model.mean_absolute_error(target, predictions)
+    >>> float(np.round(mae, 2))
+    0.07
+    """
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
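A note on verifying ridge_regression.py: the gradient in fit() is (Xᵀ(Xθ − y) + λθ) / m on the internally scaled features, so its stationary point is the standard closed-form ridge solution θ* = (XᵀX + λI)⁻¹Xᵀy computed on those same scaled features. That gives a cheap local sanity check: run gradient descent, solve the closed form, and compare. A minimal sketch, assuming the package is importable from the repository root; the synthetic data, hyperparameters, and tolerance are illustrative choices, not part of the PR.

```python
# Compare gradient descent against the closed-form ridge estimate.
import numpy as np

from machine_learning.ridge_regression.ridge_regression import RidgeRegression

rng = np.random.default_rng(0)
features = rng.normal(size=(200, 2))
target = features @ np.array([1.5, -2.0]) + rng.normal(scale=0.1, size=200)

model = RidgeRegression(alpha=0.1, regularization_param=0.1, num_iterations=5000)
model.fit(features, target)

# fit() scales features internally, so solve the closed form on the same scaling
scaled, _, _ = model.feature_scaling(features)
lam = model.regularization_param
closed_form = np.linalg.solve(
    scaled.T @ scaled + lam * np.eye(scaled.shape[1]), scaled.T @ target
)
print(np.allclose(model.theta, closed_form, atol=1e-2))  # expected: True
```

With unit-variance features the scaled problem is well conditioned, so alpha=0.1 and 5000 iterations converge comfortably; a smaller step size or fewer iterations would call for a looser tolerance.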