From 5645084dcd5cf398caefa40641ac99144a40e572 Mon Sep 17 00:00:00 2001
From: Tianyi Zheng
Date: Fri, 20 Oct 2023 17:29:42 -0400
Subject: [PATCH] Consolidate loss functions into a single file (#10737)

* Consolidate loss functions into single file

* updating DIRECTORY.md

* Fix typo

---------

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
---
 DIRECTORY.md                                   |   8 +-
 machine_learning/loss_functions.py             | 252 ++++++++++++++++++
 .../loss_functions/binary_cross_entropy.py     |  59 ----
 .../categorical_cross_entropy.py               |  85 ------
 machine_learning/loss_functions/hinge_loss.py  |  64 -----
 machine_learning/loss_functions/huber_loss.py  |  52 ----
 .../loss_functions/mean_squared_error.py       |  51 ----
 .../mean_squared_logarithmic_error.py          |  55 ----
 8 files changed, 253 insertions(+), 373 deletions(-)
 create mode 100644 machine_learning/loss_functions.py
 delete mode 100644 machine_learning/loss_functions/binary_cross_entropy.py
 delete mode 100644 machine_learning/loss_functions/categorical_cross_entropy.py
 delete mode 100644 machine_learning/loss_functions/hinge_loss.py
 delete mode 100644 machine_learning/loss_functions/huber_loss.py
 delete mode 100644 machine_learning/loss_functions/mean_squared_error.py
 delete mode 100644 machine_learning/loss_functions/mean_squared_logarithmic_error.py

diff --git a/DIRECTORY.md b/DIRECTORY.md
index 5b7ca856e..b92f8f877 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -549,13 +549,7 @@
   * Local Weighted Learning
     * [Local Weighted Learning](machine_learning/local_weighted_learning/local_weighted_learning.py)
   * [Logistic Regression](machine_learning/logistic_regression.py)
-  * Loss Functions
-    * [Binary Cross Entropy](machine_learning/loss_functions/binary_cross_entropy.py)
-    * [Categorical Cross Entropy](machine_learning/loss_functions/categorical_cross_entropy.py)
-    * [Hinge Loss](machine_learning/loss_functions/hinge_loss.py)
-    * [Huber Loss](machine_learning/loss_functions/huber_loss.py)
-    * [Mean Squared Error](machine_learning/loss_functions/mean_squared_error.py)
-    * [Mean Squared Logarithmic Error](machine_learning/loss_functions/mean_squared_logarithmic_error.py)
+  * [Loss Functions](machine_learning/loss_functions.py)
   * [Mfcc](machine_learning/mfcc.py)
   * [Multilayer Perceptron Classifier](machine_learning/multilayer_perceptron_classifier.py)
   * [Polynomial Regression](machine_learning/polynomial_regression.py)
diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py
new file mode 100644
index 000000000..0fa0956ed
--- /dev/null
+++ b/machine_learning/loss_functions.py
@@ -0,0 +1,252 @@
+import numpy as np
+
+
+def binary_cross_entropy(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
+) -> float:
+    """
+    Calculate the mean binary cross-entropy (BCE) loss between true labels and
+    predicted probabilities.
+
+    BCE loss quantifies dissimilarity between true labels (0 or 1) and predicted
+    probabilities. It's widely used in binary classification tasks.
+
+    BCE = -(1/n) * Σ(y_true * ln(y_pred) + (1 - y_true) * ln(1 - y_pred))
+
+    Reference: https://en.wikipedia.org/wiki/Cross_entropy
+
+    Parameters:
+    - y_true: True binary labels (0 or 1)
+    - y_pred: Predicted probabilities for class 1
+    - epsilon: Small constant to avoid numerical instability
+
+    >>> true_labels = np.array([0, 1, 1, 0, 1])
+    >>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
+    >>> binary_cross_entropy(true_labels, predicted_probs)
+    0.2529995012327421
+    >>> true_labels = np.array([0, 1, 1, 0, 1])
+    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
+    >>> binary_cross_entropy(true_labels, predicted_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Input arrays must have the same length.")
+
+    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Clip predictions to avoid log(0)
+    bce_loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
+    return np.mean(bce_loss)
+
+
+def categorical_cross_entropy(
+    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
+) -> float:
+    """
+    Calculate categorical cross-entropy (CCE) loss between true class labels and
+    predicted class probabilities.
+
+    CCE = -Σ(y_true * ln(y_pred))
+
+    Reference: https://en.wikipedia.org/wiki/Cross_entropy
+
+    Parameters:
+    - y_true: True class labels (one-hot encoded)
+    - y_pred: Predicted class probabilities
+    - epsilon: Small constant to avoid numerical instability
+
+    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
+    >>> categorical_cross_entropy(true_labels, pred_probs)
+    0.567395975254385
+    >>> true_labels = np.array([[1, 0], [0, 1]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: Input arrays must have the same shape.
+    >>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: y_true must be one-hot encoded.
+    >>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
+    >>> categorical_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: y_true must be one-hot encoded.
+    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
+    >>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
+    >>> categorical_cross_entropy(true_labels, pred_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: Predicted probabilities must sum to approximately 1.
+    """
+    if y_true.shape != y_pred.shape:
+        raise ValueError("Input arrays must have the same shape.")
+
+    if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
+        raise ValueError("y_true must be one-hot encoded.")
+
+    if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
+        raise ValueError("Predicted probabilities must sum to approximately 1.")
+
+    y_pred = np.clip(y_pred, epsilon, 1)  # Clip predictions to avoid log(0)
+    return -np.sum(y_true * np.log(y_pred))
+
+
+def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate the mean hinge loss between true labels and predicted values for
+    training support vector machines (SVMs).
+
+    Hinge loss = max(0, 1 - true * pred)
+
+    Reference: https://en.wikipedia.org/wiki/Hinge_loss
+
+    Args:
+    - y_true: actual values (ground truth) encoded as -1 or 1
+    - y_pred: predicted values
+
+    >>> true_labels = np.array([-1, 1, 1, -1, 1])
+    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
+    >>> hinge_loss(true_labels, pred)
+    1.52
+    >>> true_labels = np.array([-1, 1, 1, -1, 1, 1])
+    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
+    >>> hinge_loss(true_labels, pred)
+    Traceback (most recent call last):
+    ...
+    ValueError: Length of predicted and actual array must be same.
+    >>> true_labels = np.array([-1, 1, 10, -1, 1])
+    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
+    >>> hinge_loss(true_labels, pred)
+    Traceback (most recent call last):
+    ...
+    ValueError: y_true can have values -1 or 1 only.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Length of predicted and actual array must be same.")
+
+    if np.any((y_true != -1) & (y_true != 1)):
+        raise ValueError("y_true can have values -1 or 1 only.")
+
+    hinge_losses = np.maximum(0, 1.0 - (y_true * y_pred))
+    return np.mean(hinge_losses)
+
+
+def huber_loss(y_true: np.ndarray, y_pred: np.ndarray, delta: float) -> float:
+    """
+    Calculate the mean Huber loss between the given ground truth and predicted values.
+
+    The Huber loss describes the penalty incurred by an estimation procedure, and it
+    serves as a measure of accuracy for regression models.
+
+    Huber loss =
+        0.5 * (y_true - y_pred)^2                  if |y_true - y_pred| <= delta
+        delta * |y_true - y_pred| - 0.5 * delta^2  otherwise
+
+    Reference: https://en.wikipedia.org/wiki/Huber_loss
+
+    Parameters:
+    - y_true: The true values (ground truth)
+    - y_pred: The predicted values
+
+    >>> true_values = np.array([0.9, 10.0, 2.0, 1.0, 5.2])
+    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
+    >>> np.isclose(huber_loss(true_values, predicted_values, 1.0), 2.102)
+    True
+    >>> true_labels = np.array([11.0, 21.0, 3.32, 4.0, 5.0])
+    >>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
+    >>> np.isclose(huber_loss(true_labels, predicted_probs, 1.0), 1.80164)
+    True
+    >>> true_labels = np.array([11.0, 21.0, 3.32, 4.0])
+    >>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
+    >>> huber_loss(true_labels, predicted_probs, 1.0)
+    Traceback (most recent call last):
+    ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Input arrays must have the same length.")
+
+    huber_mse = 0.5 * (y_true - y_pred) ** 2
+    huber_mae = delta * (np.abs(y_true - y_pred) - 0.5 * delta)
+    return np.where(np.abs(y_true - y_pred) <= delta, huber_mse, huber_mae).mean()
+
+
+def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate the mean squared error (MSE) between ground truth and predicted values.
+
+    MSE measures the average squared difference between true values and predicted
+    values, and it serves as a measure of accuracy for regression models.
+
+    MSE = (1/n) * Σ(y_true - y_pred)^2
+
+    Reference: https://en.wikipedia.org/wiki/Mean_squared_error
+
+    Parameters:
+    - y_true: The true values (ground truth)
+    - y_pred: The predicted values
+
+    >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
+    >>> np.isclose(mean_squared_error(true_values, predicted_values), 0.028)
+    True
+    >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
+    >>> mean_squared_error(true_labels, predicted_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Input arrays must have the same length.")
+
+    squared_errors = (y_true - y_pred) ** 2
+    return np.mean(squared_errors)
+
+
+def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate the mean squared logarithmic error (MSLE) between ground truth and
+    predicted values.
+
+    MSLE measures the mean squared logarithmic difference between true values and
+    predicted values for regression models. It's particularly useful for dealing with
+    skewed or large-value data, and it's often used when the relative differences
+    between predicted and true values are more important than absolute differences.
+
+    MSLE = (1/n) * Σ(log(1 + y_true) - log(1 + y_pred))^2
+
+    Reference: https://insideaiml.com/blog/MeanSquared-Logarithmic-Error-Loss-1035
+
+    Parameters:
+    - y_true: The true values (ground truth)
+    - y_pred: The predicted values
+
+    >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
+    >>> mean_squared_logarithmic_error(true_values, predicted_values)
+    0.0030860877925181344
+    >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
+    >>> mean_squared_logarithmic_error(true_labels, predicted_probs)
+    Traceback (most recent call last):
+    ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(y_true) != len(y_pred):
+        raise ValueError("Input arrays must have the same length.")
+
+    squared_logarithmic_errors = (np.log1p(y_true) - np.log1p(y_pred)) ** 2
+    return np.mean(squared_logarithmic_errors)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod()
diff --git a/machine_learning/loss_functions/binary_cross_entropy.py b/machine_learning/loss_functions/binary_cross_entropy.py
deleted file mode 100644
index 4ebca7f21..000000000
--- a/machine_learning/loss_functions/binary_cross_entropy.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""
-Binary Cross-Entropy (BCE) Loss Function
-
-Description:
-Quantifies dissimilarity between true labels (0 or 1) and predicted probabilities.
-It's widely used in binary classification tasks.
-
-Formula:
-BCE = -Σ(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))
-
-Source:
-[Wikipedia - Cross entropy](https://en.wikipedia.org/wiki/Cross_entropy)
-"""
-
-import numpy as np
-
-
-def binary_cross_entropy(
-    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
-) -> float:
-    """
-    Calculate the BCE Loss between true labels and predicted probabilities.
-
-    Parameters:
-    - y_true: True binary labels (0 or 1).
-    - y_pred: Predicted probabilities for class 1.
-    - epsilon: Small constant to avoid numerical instability.
-
-    Returns:
-    - bce_loss: Binary Cross-Entropy Loss.
-
-    Example Usage:
-    >>> true_labels = np.array([0, 1, 1, 0, 1])
-    >>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
-    >>> binary_cross_entropy(true_labels, predicted_probs)
-    0.2529995012327421
-    >>> true_labels = np.array([0, 1, 1, 0, 1])
-    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
-    >>> binary_cross_entropy(true_labels, predicted_probs)
-    Traceback (most recent call last):
-    ...
-    ValueError: Input arrays must have the same length.
-    """
-    if len(y_true) != len(y_pred):
-        raise ValueError("Input arrays must have the same length.")
-    # Clip predicted probabilities to avoid log(0) and log(1)
-    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
-
-    # Calculate binary cross-entropy loss
-    bce_loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
-
-    # Take the mean over all samples
-    return np.mean(bce_loss)
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
diff --git a/machine_learning/loss_functions/categorical_cross_entropy.py b/machine_learning/loss_functions/categorical_cross_entropy.py
deleted file mode 100644
index 68f98902b..000000000
--- a/machine_learning/loss_functions/categorical_cross_entropy.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-Categorical Cross-Entropy Loss
-
-This function calculates the Categorical Cross-Entropy Loss between true class
-labels and predicted class probabilities.
-
-Formula:
-Categorical Cross-Entropy Loss = -Σ(y_true * ln(y_pred))
-
-Resources:
-- [Wikipedia - Cross entropy](https://en.wikipedia.org/wiki/Cross_entropy)
-"""
-
-import numpy as np
-
-
-def categorical_cross_entropy(
-    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
-) -> float:
-    """
-    Calculate Categorical Cross-Entropy Loss between true class labels and
-    predicted class probabilities.
-
-    Parameters:
-    - y_true: True class labels (one-hot encoded) as a NumPy array.
-    - y_pred: Predicted class probabilities as a NumPy array.
-    - epsilon: Small constant to avoid numerical instability.
-
-    Returns:
-    - ce_loss: Categorical Cross-Entropy Loss as a floating-point number.
-
-    Example:
-    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
-    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
-    >>> categorical_cross_entropy(true_labels, pred_probs)
-    0.567395975254385
-
-    >>> y_true = np.array([[1, 0], [0, 1]])
-    >>> y_pred = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
-    >>> categorical_cross_entropy(y_true, y_pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: Input arrays must have the same shape.
-
-    >>> y_true = np.array([[2, 0, 1], [1, 0, 0]])
-    >>> y_pred = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
-    >>> categorical_cross_entropy(y_true, y_pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: y_true must be one-hot encoded.
-
-    >>> y_true = np.array([[1, 0, 1], [1, 0, 0]])
-    >>> y_pred = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
-    >>> categorical_cross_entropy(y_true, y_pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: y_true must be one-hot encoded.
-
-    >>> y_true = np.array([[1, 0, 0], [0, 1, 0]])
-    >>> y_pred = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
-    >>> categorical_cross_entropy(y_true, y_pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: Predicted probabilities must sum to approximately 1.
-    """
-    if y_true.shape != y_pred.shape:
-        raise ValueError("Input arrays must have the same shape.")
-
-    if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
-        raise ValueError("y_true must be one-hot encoded.")
-
-    if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
-        raise ValueError("Predicted probabilities must sum to approximately 1.")
-
-    # Clip predicted probabilities to avoid log(0)
-    y_pred = np.clip(y_pred, epsilon, 1)
-
-    # Calculate categorical cross-entropy loss
-    return -np.sum(y_true * np.log(y_pred))
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
diff --git a/machine_learning/loss_functions/hinge_loss.py b/machine_learning/loss_functions/hinge_loss.py
deleted file mode 100644
index 5480a8cd6..000000000
--- a/machine_learning/loss_functions/hinge_loss.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Hinge Loss
-
-Description:
-Compute the Hinge loss used for training SVM (Support Vector Machine).
-
-Formula:
-loss = max(0, 1 - true * pred)
-
-Reference: https://en.wikipedia.org/wiki/Hinge_loss
-
-Author: Poojan Smart
-Email: smrtpoojan@gmail.com
-"""
-
-import numpy as np
-
-
-def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
-    """
-    Calculate the mean hinge loss for y_true and y_pred for binary classification.
-
-    Args:
-        y_true: Array of actual values (ground truth) encoded as -1 and 1.
-        y_pred: Array of predicted values.
-
-    Returns:
-        The hinge loss between y_true and y_pred.
-
-    Examples:
-    >>> y_true = np.array([-1, 1, 1, -1, 1])
-    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
-    >>> hinge_loss(y_true, pred)
-    1.52
-    >>> y_true = np.array([-1, 1, 1, -1, 1, 1])
-    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
-    >>> hinge_loss(y_true, pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: Length of predicted and actual array must be same.
-    >>> y_true = np.array([-1, 1, 10, -1, 1])
-    >>> pred = np.array([-4, -0.3, 0.7, 5, 10])
-    >>> hinge_loss(y_true, pred)
-    Traceback (most recent call last):
-    ...
-    ValueError: y_true can have values -1 or 1 only.
-    """
-
-    if len(y_true) != len(y_pred):
-        raise ValueError("Length of predicted and actual array must be same.")
-
-    # Raise value error when y_true (encoded labels) have any other values
-    # than -1 and 1
-    if np.any((y_true != -1) & (y_true != 1)):
-        raise ValueError("y_true can have values -1 or 1 only.")
-
-    hinge_losses = np.maximum(0, 1.0 - (y_true * y_pred))
-    return np.mean(hinge_losses)
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
diff --git a/machine_learning/loss_functions/huber_loss.py b/machine_learning/loss_functions/huber_loss.py
deleted file mode 100644
index 202e013f2..000000000
--- a/machine_learning/loss_functions/huber_loss.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-Huber Loss Function
-
-Description:
-Huber loss function describes the penalty incurred by an estimation procedure.
-It serves as a measure of the model's accuracy in regression tasks.
-
-Formula:
-Huber Loss = if |y_true - y_pred| <= delta then 0.5 * (y_true - y_pred)^2
-             else delta * |y_true - y_pred| - 0.5 * delta^2
-
-Source:
-[Wikipedia - Huber Loss](https://en.wikipedia.org/wiki/Huber_loss)
-"""
-
-import numpy as np
-
-
-def huber_loss(y_true: np.ndarray, y_pred: np.ndarray, delta: float) -> float:
-    """
-    Calculate the mean of Huber Loss.
-
-    Parameters:
-    - y_true: The true values (ground truth).
-    - y_pred: The predicted values.
-
-    Returns:
-    - huber_loss: The mean of Huber Loss between y_true and y_pred.
-
-    Example usage:
-    >>> true_values = np.array([0.9, 10.0, 2.0, 1.0, 5.2])
-    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
-    >>> np.isclose(huber_loss(true_values, predicted_values, 1.0), 2.102)
-    True
-    >>> true_labels = np.array([11.0, 21.0, 3.32, 4.0, 5.0])
-    >>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
-    >>> np.isclose(huber_loss(true_labels, predicted_probs, 1.0), 1.80164)
-    True
-    """
-
-    if len(y_true) != len(y_pred):
-        raise ValueError("Input arrays must have the same length.")
-
-    huber_mse = 0.5 * (y_true - y_pred) ** 2
-    huber_mae = delta * (np.abs(y_true - y_pred) - 0.5 * delta)
-    return np.where(np.abs(y_true - y_pred) <= delta, huber_mse, huber_mae).mean()
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
diff --git a/machine_learning/loss_functions/mean_squared_error.py b/machine_learning/loss_functions/mean_squared_error.py
deleted file mode 100644
index d2b0e1e15..000000000
--- a/machine_learning/loss_functions/mean_squared_error.py
+++ /dev/null
@@ -1,51 +0,0 @@
-"""
-Mean Squared Error (MSE) Loss Function
-
-Description:
-MSE measures the mean squared difference between true values and predicted values.
-It serves as a measure of the model's accuracy in regression tasks.
-
-Formula:
-MSE = (1/n) * Σ(y_true - y_pred)^2
-
-Source:
-[Wikipedia - Mean squared error](https://en.wikipedia.org/wiki/Mean_squared_error)
-"""
-
-import numpy as np
-
-
-def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
-    """
-    Calculate the Mean Squared Error (MSE) between two arrays.
-
-    Parameters:
-    - y_true: The true values (ground truth).
-    - y_pred: The predicted values.
-
-    Returns:
-    - mse: The Mean Squared Error between y_true and y_pred.
-
-    Example usage:
-    >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
-    >>> mean_squared_error(true_values, predicted_values)
-    0.028000000000000032
-    >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
-    >>> mean_squared_error(true_labels, predicted_probs)
-    Traceback (most recent call last):
-    ...
-    ValueError: Input arrays must have the same length.
-    """
-    if len(y_true) != len(y_pred):
-        raise ValueError("Input arrays must have the same length.")
-
-    squared_errors = (y_true - y_pred) ** 2
-    return np.mean(squared_errors)
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
diff --git a/machine_learning/loss_functions/mean_squared_logarithmic_error.py b/machine_learning/loss_functions/mean_squared_logarithmic_error.py
deleted file mode 100644
index 935ebff37..000000000
--- a/machine_learning/loss_functions/mean_squared_logarithmic_error.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-Mean Squared Logarithmic Error (MSLE) Loss Function
-
-Description:
-MSLE measures the mean squared logarithmic difference between
-true values and predicted values, particularly useful when
-dealing with regression problems involving skewed or large-value
-targets. It is often used when the relative differences between
-predicted and true values are more important than absolute
-differences.
-
-Formula:
-MSLE = (1/n) * Σ(log(1 + y_true) - log(1 + y_pred))^2
-
-Source:
-(https://insideaiml.com/blog/MeanSquared-Logarithmic-Error-Loss-1035)
-"""
-
-import numpy as np
-
-
-def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
-    """
-    Calculate the Mean Squared Logarithmic Error (MSLE) between two arrays.
-
-    Parameters:
-    - y_true: The true values (ground truth).
-    - y_pred: The predicted values.
-
-    Returns:
-    - msle: The Mean Squared Logarithmic Error between y_true and y_pred.
-
-    Example usage:
-    >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
-    >>> mean_squared_logarithmic_error(true_values, predicted_values)
-    0.0030860877925181344
-    >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
-    >>> mean_squared_logarithmic_error(true_labels, predicted_probs)
-    Traceback (most recent call last):
-    ...
-    ValueError: Input arrays must have the same length.
-    """
-    if len(y_true) != len(y_pred):
-        raise ValueError("Input arrays must have the same length.")
-
-    squared_logarithmic_errors = (np.log1p(y_true) - np.log1p(y_pred)) ** 2
-    return np.mean(squared_logarithmic_errors)
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()
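
A quick way to sanity-check the consolidated API after applying the patch is to call each loss on small arrays. The sketch below is illustrative and not part of the patch itself; it assumes the repository root is on the Python path so that `machine_learning.loss_functions` is importable. The function names, signatures, and expected values all come from the patch's doctests.

import numpy as np

# Assumes this script runs from the repository root; adjust the import path
# to match your own layout if needed.
from machine_learning.loss_functions import (
    binary_cross_entropy,
    categorical_cross_entropy,
    hinge_loss,
    huber_loss,
    mean_squared_error,
)

# Regression losses share the same (y_true, y_pred) interface.
y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y_pred = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
print(mean_squared_error(y_true, y_pred))         # ~0.028
print(huber_loss(y_true, y_pred, delta=1.0))      # quadratic below delta, linear above

# Classification losses expect encoded labels: 0/1 for BCE, -1/1 for hinge,
# one-hot rows for CCE.
labels_01 = np.array([0, 1, 1, 0, 1])
probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
print(binary_cross_entropy(labels_01, probs))     # ~0.253

labels_pm1 = np.array([-1, 1, 1, -1, 1])
scores = np.array([-4, -0.3, 0.7, 5, 10])
print(hinge_loss(labels_pm1, scores))             # 1.52

one_hot = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
class_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
print(categorical_cross_entropy(one_hot, class_probs))  # ~0.567 (summed, not averaged)

Because the module keeps its `doctest.testmod()` hook, running `python machine_learning/loss_functions.py` (or `python -m doctest machine_learning/loss_functions.py`) exercises the same examples embedded in the docstrings.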