diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 281a62769..6a1470c5c 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,22 +3,21 @@ import pandas as pd class RidgeRegression: - def __init__( - self, - alpha: float = 0.001, - regularization_param: float = 0.1, - num_iterations: int = 1000, - ) -> None: + def __init__(self, + alpha: float = 0.001, + regularization_param: float = 0.1, + num_iterations: int = 1000, + ) -> None: self.alpha: float = alpha self.regularization_param: float = regularization_param self.num_iterations: int = num_iterations self.theta: np.ndarray = None def feature_scaling( - self, X: np.ndarray + self, x: np.ndarray ) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - mean = np.mean(X, axis=0) - std = np.std(X, axis=0) + mean = np.mean(x, axis=0) + std = np.std(x, axis=0) # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN @@ -31,7 +30,7 @@ class RidgeRegression: m, n = x_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.num_iterations): + for _ in range(self.num_iterations): predictions = x_scaled.dot(self.theta) error = predictions - y @@ -41,18 +40,19 @@ class RidgeRegression: ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, X: np.ndarray) -> np.ndarray: - X_scaled, _, _ = self.feature_scaling(X) - return X_scaled.dot(self.theta) + def predict(self, x: np.ndarray) -> np.ndarray: + x_scaled, _, _ = self.feature_scaling(x) + return x_scaled.dot(self.theta) def compute_cost(self, x: np.ndarray, y: np.ndarray) -> float: x_scaled, _, _ = self.feature_scaling(x) m = len(y) predictions = x_scaled.dot(self.theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + cost = ( + 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost def mean_absolute_error(self, y_true: np.ndarray, y_pred: np.ndarray) -> float: @@ -61,9 +61,9 @@ class RidgeRegression: # Example usage if __name__ == "__main__": - df = pd.read_csv("ADRvsRating.csv") - x = df[["Rating"]].values - y = df["ADR"].values + data = pd.read_csv("ADRvsRating.csv") + x = data[["Rating"]].to_numpy() + y = data["ADR"].to_numpy() y = (y - np.mean(y)) / np.std(y) # added bias term to the feature matrix