From 1713cbe7c20864f6d3eaa2d1b521ef1a1da4828d Mon Sep 17 00:00:00 2001 From: jbsch Date: Wed, 23 Oct 2024 20:51:58 +0530 Subject: [PATCH] resolved errors --- machine_learning/ridge_regression/__init__.py | 0 machine_learning/ridge_regression/model.py | 55 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 machine_learning/ridge_regression/__init__.py diff --git a/machine_learning/ridge_regression/__init__.py b/machine_learning/ridge_regression/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/machine_learning/ridge_regression/model.py b/machine_learning/ridge_regression/model.py index 3a41ff60a..33e281543 100644 --- a/machine_learning/ridge_regression/model.py +++ b/machine_learning/ridge_regression/model.py @@ -3,54 +3,57 @@ import pandas as pd class RidgeRegression: - def __init__(self, alpha:float=0.001, regularization_param:float=0.1, num_iterations:int=1000) -> None: + def __init__(self, + alpha:float=0.001, + regularization_param:float=0.1, + num_iterations:int=1000) -> None: self.alpha:float = alpha self.regularization_param:float = regularization_param self.num_iterations:int = num_iterations self.theta:np.ndarray = None - def feature_scaling(self, X:np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: - mean = np.mean(X, axis=0) - std = np.std(X, axis=0) + def feature_scaling(self, x:np.ndarray)-> tuple[np.ndarray, np.ndarray, np.ndarray]: + mean = np.mean(x, axis=0) + std = np.std(x, axis=0) # avoid division by zero for constant features (std = 0) std[std == 0] = 1 # set std=1 for constant features to avoid NaN - X_scaled = (X - mean) / std - return X_scaled, mean, std + x_scaled = (x - mean) / std + return x_scaled, mean, std - def fit(self, X:np.ndarray, y:np.ndarray) -> None: - X_scaled, mean, std = self.feature_scaling(X) - m, n = X_scaled.shape + def fit(self, x:np.ndarray, y:np.ndarray) -> None: + x_scaled, mean, std = self.feature_scaling(x) + m, n = x_scaled.shape self.theta = np.zeros(n) # initializing weights to zeros - for i in range(self.num_iterations): - predictions = X_scaled.dot(self.theta) + predictions = x_scaled.dot(self.theta) error = predictions - y # computing gradient with L2 regularization gradient = ( - X_scaled.T.dot(error) + self.regularization_param * self.theta + x_scaled.T.dot(error) + self.regularization_param * self.theta ) / m self.theta -= self.alpha * gradient # updating weights - def predict(self, X:np.ndarray) -> np.ndarray: - X_scaled, _, _ = self.feature_scaling(X) - return X_scaled.dot(self.theta) + def predict(self, x:np.ndarray) -> np.ndarray: + x_scaled, _, _ = self.feature_scaling(x) + return x_scaled.dot(self.theta) - def compute_cost(self, X:np.ndarray, y:np.ndarray) -> float: - X_scaled, _, _ = self.feature_scaling(X) + def compute_cost(self, x:np.ndarray, y:np.ndarray) -> float: + x_scaled, _, _ = self.feature_scaling(x) m = len(y) - predictions = X_scaled.dot(self.theta) - cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( - self.regularization_param / (2 * m) - ) * np.sum(self.theta**2) + predictions = x_scaled.dot(self.theta) + cost = ( + 1 / (2 * m)) * np.sum((predictions - y) ** 2) + ( + self.regularization_param / (2 * m) + ) * np.sum(self.theta**2) return cost @@ -61,21 +64,21 @@ class RidgeRegression: # Example usage if __name__ == "__main__": df = pd.read_csv("ADRvsRating.csv") - X = df[["Rating"]].values + x = df[["Rating"]].values y = df["ADR"].values y = (y - np.mean(y)) / np.std(y) # added bias term to the feature matrix - X = np.c_[np.ones(X.shape[0]), X] + x = np.c_[np.ones(x.shape[0]), x] # initialize and train the ridge regression model model = RidgeRegression(alpha=0.01, regularization_param=0.1, num_iterations=1000) - model.fit(X, y) + model.fit(x, y) # predictions - predictions = model.predict(X) + predictions = model.predict(x) # results print("Optimized Weights:", model.theta) - print("Cost:", model.compute_cost(X, y)) + print("Cost:", model.compute_cost(x, y)) print("Mean Absolute Error:", model.mean_absolute_error(y, predictions))