Python/machine_learning/Ridge_Regression.py

116 lines
4.1 KiB
Python
Raw Normal View History

2024-10-16 02:40:31 +00:00
import numpy as np
import requests
def collect_dataset():
    """Collect dataset of CSGO.

    The dataset contains ADR vs Rating of a Player. The CSV file is
    downloaded over HTTP, its first line (the column labels) is dropped and
    every remaining line is split on commas.

    :return: dataset obtained from the link, as an ``np.matrix`` whose
        entries are the raw comma-separated fields (not yet converted to
        numbers)
    :raises requests.HTTPError: if the server answers with an error status
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # Fail loudly on a 4xx/5xx answer instead of parsing an error page as data.
    response.raise_for_status()

    lines = response.text.splitlines()
    # lines[0] holds the labels; blank lines are skipped because they would
    # produce one-element rows and make the matrix ragged.
    rows = [line.split(",") for line in lines[1:] if line.strip()]
    return np.matrix(rows)
def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta, lambda_reg):
    """Run one steep gradient descent step and return the updated weights.

    :param data_x    : contains the dataset
    :param data_y    : contains the output associated with each data-entry
    :param len_data  : length of the data
    :param alpha     : Learning rate of the model
    :param theta     : Feature vector (weights for our model); it is indexed
                       as ``theta[0, 0]``, so it must be two-dimensional
    :param lambda_reg: Regularization parameter
    :return          : Updated Features using
                       curr_features - (alpha / len_data) * gradient
                       where the gradient includes the ridge penalty.
                       ``theta`` itself is not modified.
    """
    # Residual of the current model: theta . x^T - y^T
    residual = np.dot(theta, data_x.transpose()) - data_y.transpose()
    gradient = np.dot(residual, data_x)

    # Ridge penalty on a copy of theta with the first entry (the bias term)
    # zeroed, so the bias is not regularized and the caller's array is left
    # untouched.
    penalised = np.copy(theta)
    penalised[0, 0] = 0

    # Out-of-place arithmetic on purpose: "+=" / "-=" refuse to store a float
    # result into an integer array, which made integer inputs crash.
    gradient = gradient + lambda_reg * penalised

    return theta - (alpha / len_data) * gradient
def sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg):
    """Return the regularized sum of square error for error calculation.

    :param data_x    : contains our dataset
    :param data_y    : contains the output (result vector)
    :param len_data  : len of the dataset
    :param theta     : contains the feature vector; sliced as
                       ``theta[:, 1:]``, so it must be two-dimensional
    :param lambda_reg: Regularization parameter
    :return          : sum of square error computed from given features,
                       plus the ridge penalty, both divided by 2 * len_data
    """
    # Out-of-place subtraction: "-=" cannot store float residuals into an
    # integer prediction array and would raise a casting error.
    residual = np.dot(theta, data_x.transpose()) - data_y.transpose()
    squared_error = np.sum(np.square(residual))

    # Penalty over every weight except the first column (the bias term).
    penalty = lambda_reg * np.sum(np.square(theta[:, 1:]))

    scale = 2 * len_data
    return (squared_error / scale) + (penalty / scale)
def run_ridge_regression(
    data_x, data_y, lambda_reg=1.0, *, iterations=100000, alpha=0.0001550
):
    """Implement Ridge Regression over the dataset.

    Starts from an all-zero weight row and repeatedly applies
    ``run_steep_gradient_descent``, printing the regularized error after
    every step.

    :param data_x    : contains our dataset
    :param data_y    : contains the output (result vector)
    :param lambda_reg: Regularization parameter
    :param iterations: number of gradient descent steps to perform
                       (keyword-only, defaults to the previously hard-coded
                       100000)
    :param alpha     : learning rate passed to every step (keyword-only,
                       defaults to the previously hard-coded 0.0001550)
    :return          : feature for line of best fit (Feature vector)
    """
    len_data = data_x.shape[0]
    no_features = data_x.shape[1]

    theta = np.zeros((1, no_features))
    for step in range(1, iterations + 1):
        theta = run_steep_gradient_descent(
            data_x, data_y, len_data, alpha, theta, lambda_reg
        )
        error = sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg)
        print(f"At Iteration {step} - Error is {error:.5f}")

    return theta
def mean_absolute_error(predicted_y, original_y):
    """Return mean absolute error for error calculation.

    :param predicted_y : contains the output of prediction (result vector)
    :param original_y  : contains values of expected outcome
    :return            : mean absolute error computed from given features
    :raises ValueError : if the inputs are empty or differ in length
    """
    # len() rather than truthiness: NumPy arrays with more than one element
    # do not support bool().
    count = len(original_y)
    if count == 0:
        raise ValueError("mean_absolute_error requires at least one value")
    if len(predicted_y) != count:
        # Previously surplus predictions were silently ignored.
        raise ValueError(
            "predicted_y and original_y must have the same length "
            f"({len(predicted_y)} != {count})"
        )

    total = sum(
        abs(actual - guess) for guess, actual in zip(predicted_y, original_y)
    )
    return total / count
def main():
    """Driver function.

    Downloads the dataset, fits ridge regression on it and prints the
    resulting weights, one per line.
    """
    data = collect_dataset()
    len_data = data.shape[0]

    # Convert to float once, before stacking, so the column of ones is never
    # mixed with non-numeric fields and converted back afterwards.
    values = data.astype(float)
    # Prepend a column of ones (bias term); the last column is the target.
    data_x = np.c_[np.ones(len_data), values[:, :-1]]
    data_y = values[:, -1]

    lambda_reg = 1.0  # Set your desired regularization parameter
    theta = run_ridge_regression(data_x, data_y, lambda_reg)

    print("Resultant Feature vector : ")
    # np.asarray gives a plain ndarray so that row 0 iterates element-wise
    # whatever array subclass the regression returned.
    for weight in np.asarray(theta)[0]:
        print(f"{weight:.5f}")
# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()