# Python/machine_learning/Ridge_Regression.py

import numpy as np
import requests

def collect_dataset():
    """Collect dataset of CSGO
    The dataset contains ADR vs Rating of a Player. The CSV is downloaded
    over HTTP, the header row is dropped and every remaining non-blank line
    is split on commas.
    :return : dataset obtained from the link, as matrix (entries are the raw
              strings produced by splitting each line; callers convert them)
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # Fail loudly on an error status instead of parsing an error page as CSV.
    response.raise_for_status()

    # Slice off the header row; skip blank lines so every row keeps the same
    # number of columns when the matrix is built.
    rows = [
        line.split(",")
        for line in response.text.splitlines()[1:]
        if line.strip()
    ]
    return np.matrix(rows)

def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta, lambda_reg):
    """Perform one gradient-descent step on the ridge-regularized cost
    :param data_x   : contains the dataset (one row per data-entry)
    :param data_y   : contains the output associated with each data-entry
    :param len_data : length of the data, used to average the gradient
    :param alpha    : Learning rate of the model
    :param theta    : Feature vector (weights for our model), indexed as a
                      2-D row vector whose first column is the bias weight
    :param lambda_reg: Regularization parameter
    :return : a new feature vector computed as
              theta - (alpha / len_data) * gradient; the input is not modified
    """
    # Residual of the current predictions against the targets.
    residual = np.dot(theta, data_x.transpose())
    residual -= data_y.transpose()
    gradient = np.dot(residual, data_x)

    # Work on a copy so zeroing the bias entry never touches the caller's
    # theta; the bias term is excluded from the penalty.
    penalty = np.copy(theta)
    penalty[0, 0] = 0
    gradient += lambda_reg * penalty

    step = alpha / len_data
    return theta - step * gradient

def sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg):
    """Return the regularized squared-error cost for the given weights
    :param data_x    : contains our dataset
    :param data_y    : contains the output (result vector)
    :param len_data  : len of the dataset
    :param theta     : contains the feature vector (2-D row vector)
    :param lambda_reg: Regularization parameter
    :return          : (sum of squared residuals + lambda_reg * sum of squared
                       non-bias weights) / (2 * len_data)
    """
    residual = np.dot(theta, data_x.transpose())
    residual -= data_y.transpose()
    squared_error = np.sum(np.square(residual))

    # Column 0 holds the bias weight, which is left out of the penalty.
    penalty = lambda_reg * np.sum(np.square(theta[:, 1:]))

    denominator = 2 * len_data
    return (squared_error / denominator) + (penalty / denominator)

def run_ridge_regression(
    data_x,
    data_y,
    lambda_reg=1.0,
    *,
    iterations=100000,
    alpha=0.0001550,
    verbose=True,
):
    """Implement Ridge Regression over the dataset
    Starts from an all-zero feature vector and applies ``iterations``
    gradient-descent updates.
    :param data_x  : contains our dataset
    :param data_y  : contains the output (result vector)
    :param lambda_reg: Regularization parameter
    :param iterations: number of gradient-descent steps (keyword-only)
    :param alpha   : learning rate passed to each step (keyword-only)
    :param verbose : when True, print the cost after every step (keyword-only)
    :return        : feature for line of best fit (Feature vector)
    """
    no_features = data_x.shape[1]
    len_data = data_x.shape[0]

    theta = np.zeros((1, no_features))

    for i in range(iterations):
        theta = run_steep_gradient_descent(
            data_x, data_y, len_data, alpha, theta, lambda_reg
        )
        # The cost is only needed for reporting, so skip it when quiet.
        if verbose:
            error = sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg)
            print(f"At Iteration {i + 1} - Error is {error:.5f}")

    return theta

def mean_absolute_error(predicted_y, original_y):
    """Return the mean absolute difference between predictions and targets
    :param predicted_y   : contains the output of prediction (result vector),
                           read by position for each expected value
    :param original_y    : contains values of expected outcome
    :return          : sum of absolute differences divided by len(original_y)
    """
    absolute_errors = [
        abs(expected - predicted_y[position])
        for position, expected in enumerate(original_y)
    ]
    return sum(absolute_errors) / len(original_y)

def main():
    """Fetch the dataset, fit ridge regression and print the weights"""
    raw = collect_dataset()

    # Prepend a column of ones so the first weight acts as the bias term;
    # the last column of the dataset is the target.
    bias_column = np.ones(raw.shape[0])
    data_x = np.c_[bias_column, raw[:, :-1]].astype(float)
    data_y = raw[:, -1].astype(float)

    # 1.0 is the regularization parameter; set your desired value here.
    theta = run_ridge_regression(data_x, data_y, 1.0)

    print("Resultant Feature vector : ")
    # View theta as a plain array so its first row iterates as scalars.
    for weight in np.asarray(theta)[0]:
        print(f"{weight:.5f}")

# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Ridge Regression 2024-10-16 02:40:31 +00:00			`import numpy as np`
			`import requests`

			`def collect_dataset():`
			`"""Collect dataset of CSGO`
			`The dataset contains ADR vs Rating of a Player`
			`:return : dataset obtained from the link, as matrix`
			`"""`
			`response = requests.get(`
			`"https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"`
			`"master/Week1/ADRvsRating.csv",`
			`timeout=10,`
			`)`
			`lines = response.text.splitlines()`
			`data = []`
			`for item in lines:`
			`item = item.split(",")`
			`data.append(item)`
			`data.pop(0) # This is for removing the labels from the list`
			`dataset = np.matrix(data)`
			`return dataset`

			`def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta, lambda_reg):`
			`"""Run steep gradient descent and updates the Feature vector accordingly`
			`:param data_x : contains the dataset`
			`:param data_y : contains the output associated with each data-entry`
			`:param len_data : length of the data`
			`:param alpha : Learning rate of the model`
			`:param theta : Feature vector (weights for our model)`
			`:param lambda_reg: Regularization parameter`
			`:return : Updated Features using`
			`curr_features - alpha_ * gradient(w.r.t. feature)`
			`"""`
			`n = len_data`

			`prod = np.dot(theta, data_x.transpose())`
			`prod -= data_y.transpose()`
			`sum_grad = np.dot(prod, data_x)`

			`# Add regularization to the gradient`
			`theta_regularized = np.copy(theta)`
			`theta_regularized[0, 0] = 0 # Don't regularize the bias term`
			`sum_grad += lambda_reg * theta_regularized # Add regularization to gradient`

			`theta = theta - (alpha / n) * sum_grad`
			`return theta`

			`def sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg):`
			`"""Return sum of square error for error calculation`
			`:param data_x : contains our dataset`
			`:param data_y : contains the output (result vector)`
			`:param len_data : len of the dataset`
			`:param theta : contains the feature vector`
			`:param lambda_reg: Regularization parameter`
			`:return : sum of square error computed from given features`
			`"""`
			`prod = np.dot(theta, data_x.transpose())`
			`prod -= data_y.transpose()`
			`sum_elem = np.sum(np.square(prod))`

			`# Add regularization to the cost function`
			`regularization_term = lambda_reg * np.sum(np.square(theta[:, 1:])) # Don't regularize the bias term`
			`error = (sum_elem / (2 * len_data)) + (regularization_term / (2 * len_data))`
			`return error`

			`def run_ridge_regression(data_x, data_y, lambda_reg=1.0):`
			`"""Implement Ridge Regression over the dataset`
			`:param data_x : contains our dataset`
			`:param data_y : contains the output (result vector)`
			`:param lambda_reg: Regularization parameter`
			`:return : feature for line of best fit (Feature vector)`
			`"""`
			`iterations = 100000`
			`alpha = 0.0001550`

			`no_features = data_x.shape[1]`
			`len_data = data_x.shape[0]`

			`theta = np.zeros((1, no_features))`

			`for i in range(iterations):`
			`theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta, lambda_reg)`
			`error = sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg)`
			`print(f"At Iteration {i + 1} - Error is {error:.5f}")`

			`return theta`

			`def mean_absolute_error(predicted_y, original_y):`
			`"""Return mean absolute error for error calculation`
			`:param predicted_y : contains the output of prediction (result vector)`
			`:param original_y : contains values of expected outcome`
			`:return : mean absolute error computed from given features`
			`"""`
			`total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))`
			`return total / len(original_y)`

			`def main():`
			`"""Driver function"""`
			`data = collect_dataset()`

			`len_data = data.shape[0]`
			`data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)`
			`data_y = data[:, -1].astype(float)`

			`lambda_reg = 1.0 # Set your desired regularization parameter`
			`theta = run_ridge_regression(data_x, data_y, lambda_reg)`

			`len_result = theta.shape[1]`
			`print("Resultant Feature vector : ")`
			`for i in range(len_result):`
			`print(f"{theta[0, i]:.5f}")`

			`if __name__ == "__main__":`
			`main()`