"""
|
2020-06-16 08:09:19 +00:00
|
|
|
Implementation of gradient descent algorithm for minimizing cost of a linear hypothesis
|
|
|
|
function.
|
2017-06-22 20:39:42 +00:00
|
|
|
"""
|
2024-03-13 06:52:41 +00:00
|
|
|
|
2024-03-28 18:03:23 +00:00
|
|
|
import numpy as np
# List of input, output pairs
train_data = (
    ((5, 2, 3), 15),
    ((6, 5, 9), 25),
    ((11, 12, 13), 41),
    ((1, 1, 1), 8),
    ((11, 12, 13), 41),
)
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009
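
# The model below is a linear hypothesis (writing theta for the entries of
# parameter_vector and J for the cost over the training examples):
#     h(x) = theta_0 + theta_1 * x_1 + theta_2 * x_2 + theta_3 * x_3
# where theta_0 is the bias term. Gradient descent repeatedly applies the update
#     theta_j := theta_j - LEARNING_RATE * dJ/dtheta_j
# until the parameters stop changing within the tolerances set in
# run_gradient_descent().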


def _error(example_no, data_set="train"):
    """
    :param data_set: train data or test data
    :param example_no: example number whose error has to be checked
    :return: error in the example pointed to by the example number.
    """
    return calculate_hypothesis_value(example_no, data_set) - output(
        example_no, data_set
    )


def _hypothesis_value(data_input_tuple):
    """
    Calculates hypothesis function value for a given input
    :param data_input_tuple: Input tuple of a particular example
    :return: Value of hypothesis function at that point.
    Note that there is a 'bias input' whose value is fixed as 1.
    It is not explicitly mentioned in the input data, but ML hypothesis functions use
    it, so it has to be handled separately; the last line of this function adds the
    bias term parameter_vector[0].
    """
    hyp_val = 0
    for i in range(len(parameter_vector) - 1):
        hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
    hyp_val += parameter_vector[0]
    return hyp_val
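
# Illustration (not part of the algorithm): with the initial parameter_vector
# [2, 4, 1, 5], the hypothesis for the first training input (5, 2, 3) is
#     2 + 4 * 5 + 1 * 2 + 5 * 3 = 39,
# while its recorded output is 15, so _error(0) would initially be 39 - 15 = 24.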


def output(example_no, data_set):
    """
    :param data_set: test data or train data
    :param example_no: example whose output is to be fetched
    :return: output for that example
    """
    if data_set == "train":
        return train_data[example_no][1]
    elif data_set == "test":
        return test_data[example_no][1]
    return None


def calculate_hypothesis_value(example_no, data_set):
    """
    Calculates the hypothesis value for a given example
    :param data_set: test data or train data
    :param example_no: example whose hypothesis value is to be calculated
    :return: hypothesis value for that example
    """
    if data_set == "train":
        return _hypothesis_value(train_data[example_no][0])
    elif data_set == "test":
        return _hypothesis_value(test_data[example_no][0])
    return None


def summation_of_cost_derivative(index, end=m):
    """
    Calculates the sum of the cost function derivative
    :param index: index wrt which the derivative is being calculated
    :param end: value where the summation ends, default is m, the number of examples
    :return: the summation of the cost derivative
    Note: If index is -1, the summation is calculated wrt the bias parameter.
    """
    summation_value = 0
    for i in range(end):
        if index == -1:
            summation_value += _error(i)
        else:
            summation_value += _error(i) * train_data[i][0][index]
    return summation_value


def get_cost_derivative(index):
    """
    :param index: index of the parameter vector wrt which the derivative is to be
        calculated
    :return: derivative wrt that index
    Note: If index is -1, the derivative is calculated wrt the bias parameter.
    """
    cost_derivative_value = summation_of_cost_derivative(index, m) / m
    return cost_derivative_value
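
# Together, the two functions above compute, for parameter index j,
#     dJ/dtheta_j = (1/m) * sum over i of (h(x_i) - y_i) * x_ij
# where h(x_i) is the hypothesis value and y_i the recorded output for training
# example i, and x_ij is taken as 1 for the bias parameter (index == -1). This is the
# partial derivative of a mean-squared-error style cost over the training examples.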


def run_gradient_descent():
    global parameter_vector
    # Tune these values to set the tolerance for deciding that the parameters
    # have converged
    absolute_error_limit = 0.000002
    relative_error_limit = 0
    j = 0
    while True:
        j += 1
        temp_parameter_vector = [0, 0, 0, 0]
        # Compute all updated parameters before overwriting parameter_vector,
        # so every parameter is updated simultaneously
        for i in range(len(parameter_vector)):
            cost_derivative = get_cost_derivative(i - 1)
            temp_parameter_vector[i] = (
                parameter_vector[i] - LEARNING_RATE * cost_derivative
            )
        # Stop once an update no longer changes the parameters within the tolerances
        if np.allclose(
            parameter_vector,
            temp_parameter_vector,
            atol=absolute_error_limit,
            rtol=relative_error_limit,
        ):
            break
        parameter_vector = temp_parameter_vector
    print("Number of iterations:", j)
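
# Note: every iteration uses all m training examples (batch gradient descent);
# LEARNING_RATE and the tolerance limits above control how fast and how precisely
# the parameters converge.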


def test_gradient_descent():
    for i in range(len(test_data)):
        print("Actual output value:", output(i, "test"))
        print("Hypothesis output:", calculate_hypothesis_value(i, "test"))
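
# Running this module first fits parameter_vector on train_data and then prints, for
# each test example, the actual output next to the hypothesis output.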


if __name__ == "__main__":
    run_gradient_descent()
    print("\nTesting gradient descent for a linear hypothesis function.\n")
    test_gradient_descent()