2017-11-18 03:34:07 +00:00
|
|
|
import numpy as np
|
2017-08-19 05:23:00 +00:00
|
|
|
|
|
|
|
""" Here I implemented the scoring functions.
|
|
|
|
MAE, MSE, RMSE, RMSLE and MBD are included.
|
|
|
|
|
|
|
|
Those are used for calculating differences between
|
|
|
|
predicted values and actual values.
|
|
|
|
|
|
|
|
Metrics are slightly differentiated. Sometimes squared, rooted,
|
|
|
|
even log is used.
|
|
|
|
|
|
|
|
Using log and roots can be perceived as tools for penalizing big
|
2020-01-18 12:24:33 +00:00
|
|
|
errors. However, the appropriate metric depends on the situation
|
2017-08-19 05:23:00 +00:00
|
|
|
and the type of data.
|
|
|
|
"""
|
|
|
|
|
2020-05-22 06:10:11 +00:00
|
|
|
|
2019-10-05 05:14:13 +00:00
|
|
|
# Mean Absolute Error
|
2017-08-19 05:23:00 +00:00
|
|
|
def mae(predict, actual):
    """Return the mean absolute error between predictions and targets.

    Both arguments are converted to NumPy arrays, subtracted element-wise,
    and the mean of the absolute differences is returned.

    Examples (rounded for precision):

    >>> round(float(mae([1, 4, 3], [1, 2, 3])), 2)
    0.67

    >>> float(mae([1, 1, 1], [1, 1, 1]))
    0.0
    """
    # Element-wise |predict - actual|, averaged over every element.
    abs_errors = np.abs(np.array(predict) - np.array(actual))
    return abs_errors.mean()
|
|
|
|
|
2019-10-05 05:14:13 +00:00
|
|
|
|
|
|
|
# Mean Squared Error
|
2017-08-19 05:23:00 +00:00
|
|
|
def mse(predict, actual):
    """Return the mean squared error between predictions and targets.

    Both arguments are converted to NumPy arrays, subtracted element-wise,
    and the mean of the squared differences is returned.

    Examples (rounded for precision):

    >>> round(float(mse([1, 4, 3], [1, 2, 3])), 2)
    1.33

    >>> float(mse([1, 1, 1], [1, 1, 1]))
    0.0
    """
    errors = np.array(predict) - np.array(actual)
    return np.square(errors).mean()
|
|
|
|
|
2019-10-05 05:14:13 +00:00
|
|
|
|
|
|
|
# Root Mean Squared Error
|
2017-08-19 05:23:00 +00:00
|
|
|
def rmse(predict, actual):
    """Return the root mean squared error between predictions and targets.

    Both arguments are converted to NumPy arrays, subtracted element-wise,
    squared, averaged, and the square root of that average is returned.

    Examples (rounded for precision):

    >>> round(float(rmse([1, 4, 3], [1, 2, 3])), 2)
    1.15

    >>> float(rmse([1, 1, 1], [1, 1, 1]))
    0.0
    """
    errors = np.array(predict) - np.array(actual)
    mean_squared = np.square(errors).mean()
    return np.sqrt(mean_squared)
|
|
|
|
|
2019-10-05 05:14:13 +00:00
|
|
|
|
|
|
|
# Root Mean Square Logarithmic Error
|
2017-08-19 05:23:00 +00:00
|
|
|
def rmsle(predict, actual):
    """Return the root mean squared logarithmic error.

    Both arguments are converted to NumPy arrays, transformed with
    ``log(1 + x)``, subtracted element-wise, squared, averaged, and the
    square root of that average is returned.

    ``np.log1p`` is used for the transform instead of ``np.log(x + 1)``:
    for values of ``x`` close to zero, ``x + 1`` rounds to exactly 1.0 and
    the naive form loses the value entirely.

    Examples (rounded for precision):

    >>> round(float(rmsle([10, 2, 30], [10, 10, 30])), 2)
    0.75

    >>> float(rmsle([1, 1, 1], [1, 1, 1]))
    0.0
    """
    predict = np.array(predict)
    actual = np.array(actual)

    # log1p(x) == log(1 + x), but stays accurate when x is tiny.
    log_predict = np.log1p(predict)
    log_actual = np.log1p(actual)

    difference = log_predict - log_actual
    mean_square_diff = np.square(difference).mean()

    return np.sqrt(mean_square_diff)
|
2017-11-18 03:34:07 +00:00
|
|
|
|
2019-10-05 05:14:13 +00:00
|
|
|
|
|
|
|
# Mean Bias Deviation
|
2017-11-18 03:34:07 +00:00
|
|
|
def mbd(predict, actual):
    """Return the mean bias deviation as a percentage of the mean target.

    The score is ``mean(predict - actual) / mean(actual) * 100``. It is
    negative if the model underpredicts and positive if it overpredicts.

    Each mean is taken over its own array, so scalar inputs and inputs
    that broadcast against each other (e.g. a list of predictions against
    a single target value) are handled consistently with the other
    metrics in this module.

    No guard is applied when the mean of ``actual`` is zero; the division
    is performed as-is.

    Examples (rounded for precision):

    Here the model overpredicts

    >>> round(float(mbd([2, 3, 4], [1, 2, 3])), 2)
    50.0

    Here the model underpredicts

    >>> round(float(mbd([0, 1, 1], [1, 2, 3])), 2)
    -66.67
    """
    predict = np.array(predict)
    actual = np.array(actual)

    difference = predict - actual

    # np.mean divides each sum by the size of its own array; dividing both
    # by len(predict) breaks for scalars and for broadcast inputs.
    numerator = np.mean(difference)
    denominator = np.mean(actual)

    score = float(numerator) / denominator * 100

    return score
|