mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-27 23:11:09 +00:00
4b79d771cd
* Add more ruff rules * Add more ruff rules * pre-commit: Update ruff v0.0.269 -> v0.0.270 * Apply suggestions from code review * Fix doctest * Fix doctest (ignore whitespace) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
118 lines
3.5 KiB
Python
118 lines
3.5 KiB
Python
"""
|
|
developed by: markmelnic
|
|
original repo: https://github.com/markmelnic/Scoring-Algorithm
|
|
|
|
Analyse data using a range based percentual proximity algorithm
|
|
and calculate the linear maximum likelihood estimation.
|
|
The basic principle is that all values supplied will be broken
|
|
down to a range from 0 to 1 and each column's score will be added
|
|
up to get the total score.
|
|
|
|
==========
|
|
Example for data of vehicles
|
|
price|mileage|registration_year
|
|
20k |60k |2012
|
|
22k |50k |2011
|
|
23k |90k |2015
|
|
16k |210k |2010
|
|
|
|
We want the vehicle with the lowest price,
|
|
lowest mileage but newest registration year.
|
|
Thus the weights for each column are as follows:
|
|
[0, 0, 1]
|
|
"""
|
|
|
|
|
|
def get_data(source_data: list[list[float]]) -> list[list[float]]:
|
|
"""
|
|
>>> get_data([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]])
|
|
[[20.0, 23.0, 22.0], [60.0, 90.0, 50.0], [2012.0, 2015.0, 2011.0]]
|
|
"""
|
|
data_lists: list[list[float]] = []
|
|
for data in source_data:
|
|
for i, el in enumerate(data):
|
|
if len(data_lists) < i + 1:
|
|
data_lists.append([])
|
|
data_lists[i].append(float(el))
|
|
return data_lists
|
|
|
|
|
|
def calculate_each_score(
|
|
data_lists: list[list[float]], weights: list[int]
|
|
) -> list[list[float]]:
|
|
"""
|
|
>>> calculate_each_score([[20, 23, 22], [60, 90, 50], [2012, 2015, 2011]],
|
|
... [0, 0, 1])
|
|
[[1.0, 0.0, 0.33333333333333337], [0.75, 0.0, 1.0], [0.25, 1.0, 0.0]]
|
|
"""
|
|
score_lists: list[list[float]] = []
|
|
for dlist, weight in zip(data_lists, weights):
|
|
mind = min(dlist)
|
|
maxd = max(dlist)
|
|
|
|
score: list[float] = []
|
|
# for weight 0 score is 1 - actual score
|
|
if weight == 0:
|
|
for item in dlist:
|
|
try:
|
|
score.append(1 - ((item - mind) / (maxd - mind)))
|
|
except ZeroDivisionError:
|
|
score.append(1)
|
|
|
|
elif weight == 1:
|
|
for item in dlist:
|
|
try:
|
|
score.append((item - mind) / (maxd - mind))
|
|
except ZeroDivisionError:
|
|
score.append(0)
|
|
|
|
# weight not 0 or 1
|
|
else:
|
|
msg = f"Invalid weight of {weight:f} provided"
|
|
raise ValueError(msg)
|
|
|
|
score_lists.append(score)
|
|
|
|
return score_lists
|
|
|
|
|
|
def generate_final_scores(score_lists: list[list[float]]) -> list[float]:
|
|
"""
|
|
>>> generate_final_scores([[1.0, 0.0, 0.33333333333333337],
|
|
... [0.75, 0.0, 1.0],
|
|
... [0.25, 1.0, 0.0]])
|
|
[2.0, 1.0, 1.3333333333333335]
|
|
"""
|
|
# initialize final scores
|
|
final_scores: list[float] = [0 for i in range(len(score_lists[0]))]
|
|
|
|
for slist in score_lists:
|
|
for j, ele in enumerate(slist):
|
|
final_scores[j] = final_scores[j] + ele
|
|
|
|
return final_scores
|
|
|
|
|
|
def procentual_proximity(
|
|
source_data: list[list[float]], weights: list[int]
|
|
) -> list[list[float]]:
|
|
"""
|
|
weights - int list
|
|
possible values - 0 / 1
|
|
0 if lower values have higher weight in the data set
|
|
1 if higher values have higher weight in the data set
|
|
|
|
>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
|
|
[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
|
|
"""
|
|
|
|
data_lists = get_data(source_data)
|
|
score_lists = calculate_each_score(data_lists, weights)
|
|
final_scores = generate_final_scores(score_lists)
|
|
|
|
# append scores to source data
|
|
for i, ele in enumerate(final_scores):
|
|
source_data[i].append(ele)
|
|
|
|
return source_data
|