Python/other/scoring_algorithm.py

"""
developed by: markmelnic
original repo: https://github.com/markmelnic/Scoring-Algorithm

Analyse data using a range-based percentage proximity algorithm
and calculate the linear maximum likelihood estimation.
Each column of the supplied values is rescaled to the range from 0 to 1,
and the per-column scores of every row are added up to get that row's
total score.

==========
Example for data of vehicles
price|mileage|registration_year
20k  |60k    |2012
22k  |50k    |2011
23k  |90k    |2015
16k  |210k   |2010

We want the vehicle with the lowest price,
lowest mileage but newest registration year.
A weight of 0 means lower values score higher and a weight of 1 means
higher values score higher, so the weights for each column are as follows:
[0, 0, 1]
"""


def get_data(source_data: list[list[float]]) -> list[list[float]]:
    """
    Regroup row-oriented records into one list of floats per column.

    Column ``i`` of the result holds element ``i`` of every row that has one,
    in row order, each converted with ``float``.

    >>> get_data([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]])
    [[20.0, 23.0, 22.0], [60.0, 90.0, 50.0], [2012.0, 2015.0, 2011.0]]
    """
    columns: list[list[float]] = []
    for row in source_data:
        # Open a fresh column for every position no earlier row reached;
        # range() of a non-positive count is empty, so shorter rows add none.
        missing = len(row) - len(columns)
        columns.extend([] for _ in range(missing))
        for column, value in zip(columns, row):
            column.append(float(value))
    return columns


def calculate_each_score(
    data_lists: list[list[float]], weights: list[int]
) -> list[list[float]]:
    """
    Min-max normalise every column to a score between 0 and 1.

    data_lists - one list of values per column
    weights - int list, paired with the columns by position
    possible values - 0 / 1
    0 inverts the score, so the smallest value in the column scores 1.0
    1 keeps the score, so the largest value in the column scores 1.0

    Columns and weights are paired with ``zip``, so pairing stops at the
    shorter of the two lists.

    A column whose values are all equal has no range to scale by; each of
    its items scores 1.0 for weight 0 and 0.0 for weight 1.

    Raises ValueError if a weight is neither 0 nor 1.

    >>> calculate_each_score([[20, 23, 22], [60, 90, 50], [2012, 2015, 2011]],
    ...                      [0, 0, 1])
    [[1.0, 0.0, 0.33333333333333337], [0.75, 0.0, 1.0], [0.25, 1.0, 0.0]]
    >>> calculate_each_score([[5, 5], [7, 7]], [0, 1])
    [[1.0, 1.0], [0.0, 0.0]]
    """
    score_lists: list[list[float]] = []
    for dlist, weight in zip(data_lists, weights):
        # Reject a bad weight before doing any work on the column.
        if weight not in (0, 1):
            msg = f"Invalid weight of {weight:f} provided"
            raise ValueError(msg)

        mind = min(dlist)
        # The range is the same for every item, so compute it once per column.
        span = max(dlist) - mind

        score: list[float]
        if span == 0:
            # Constant column: dividing by the range is impossible, so use the
            # fixed fallback score (as a float, like every other score).
            score = [1.0 if weight == 0 else 0.0] * len(dlist)
        elif weight == 0:
            # for weight 0 score is 1 - actual score
            score = [1 - ((item - mind) / span) for item in dlist]
        else:
            score = [(item - mind) / span for item in dlist]

        score_lists.append(score)

    return score_lists


def generate_final_scores(score_lists: list[list[float]]) -> list[float]:
    """
    Add up the per-column scores position by position.

    score_lists - one list of scores per column; entry ``j`` of every list
    is added to total ``j``. The number of totals is taken from the first
    list, so a later list that is longer raises IndexError.

    Returns an empty list when no score lists are supplied.

    >>> generate_final_scores([[1.0, 0.0, 0.33333333333333337],
    ...                        [0.75, 0.0, 1.0],
    ...                        [0.25, 1.0, 0.0]])
    [2.0, 1.0, 1.3333333333333335]
    >>> generate_final_scores([])
    []
    """
    # Nothing to add up; avoids indexing into an empty list below.
    if not score_lists:
        return []

    # Start from 0.0 so the totals are floats even if every score is an int.
    final_scores: list[float] = [0.0] * len(score_lists[0])

    # Accumulate left to right with plain additions (not sum()) so the
    # floating-point rounding order is fixed.
    for slist in score_lists:
        for j, ele in enumerate(slist):
            final_scores[j] += ele

    return final_scores


def procentual_proximity(
    source_data: list[list[float]], weights: list[int]
) -> list[list[float]]:
    """
    Score every row of source_data and append the score to that row.

    The rows are modified in place and the same list is returned.

    weights - int list
    possible values - 0 / 1
    0 if lower values have higher weight in the data set
    1 if higher values have higher weight in the data set

    >>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
    [[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
    """

    # rows -> columns -> per-column scores -> one total per row
    columns = get_data(source_data)
    column_scores = calculate_each_score(columns, weights)
    totals = generate_final_scores(column_scores)

    # attach each total to the end of its row
    for row, total in zip(source_data, totals):
        row.append(total)

    return source_data
requirements.txt: Unpin numpy (#2287) * requirements.txt: Unpin numpy * fixup! Format Python code with psf/black push * Less clutter * fixup! Format Python code with psf/black push Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> 2020-08-06 15:50:23 +00:00			`"""`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`developed by: markmelnic`
			`original repo: https://github.com/markmelnic/Scoring-Algorithm`

			`Analyse data using a range based percentual proximity algorithm`
			`and calculate the linear maximum likelihood estimation.`
			`The basic principle is that all values supplied will be broken`
			`down to a range from 0 to 1 and each column's score will be added`
			`up to get the total score.`

			`==========`
			`Example for data of vehicles`
			`price\|mileage\|registration_year`
			`20k \|60k \|2012`
			`22k \|50k \|2011`
			`23k \|90k \|2015`
			`16k \|210k \|2010`

			`We want the vehicle with the lowest price,`
			`lowest mileage but newest registration year.`
			`Thus the weights for each column are as follows:`
			`[0, 0, 1]`
requirements.txt: Unpin numpy (#2287) * requirements.txt: Unpin numpy * fixup! Format Python code with psf/black push * Less clutter * fixup! Format Python code with psf/black push Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> 2020-08-06 15:50:23 +00:00			`"""`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00

Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`def get_data(source_data: list[list[float]]) -> list[list[float]]:`
requirements.txt: Unpin numpy (#2287) * requirements.txt: Unpin numpy * fixup! Format Python code with psf/black push * Less clutter * fixup! Format Python code with psf/black push Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> 2020-08-06 15:50:23 +00:00			`"""`
Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`>>> get_data([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]])`
			`[[20.0, 23.0, 22.0], [60.0, 90.0, 50.0], [2012.0, 2015.0, 2011.0]]`
requirements.txt: Unpin numpy (#2287) * requirements.txt: Unpin numpy * fixup! Format Python code with psf/black push * Less clutter * fixup! Format Python code with psf/black push Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> 2020-08-06 15:50:23 +00:00			`"""`
[mypy] Annotates other/scoring_algorithm (#5621) * scoring_algorithm: Moves doctest into function docstring so it will be run * [mypy] annotates other/scoring_algorithm * [mypy] renames temp var to unique value to work around mypy issue in other/scoring_algorithm reusing loop variables with the same name and different types gives this very confusing mypy error response. pyright correctly infers the types without issue. ``` scoring_algorithm.py:58: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:60: error: Unsupported operand types for - ("List[float]" and "float") scoring_algorithm.py:65: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:67: error: Unsupported operand types for - ("List[float]" and "float") Found 4 errors in 1 file (checked 1 source file) ``` * scoring_algorithm: uses enumeration instead of manual indexing on loop var * scoring_algorithm: sometimes we look before we leap. * clean-up: runs `black` to fix formatting 2021-10-29 05:21:16 +00:00			`data_lists: list[list[float]] = []`
			`for data in source_data:`
			`for i, el in enumerate(data):`
			`if len(data_lists) < i + 1:`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`data_lists.append([])`
[mypy] Annotates other/scoring_algorithm (#5621) * scoring_algorithm: Moves doctest into function docstring so it will be run * [mypy] annotates other/scoring_algorithm * [mypy] renames temp var to unique value to work around mypy issue in other/scoring_algorithm reusing loop variables with the same name and different types gives this very confusing mypy error response. pyright correctly infers the types without issue. ``` scoring_algorithm.py:58: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:60: error: Unsupported operand types for - ("List[float]" and "float") scoring_algorithm.py:65: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:67: error: Unsupported operand types for - ("List[float]" and "float") Found 4 errors in 1 file (checked 1 source file) ``` * scoring_algorithm: uses enumeration instead of manual indexing on loop var * scoring_algorithm: sometimes we look before we leap. * clean-up: runs `black` to fix formatting 2021-10-29 05:21:16 +00:00			`data_lists[i].append(float(el))`
Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`return data_lists`

Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00
Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`def calculate_each_score(`
			`data_lists: list[list[float]], weights: list[int]`
			`) -> list[list[float]]:`
			`"""`
			`>>> calculate_each_score([[20, 23, 22], [60, 90, 50], [2012, 2015, 2011]],`
			`... [0, 0, 1])`
			`[[1.0, 0.0, 0.33333333333333337], [0.75, 0.0, 1.0], [0.25, 1.0, 0.0]]`
			`"""`
[mypy] Annotates other/scoring_algorithm (#5621) * scoring_algorithm: Moves doctest into function docstring so it will be run * [mypy] annotates other/scoring_algorithm * [mypy] renames temp var to unique value to work around mypy issue in other/scoring_algorithm reusing loop variables with the same name and different types gives this very confusing mypy error response. pyright correctly infers the types without issue. ``` scoring_algorithm.py:58: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:60: error: Unsupported operand types for - ("List[float]" and "float") scoring_algorithm.py:65: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:67: error: Unsupported operand types for - ("List[float]" and "float") Found 4 errors in 1 file (checked 1 source file) ``` * scoring_algorithm: uses enumeration instead of manual indexing on loop var * scoring_algorithm: sometimes we look before we leap. * clean-up: runs `black` to fix formatting 2021-10-29 05:21:16 +00:00			`score_lists: list[list[float]] = []`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`for dlist, weight in zip(data_lists, weights):`
			`mind = min(dlist)`
			`maxd = max(dlist)`

[mypy] Annotates other/scoring_algorithm (#5621) * scoring_algorithm: Moves doctest into function docstring so it will be run * [mypy] annotates other/scoring_algorithm * [mypy] renames temp var to unique value to work around mypy issue in other/scoring_algorithm reusing loop variables with the same name and different types gives this very confusing mypy error response. pyright correctly infers the types without issue. ``` scoring_algorithm.py:58: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:60: error: Unsupported operand types for - ("List[float]" and "float") scoring_algorithm.py:65: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:67: error: Unsupported operand types for - ("List[float]" and "float") Found 4 errors in 1 file (checked 1 source file) ``` * scoring_algorithm: uses enumeration instead of manual indexing on loop var * scoring_algorithm: sometimes we look before we leap. * clean-up: runs `black` to fix formatting 2021-10-29 05:21:16 +00:00			`score: list[float] = []`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`# for weight 0 score is 1 - actual score`
			`if weight == 0:`
			`for item in dlist:`
			`try:`
			`score.append(1 - ((item - mind) / (maxd - mind)))`
			`except ZeroDivisionError:`
			`score.append(1)`

			`elif weight == 1:`
			`for item in dlist:`
			`try:`
			`score.append((item - mind) / (maxd - mind))`
			`except ZeroDivisionError:`
			`score.append(0)`

			`# weight not 0 or 1`
			`else:`
Add more ruff rules (#8767) * Add more ruff rules * Add more ruff rules * pre-commit: Update ruff v0.0.269 -> v0.0.270 * Apply suggestions from code review * Fix doctest * Fix doctest (ignore whitespace) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-05-26 07:34:17 +00:00			`msg = f"Invalid weight of {weight:f} provided"`
			`raise ValueError(msg)`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00
			`score_lists.append(score)`

Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`return score_lists`


			`def generate_final_scores(score_lists: list[list[float]]) -> list[float]:`
			`"""`
			`>>> generate_final_scores([[1.0, 0.0, 0.33333333333333337],`
			`... [0.75, 0.0, 1.0],`
			`... [0.25, 1.0, 0.0]])`
			`[2.0, 1.0, 1.3333333333333335]`
			`"""`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`# initialize final scores`
[mypy] Annotates other/scoring_algorithm (#5621) * scoring_algorithm: Moves doctest into function docstring so it will be run * [mypy] annotates other/scoring_algorithm * [mypy] renames temp var to unique value to work around mypy issue in other/scoring_algorithm reusing loop variables with the same name and different types gives this very confusing mypy error response. pyright correctly infers the types without issue. ``` scoring_algorithm.py:58: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:60: error: Unsupported operand types for - ("List[float]" and "float") scoring_algorithm.py:65: error: Incompatible types in assignment (expression has type "float", variable has type "List[float]") scoring_algorithm.py:67: error: Unsupported operand types for - ("List[float]" and "float") Found 4 errors in 1 file (checked 1 source file) ``` * scoring_algorithm: uses enumeration instead of manual indexing on loop var * scoring_algorithm: sometimes we look before we leap. * clean-up: runs `black` to fix formatting 2021-10-29 05:21:16 +00:00			`final_scores: list[float] = [0 for i in range(len(score_lists[0]))]`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00
Add flake8 pluin flake8 bugbear to pre-commit (#7132) * ci(pre-commit): Add ``flake8-builtins`` additional dependency to ``pre-commit`` (#7104) * refactor: Fix ``flake8-builtins`` (#7104) * fix(lru_cache): Fix naming conventions in docstrings (#7104) * ci(pre-commit): Order additional dependencies alphabetically (#7104) * fix(lfu_cache): Correct function name in docstring (#7104) * Update strings/snake_case_to_camel_pascal_case.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update data_structures/stacks/next_greater_element.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update digital_image_processing/index_calculation.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update graphs/prim.py Co-authored-by: Christian Clauss <cclauss@me.com> * Update hashes/djb2.py Co-authored-by: Christian Clauss <cclauss@me.com> * refactor: Rename `_builtin` to `builtin_` ( #7104) * fix: Rename all instances (#7104) * refactor: Update variable names (#7104) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * ci: Create ``tox.ini`` and ignore ``A003`` (#7123) * revert: Remove function name changes (#7104) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename tox.ini to .flake8 * Update data_structures/heap/heap.py Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> * refactor: Rename `next_` to `next_item` (#7104) * ci(pre-commit): Add `flake8` plugin `flake8-bugbear` (#7127) * refactor: Follow `flake8-bugbear` plugin (#7127) * fix: Correct `knapsack` code (#7127) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: Christian Clauss <cclauss@me.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Dhruv Manilawala <dhruvmanila@gmail.com> 2022-10-13 16:03:06 +00:00			`for slist in score_lists:`
Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`for j, ele in enumerate(slist):`
			`final_scores[j] = final_scores[j] + ele`

Reduce the complexity of other/scoring_algorithm.py (#8045) * Increase the --max-complexity threshold in the file .flake8 2023-03-02 04:57:07 +00:00			`return final_scores`


			`def procentual_proximity(`
			`source_data: list[list[float]], weights: list[int]`
			`) -> list[list[float]]:`
			`"""`
			`weights - int list`
			`possible values - 0 / 1`
			`0 if lower values have higher weight in the data set`
			`1 if higher values have higher weight in the data set`

			`>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])`
			`[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]`
			`"""`

			`data_lists = get_data(source_data)`
			`score_lists = calculate_each_score(data_lists, weights)`
			`final_scores = generate_final_scores(score_lists)`

Procentual proximity scoring algorithm implemented (#2280) * Procentual proximity scoring algorithm implemented - added requested changes - passed doctest - passed flake8 test * Apply suggestions from code review Co-authored-by: Christian Clauss <cclauss@me.com> * Function rename Co-authored-by: Christian Clauss <cclauss@me.com> 2020-08-04 20:11:07 +00:00			`# append scores to source data`
			`for i, ele in enumerate(final_scores):`
			`source_data[i].append(ele)`

			`return source_data`