From 43905efe298172e9e9280661d80af8f7e2105517 Mon Sep 17 00:00:00 2001 From: ELNS <57490926+EverLookNeverSee@users.noreply.github.com> Date: Mon, 9 Dec 2019 01:45:17 +0330 Subject: [PATCH] Adding doctests into LDA algorithm (#1621) * Adding doctests into function * Adding doctests into function * Adding doctests into function * Adding doctests into function * Adding doctests into function * Adding doctests into function * Adding doctests into function * fixup! Format Python code with psf/black push * Update convex_hull.py * Update convex_hull.py --- divide_and_conquer/convex_hull.py | 57 +++++--------- .../linear_discriminant_analysis.py | 78 ++++++++++++++++++- 2 files changed, 96 insertions(+), 39 deletions(-) diff --git a/divide_and_conquer/convex_hull.py b/divide_and_conquer/convex_hull.py index f233e822c..76184524e 100644 --- a/divide_and_conquer/convex_hull.py +++ b/divide_and_conquer/convex_hull.py @@ -1,5 +1,3 @@ -from numbers import Number - """ The convex hull problem is problem of finding all the vertices of convex polygon, P of a set of points in a plane such that all the points are either on the vertices of P or @@ -40,22 +38,11 @@ class Point: >>> Point("pi", "e") Traceback (most recent call last): ... - ValueError: x and y must be both numeric types but got , instead + ValueError: could not convert string to float: 'pi' """ def __init__(self, x, y): - if not (isinstance(x, Number) and isinstance(y, Number)): - try: - x, y = float(x), float(y) - except ValueError as e: - e.args = ( - "x and y must be both numeric types " - f"but got {type(x)}, {type(y)} instead" - ) - raise - - self.x = x - self.y = y + self.x, self.y = float(x), float(y) def __eq__(self, other): return self.x == other.x and self.y == other.y @@ -112,13 +99,7 @@ def _construct_points(list_of_tuples): Examples ------- >>> _construct_points([[1, 1], [2, -1], [0.3, 4]]) - [(1, 1), (2, -1), (0.3, 4)] - >>> _construct_points(([1, 1], [2, -1], [0.3, 4])) - [(1, 1), (2, -1), (0.3, 4)] - >>> _construct_points([(1, 1), (2, -1), (0.3, 4)]) - [(1, 1), (2, -1), (0.3, 4)] - >>> _construct_points([[1, 1], (2, -1), [0.3, 4]]) - [(1, 1), (2, -1), (0.3, 4)] + [(1.0, 1.0), (2.0, -1.0), (0.3, 4.0)] >>> _construct_points([1, 2]) Ignoring deformed point 1. All points must have at least 2 coordinates. Ignoring deformed point 2. All points must have at least 2 coordinates. @@ -168,11 +149,11 @@ def _validate_input(points): Examples ------- >>> _validate_input([[1, 2]]) - [(1, 2)] + [(1.0, 2.0)] >>> _validate_input([(1, 2)]) - [(1, 2)] + [(1.0, 2.0)] >>> _validate_input([Point(2, 1), Point(-1, 2)]) - [(2, 1), (-1, 2)] + [(2.0, 1.0), (-1.0, 2.0)] >>> _validate_input([]) Traceback (most recent call last): ... @@ -200,9 +181,9 @@ def _validate_input(points): ) elif not hasattr(points, "__iter__"): raise ValueError( - "Expecting an iterable object " f"but got an non-iterable type {points}" + f"Expecting an iterable object but got an non-iterable type {points}" ) - except TypeError as e: + except TypeError: print("Expecting an iterable of type Point, list or tuple.") raise @@ -233,11 +214,11 @@ def _det(a, b, c): Examples ---------- >>> _det(Point(1, 1), Point(1, 2), Point(1, 5)) - 0 + 0.0 >>> _det(Point(0, 0), Point(10, 0), Point(0, 10)) - 100 + 100.0 >>> _det(Point(0, 0), Point(10, 0), Point(0, -10)) - -100 + -100.0 """ det = (a.x * b.y + b.x * c.y + c.x * a.y) - (a.y * b.x + b.y * c.x + c.y * a.x) @@ -271,13 +252,13 @@ def convex_hull_bf(points): Examples --------- >>> convex_hull_bf([[0, 0], [1, 0], [10, 1]]) - [(0, 0), (1, 0), (10, 1)] + [(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)] >>> convex_hull_bf([[0, 0], [1, 0], [10, 0]]) - [(0, 0), (10, 0)] + [(0.0, 0.0), (10.0, 0.0)] >>> convex_hull_bf([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1], [-0.75, 1]]) - [(-1, -1), (-1, 1), (1, -1), (1, 1)] + [(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)] >>> convex_hull_bf([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3), (2, -1), (2, -4), (1, -3)]) - [(0, 0), (0, 3), (1, -3), (2, -4), (3, 0), (3, 3)] + [(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)] """ points = sorted(_validate_input(points)) @@ -336,13 +317,13 @@ def convex_hull_recursive(points): Examples --------- >>> convex_hull_recursive([[0, 0], [1, 0], [10, 1]]) - [(0, 0), (1, 0), (10, 1)] + [(0.0, 0.0), (1.0, 0.0), (10.0, 1.0)] >>> convex_hull_recursive([[0, 0], [1, 0], [10, 0]]) - [(0, 0), (10, 0)] + [(0.0, 0.0), (10.0, 0.0)] >>> convex_hull_recursive([[-1, 1],[-1, -1], [0, 0], [0.5, 0.5], [1, -1], [1, 1], [-0.75, 1]]) - [(-1, -1), (-1, 1), (1, -1), (1, 1)] + [(-1.0, -1.0), (-1.0, 1.0), (1.0, -1.0), (1.0, 1.0)] >>> convex_hull_recursive([(0, 3), (2, 2), (1, 1), (2, 1), (3, 0), (0, 0), (3, 3), (2, -1), (2, -4), (1, -3)]) - [(0, 0), (0, 3), (1, -3), (2, -4), (3, 0), (3, 3)] + [(0.0, 0.0), (0.0, 3.0), (1.0, -3.0), (2.0, -4.0), (3.0, 0.0), (3.0, 3.0)] """ points = sorted(_validate_input(points)) diff --git a/machine_learning/linear_discriminant_analysis.py b/machine_learning/linear_discriminant_analysis.py index cc2f1dac7..6998db1ce 100644 --- a/machine_learning/linear_discriminant_analysis.py +++ b/machine_learning/linear_discriminant_analysis.py @@ -45,6 +45,7 @@ from math import log from os import name, system from random import gauss +from random import seed # Make a training dataset drawn from a gaussian distribution @@ -56,7 +57,15 @@ def gaussian_distribution(mean: float, std_dev: float, instance_count: int) -> l :param instance_count: instance number of class :return: a list containing generated values based-on given mean, std_dev and instance_count + + >>> gaussian_distribution(5.0, 1.0, 20) # doctest: +NORMALIZE_WHITESPACE + [6.288184753155463, 6.4494456086997705, 5.066335808938262, 4.235456349028368, + 3.9078267848958586, 5.031334516831717, 3.977896829989127, 3.56317055489747, + 5.199311976483754, 5.133374604658605, 5.546468300338232, 4.086029056264687, + 5.005005283626573, 4.935258239627312, 3.494170998739258, 5.537997178661033, + 5.320711100998849, 7.3891120432406865, 5.202969177309964, 4.855297691835079] """ + seed(1) return [gauss(mean, std_dev) for _ in range(instance_count)] @@ -67,6 +76,14 @@ def y_generator(class_count: int, instance_count: list) -> list: :param class_count: Number of classes(data groupings) in dataset :param instance_count: number of instances in class :return: corresponding values for data groupings in dataset + + >>> y_generator(1, [10]) + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + >>> y_generator(2, [5, 10]) + [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + >>> y_generator(4, [10, 5, 15, 20]) # doctest: +NORMALIZE_WHITESPACE + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] """ return [k for k in range(class_count) for _ in range(instance_count[k])] @@ -79,6 +96,10 @@ def calculate_mean(instance_count: int, items: list) -> float: :param instance_count: Number of instances in class :param items: items that related to specific class(data grouping) :return: calculated actual mean of considered class + + >>> items = gaussian_distribution(5.0, 1.0, 20) + >>> calculate_mean(len(items), items) + 5.011267842911003 """ # the sum of all items divided by number of instances return sum(items) / instance_count @@ -91,6 +112,11 @@ def calculate_probabilities(instance_count: int, total_count: int) -> float: :param instance_count: number of instances in class :param total_count: the number of all instances :return: value of probability for considered class + + >>> calculate_probabilities(20, 60) + 0.3333333333333333 + >>> calculate_probabilities(30, 100) + 0.3 """ # number of instances in specific class divided by number of all instances return instance_count / total_count @@ -104,6 +130,12 @@ def calculate_variance(items: list, means: list, total_count: int) -> float: :param means: a list containing real mean values of each class :param total_count: the number of all instances :return: calculated variance for considered dataset + + >>> items = gaussian_distribution(5.0, 1.0, 20) + >>> means = [5.011267842911003] + >>> total_count = 20 + >>> calculate_variance([items], means, total_count) + 0.9618530973487491 """ squared_diff = [] # An empty list to store all squared differences # iterate over number of elements in items @@ -129,6 +161,36 @@ def predict_y_values( :param variance: calculated value of variance by calculate_variance function :param probabilities: a list containing all probabilities of classes :return: a list containing predicted Y values + + >>> x_items = [[6.288184753155463, 6.4494456086997705, 5.066335808938262, + ... 4.235456349028368, 3.9078267848958586, 5.031334516831717, + ... 3.977896829989127, 3.56317055489747, 5.199311976483754, + ... 5.133374604658605, 5.546468300338232, 4.086029056264687, + ... 5.005005283626573, 4.935258239627312, 3.494170998739258, + ... 5.537997178661033, 5.320711100998849, 7.3891120432406865, + ... 5.202969177309964, 4.855297691835079], [11.288184753155463, + ... 11.44944560869977, 10.066335808938263, 9.235456349028368, + ... 8.907826784895859, 10.031334516831716, 8.977896829989128, + ... 8.56317055489747, 10.199311976483754, 10.133374604658606, + ... 10.546468300338232, 9.086029056264687, 10.005005283626572, + ... 9.935258239627313, 8.494170998739259, 10.537997178661033, + ... 10.320711100998848, 12.389112043240686, 10.202969177309964, + ... 9.85529769183508], [16.288184753155463, 16.449445608699772, + ... 15.066335808938263, 14.235456349028368, 13.907826784895859, + ... 15.031334516831716, 13.977896829989128, 13.56317055489747, + ... 15.199311976483754, 15.133374604658606, 15.546468300338232, + ... 14.086029056264687, 15.005005283626572, 14.935258239627313, + ... 13.494170998739259, 15.537997178661033, 15.320711100998848, + ... 17.389112043240686, 15.202969177309964, 14.85529769183508]] + + >>> means = [5.011267842911003, 10.011267842911003, 15.011267842911002] + >>> variance = 0.9618530973487494 + >>> probabilities = [0.3333333333333333, 0.3333333333333333, 0.3333333333333333] + >>> predict_y_values(x_items, means, variance, probabilities) # doctest: +NORMALIZE_WHITESPACE + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2] + """ # An empty list to store generated discriminant values of all items in dataset for # each class @@ -148,7 +210,7 @@ def predict_y_values( ) # appending discriminant values of each item to 'results' list results.append(temp) - print("Generated Discriminants: \n", results) + return [l.index(max(l)) for l in results] @@ -161,6 +223,20 @@ def accuracy(actual_y: list, predicted_y: list) -> float: :param predicted_y: a list containing predicted Y values generated by 'predict_y_values' function :return: percentage of accuracy + + >>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + ... 1, 1 ,1 ,1 ,1 ,1 ,1] + >>> predicted_y = [0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, + ... 0, 0, 1, 1, 1, 0, 1, 1, 1] + >>> accuracy(actual_y, predicted_y) + 50.0 + + >>> actual_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + ... 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + >>> predicted_y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, + ... 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + >>> accuracy(actual_y, predicted_y) + 100.0 """ # iterate over one element of each list at a time (zip mode) # prediction is correct if actual Y value equals to predicted Y value