From e107d6d5d0d2ce7536f9e9c9e37c7da93e114b7c Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 20:06:58 +0530 Subject: [PATCH 01/27] Implementation of DBSCAN from Scratch --- machine_learning/DBSCAN.py | 108 +++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 machine_learning/DBSCAN.py diff --git a/machine_learning/DBSCAN.py b/machine_learning/DBSCAN.py new file mode 100644 index 000000000..c746161e8 --- /dev/null +++ b/machine_learning/DBSCAN.py @@ -0,0 +1,108 @@ +import pandas as pd +import numpy as np +import math +import matplotlib.pyplot as plt + +class DBSCAN: + ''' + Author : Gowtham Kamalasekar + LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ + + DBSCAN Algorithm : + Density-Based Spatial Clustering Of Applications With Noise + Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN + + Attributes: + ----------- + minPts (int) : Minimum number of points needed to be within the radius to considered as core + radius (int) : The radius from a given core point where other core points can be considered as core + file (csv) : CSV file location. Should contain x and y coordinate value for each point. + + Example : + minPts = 4 + radius = 1.9 + file = 'data_dbscan.csv' + + File Structure of CSV Data: + --------------------------- + _____ + x | y + ----- + 3 | 7 + 4 | 6 + 5 | 5 + 6 | 4 + 7 | 3 + ----- + + Functions: + ---------- + __init__() : Constructor that sets minPts, radius and file + perform_dbscan() : Invoked by constructor and calculates the core and noise points and returns a dictionary. + print_dbscan() : Prints the core and noise points along with stating if the noise are border points or not. + plot_dbscan() : Plots the points to show the core and noise point. + + To create a object + ------------------ + import DBSCAN + obj = DBSCAN.DBSCAN(minPts, radius, file) + obj.print_dbscan() + obj.plot_dbscan() + ''' + + def __init__(self, minPts, radius, file): + self.minPts = minPts + self.radius = radius + self.file = file + self.dict1 = self.perform_dbscan() + + def perform_dbscan(self): + data = pd.read_csv(self.file) + + minPts = self.minPts + e = self.radius + + dict1 = {} + for i in range(len(data)): + for j in range(len(data)): + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) + if dist < e: + if i+1 in dict1: + dict1[i+1].append(j+1) + else: + dict1[i+1] = [j+1,] + + return dict1 + + def print_dbscan(self): + for i in self.dict1: + print(i," ",self.dict1[i], end=' ---> ') + if len(self.dict1[i]) >= self.minPts: + print("Core") + else: + for j in self.dict1: + if i != j and len(self.dict1[j]) >= self.minPts and i in self.dict1[j]: + print("Noise ---> Border") + break + else: + print("Noise") + + def plot_dbscan(self): + data = pd.read_csv(self.file) + e = self.radius + for i in self.dict1: + if len(self.dict1[i]) >= self.minPts: + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) + plt.gca().add_artist(circle) + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + else: + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + + plt.legend(['Core','Noise']) + plt.show() From da81c073ebe149ea110111bb2dce3457574eb499 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 20:49:14 +0530 Subject: [PATCH 02/27] Update and rename DBSCAN.py to dbscan.py --- machine_learning/{DBSCAN.py => dbscan.py} | 92 +++++++++++++---------- 1 file changed, 52 insertions(+), 40 deletions(-) rename machine_learning/{DBSCAN.py => dbscan.py} (63%) diff --git a/machine_learning/DBSCAN.py b/machine_learning/dbscan.py similarity index 63% rename from machine_learning/DBSCAN.py rename to machine_learning/dbscan.py index c746161e8..3e044d8f6 100644 --- a/machine_learning/DBSCAN.py +++ b/machine_learning/dbscan.py @@ -1,40 +1,13 @@ import pandas as pd -import numpy as np import math import matplotlib.pyplot as plt - -class DBSCAN: +from typing import Dict, List +class dbscan: ''' - Author : Gowtham Kamalasekar - LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ - DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN - Attributes: - ----------- - minPts (int) : Minimum number of points needed to be within the radius to considered as core - radius (int) : The radius from a given core point where other core points can be considered as core - file (csv) : CSV file location. Should contain x and y coordinate value for each point. - - Example : - minPts = 4 - radius = 1.9 - file = 'data_dbscan.csv' - - File Structure of CSV Data: - --------------------------- - _____ - x | y - ----- - 3 | 7 - 4 | 6 - 5 | 5 - 6 | 4 - 7 | 3 - ----- - Functions: ---------- __init__() : Constructor that sets minPts, radius and file @@ -44,19 +17,52 @@ class DBSCAN: To create a object ------------------ - import DBSCAN - obj = DBSCAN.DBSCAN(minPts, radius, file) + import dbscan + obj = dbscan.dbscan(minPts, radius, file) obj.print_dbscan() obj.plot_dbscan() ''' - - def __init__(self, minPts, radius, file): + def __init__(self, minPts : int, radius : int, file : str) -> None: + ''' + Constructor + + Attributes: + ----------- + minPts (int) : Minimum number of points needed to be within the radius to considered as core + radius (int) : The radius from a given core point where other core points can be considered as core + file (csv) : CSV file location. Should contain x and y coordinate value for each point. + + Example : + minPts = 4 + radius = 1.9 + file = 'data_dbscan.csv' + + File Structure of CSV Data: + --------------------------- + _____ + x | y + ----- + 3 | 7 + 4 | 6 + 5 | 5 + 6 | 4 + 7 | 3 + ----- + ''' self.minPts = minPts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> Dict[int, List[int]]: + ''' + Parameters: + ----------- + None - def perform_dbscan(self): + Return: + -------- + Dictionary with points and the list of points that lie in its radius + ''' data = pd.read_csv(self.file) minPts = self.minPts @@ -73,8 +79,12 @@ class DBSCAN: dict1[i+1] = [j+1,] return dict1 - - def print_dbscan(self): + def print_dbscan(self) -> None: + ''' + Outputs: + -------- + Prints each point and if it is a core or a noise (w/ border) + ''' for i in self.dict1: print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minPts: @@ -86,8 +96,12 @@ class DBSCAN: break else: print("Noise") - - def plot_dbscan(self): + def plot_dbscan(self) -> None: + ''' + Output: + ------- + A matplotlib plot that show points as core and noise along with the circle that lie within it. + ''' data = pd.read_csv(self.file) e = self.radius for i in self.dict1: @@ -99,10 +113,8 @@ class DBSCAN: else: plt.scatter(data['x'][i-1], data['y'][i-1], color='green') plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') plt.ylabel('Y') plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) plt.show() From b526b4d4eb076f8f04b21eeb8e3e3a190191b7f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:27:01 +0000 Subject: [PATCH 03/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 81 ++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 3e044d8f6..566f8e217 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,8 +2,10 @@ import pandas as pd import math import matplotlib.pyplot as plt from typing import Dict, List + + class dbscan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -21,9 +23,10 @@ class dbscan: obj = dbscan.dbscan(minPts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minPts : int, radius : int, file : str) -> None: - ''' + """ + + def __init__(self, minPts: int, radius: int, file: str) -> None: + """ Constructor Attributes: @@ -48,13 +51,14 @@ class dbscan: 6 | 4 7 | 3 ----- - ''' + """ self.minPts = minPts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> Dict[int, List[int]]: - ''' + """ Parameters: ----------- None @@ -62,7 +66,7 @@ class dbscan: Return: -------- Dictionary with points and the list of points that lie in its radius - ''' + """ data = pd.read_csv(self.file) minPts = self.minPts @@ -71,50 +75,75 @@ class dbscan: dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minPts: print("Core") else: for j in self.dict1: - if i != j and len(self.dict1[j]) >= self.minPts and i in self.dict1[j]: + if ( + i != j + and len(self.dict1[j]) >= self.minPts + and i in self.dict1[j] + ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along with the circle that lie within it. - ''' + """ data = pd.read_csv(self.file) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minPts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() From 49e9f614f56278ea65f089737a5a956116baa5ce Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 21:13:40 +0530 Subject: [PATCH 04/27] Update dbscan.py --- machine_learning/dbscan.py | 112 +++++++++++++++---------------------- 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 566f8e217..d83bdbaba 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,11 +1,9 @@ import pandas as pd import math import matplotlib.pyplot as plt -from typing import Dict, List - - -class dbscan: - """ +from typing import dict, list +class dbScan: + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -20,20 +18,22 @@ class dbscan: To create a object ------------------ import dbscan - obj = dbscan.dbscan(minPts, radius, file) + obj = dbscan.dbscan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__(self, minPts: int, radius: int, file: str) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : str) -> None: + ''' Constructor Attributes: ----------- - minPts (int) : Minimum number of points needed to be within the radius to considered as core - radius (int) : The radius from a given core point where other core points can be considered as core - file (csv) : CSV file location. Should contain x and y coordinate value for each point. + minpts (int) : Minimum number of points needed to be + within the radius to considered as core + radius (int) : The radius from a given core point where + other core points can be considered as core + file (csv) : CSV file location. Should contain x and y + coordinate value for each point. Example : minPts = 4 @@ -51,99 +51,77 @@ class dbscan: 6 | 4 7 | 3 ----- - """ - self.minPts = minPts + ''' + self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: + ''' + >>>perform_dbscan() - def perform_dbscan(self) -> Dict[int, List[int]]: - """ Parameters: ----------- None Return: -------- - Dictionary with points and the list of points that lie in its radius - """ + Dictionary with points and the list of points + that lie in its radius + ''' data = pd.read_csv(self.file) - minPts = self.minPts + minpts = self.minpts e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") - if len(self.dict1[i]) >= self.minPts: + print(i," ",self.dict1[i], end=' ---> ') + if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if ( - i != j - and len(self.dict1[j]) >= self.minPts - and i in self.dict1[j] - ): + if i != j and len(self.dict1[j]) >= self.minpts and i in self.dict1[j]: print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- - A matplotlib plot that show points as core and noise along with the circle that lie within it. - """ + A matplotlib plot that show points as core and noise along + with the circle that lie within it. + ''' data = pd.read_csv(self.file) e = self.radius for i in self.dict1: - if len(self.dict1[i]) >= self.minPts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + if len(self.dict1[i]) >= self.minpts: + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() From d61809015ba6a5a85020cf265c5421660892f4eb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:44:14 +0000 Subject: [PATCH 05/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 81 ++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index d83bdbaba..8bc9bf92b 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,8 +2,10 @@ import pandas as pd import math import matplotlib.pyplot as plt from typing import dict, list + + class dbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -21,9 +23,10 @@ class dbScan: obj = dbscan.dbscan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : str) -> None: - ''' + """ + + def __init__(self, minpts: int, radius: int, file: str) -> None: + """ Constructor Attributes: @@ -51,13 +54,14 @@ class dbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ >>>perform_dbscan() Parameters: @@ -68,7 +72,7 @@ class dbScan: -------- Dictionary with points and the list of points that lie in its radius - ''' + """ data = pd.read_csv(self.file) minpts = self.minpts @@ -77,51 +81,76 @@ class dbScan: dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if i != j and len(self.dict1[j]) >= self.minpts and i in self.dict1[j]: + if ( + i != j + and len(self.dict1[j]) >= self.minpts + and i in self.dict1[j] + ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along with the circle that lie within it. - ''' + """ data = pd.read_csv(self.file) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() From 12ac966b635e3b33b93ddbfc75799345126e62b8 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 21:20:12 +0530 Subject: [PATCH 06/27] Update dbscan.py --- machine_learning/dbscan.py | 91 ++++++++++++-------------------------- 1 file changed, 28 insertions(+), 63 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 8bc9bf92b..9b5f76456 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,10 +2,8 @@ import pandas as pd import math import matplotlib.pyplot as plt from typing import dict, list - - -class dbScan: - """ +class DbScan: + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -20,13 +18,12 @@ class dbScan: To create a object ------------------ import dbscan - obj = dbscan.dbscan(minpts, radius, file) + obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__(self, minpts: int, radius: int, file: str) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : str) -> None: + ''' Constructor Attributes: @@ -54,16 +51,13 @@ class dbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> dict[int, list[int]]: - """ - >>>perform_dbscan() - + ''' Parameters: ----------- None @@ -72,85 +66,56 @@ class dbScan: -------- Dictionary with points and the list of points that lie in its radius - """ + ''' data = pd.read_csv(self.file) - - minpts = self.minpts e = self.radius - dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] - + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if ( - i != j - and len(self.dict1[j]) >= self.minpts - and i in self.dict1[j] - ): + if i != j and len(self.dict1[j]) >= self.minpts and i in self.dict1[j]: print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along with the circle that lie within it. - """ + ''' data = pd.read_csv(self.file) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() From a393075ede41cb613020816d2578d2b57bf071c6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:51:03 +0000 Subject: [PATCH 07/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 81 ++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 9b5f76456..107ab2f68 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,8 +2,10 @@ import pandas as pd import math import matplotlib.pyplot as plt from typing import dict, list + + class DbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -21,9 +23,10 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : str) -> None: - ''' + """ + + def __init__(self, minpts: int, radius: int, file: str) -> None: + """ Constructor Attributes: @@ -51,13 +54,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Parameters: ----------- None @@ -66,56 +70,81 @@ class DbScan: -------- Dictionary with points and the list of points that lie in its radius - ''' + """ data = pd.read_csv(self.file) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if i != j and len(self.dict1[j]) >= self.minpts and i in self.dict1[j]: + if ( + i != j + and len(self.dict1[j]) >= self.minpts + and i in self.dict1[j] + ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along with the circle that lie within it. - ''' + """ data = pd.read_csv(self.file) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() From 254854e832d96145af261654b523a0a902d28591 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 15:43:16 +0530 Subject: [PATCH 08/27] Update dbscan.py --- machine_learning/dbscan.py | 159 +++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 70 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 107ab2f68..0b791652b 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,20 +1,21 @@ -import pandas as pd import math import matplotlib.pyplot as plt -from typing import dict, list - - +import pandas as pd +from typing import dict, list, optional class DbScan: - """ + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise - Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN - + Reference Website : https://en.wikipedia.org/wiki/DBSCAN + Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU + Functions: ---------- __init__() : Constructor that sets minPts, radius and file - perform_dbscan() : Invoked by constructor and calculates the core and noise points and returns a dictionary. - print_dbscan() : Prints the core and noise points along with stating if the noise are border points or not. + perform_dbscan() : Invoked by constructor and calculates the core + and noise points and returns a dictionary. + print_dbscan() : Prints the core and noise points along + with stating if the noise are border points or not. plot_dbscan() : Plots the points to show the core and noise point. To create a object @@ -23,13 +24,17 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__(self, minpts: int, radius: int, file: str) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : optional[str] = + [{'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}] + ) -> None: + ''' Constructor - Attributes: + Args: ----------- minpts (int) : Minimum number of points needed to be within the radius to considered as core @@ -54,97 +59,111 @@ class DbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> dict[int, list[int]]: - """ - Parameters: + ''' + Args: ----------- - None + None Return: -------- - Dictionary with points and the list of points - that lie in its radius - """ - data = pd.read_csv(self.file) + Dictionary with points and the list + of points that lie in its radius + + >>> result = DbScan(4, 1.9).perform_dbscan() + >>> for key in sorted(result): + ... print(key, sorted(result[key])) + 1 [1, 2, 10] + 2 [1, 2, 3, 11] + 3 [2, 3, 4] + 4 [3, 4, 5] + 5 [4, 5, 6, 7, 8] + 6 [5, 6, 7] + 7 [5, 6, 7] + 8 [5, 8] + 9 [9, 12] + 10 [1, 10, 11] + 11 [2, 10, 11, 12] + 12 [9, 11, 12] + + ''' + data = pd.read_csv(self.file) if type(self.file) == type("str") else pd.DataFrame(self.file) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - """ + + >>> DbScan(4,1.9).print_dbscan() + 1 [1, 2, 10] ---> Noise ---> Border + 2 [1, 2, 3, 11] ---> Core + 3 [2, 3, 4] ---> Noise ---> Border + 4 [3, 4, 5] ---> Noise ---> Border + 5 [4, 5, 6, 7, 8] ---> Core + 6 [5, 6, 7] ---> Noise ---> Border + 7 [5, 6, 7] ---> Noise ---> Border + 8 [5, 8] ---> Noise ---> Border + 9 [9, 12] ---> Noise + 10 [1, 10, 11] ---> Noise ---> Border + 11 [2, 10, 11, 12] ---> Core + 12 [9, 11, 12] ---> Noise ---> Border + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if ( - i != j - and len(self.dict1[j]) >= self.minpts - and i in self.dict1[j] - ): - print("Noise ---> Border") - break + if i != j and len(self.dict1[j]) >= self.minpts: + if i in self.dict1[j]: + print("Noise ---> Border") + break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along with the circle that lie within it. - """ - data = pd.read_csv(self.file) + + >>> DbScan(4,1.9).plot_dbscan() + Plotted Successfully + ''' + data = pd.read_csv(self.file) if type(self.file) == type("str") else pd.DataFrame(self.file) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() + print("Plotted Successfully") From b7e5e9c112b3e3ac80a5383fc3705a02d5f1f0b5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:18:48 +0000 Subject: [PATCH 09/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 116 +++++++++++++++++++++++++------------ 1 file changed, 79 insertions(+), 37 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 0b791652b..acb58dce6 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,13 +2,15 @@ import math import matplotlib.pyplot as plt import pandas as pd from typing import dict, list, optional + + class DbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Reference Website : https://en.wikipedia.org/wiki/DBSCAN Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU - + Functions: ---------- __init__() : Constructor that sets minPts, radius and file @@ -24,14 +26,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : optional[str] = - [{'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}] - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: optional[str] = [ + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ], + ) -> None: + """ Constructor Args: @@ -59,13 +75,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Args: ----------- None @@ -91,22 +108,31 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' - data = pd.read_csv(self.file) if type(self.file) == type("str") else pd.DataFrame(self.file) + """ + data = ( + pd.read_csv(self.file) + if type(self.file) == type("str") + else pd.DataFrame(self.file) + ) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -124,9 +150,9 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: @@ -137,8 +163,9 @@ class DbScan: break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -146,24 +173,39 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' - data = pd.read_csv(self.file) if type(self.file) == type("str") else pd.DataFrame(self.file) + """ + data = ( + pd.read_csv(self.file) + if type(self.file) == type("str") + else pd.DataFrame(self.file) + ) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") From 249b0e88716b4f28d54e9e863b4b82fd9d25a5bd Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 16:03:06 +0530 Subject: [PATCH 10/27] Update dbscan.py --- machine_learning/dbscan.py | 136 ++++++++++++++----------------------- 1 file changed, 51 insertions(+), 85 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index acb58dce6..16a3d9cb2 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,15 +1,12 @@ import math +from typing import dict, list, optional import matplotlib.pyplot as plt import pandas as pd -from typing import dict, list, optional - - class DbScan: - """ + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise - Reference Website : https://en.wikipedia.org/wiki/DBSCAN - Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU + Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN Functions: ---------- @@ -26,28 +23,14 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__( - self, - minpts: int, - radius: int, - file: optional[str] = [ - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ], - ) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : optional[str] = + ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) + ) -> None: + ''' Constructor Args: @@ -75,14 +58,13 @@ class DbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> dict[int, list[int]]: - """ + ''' Args: ----------- None @@ -108,31 +90,25 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - """ - data = ( - pd.read_csv(self.file) - if type(self.file) == type("str") - else pd.DataFrame(self.file) - ) + ''' + if type(self.file) is str: + data = pd.read_csv(self.file) + else: + data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -150,22 +126,24 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if i != j and len(self.dict1[j]) >= self.minpts: - if i in self.dict1[j]: - print("Noise ---> Border") - break + if ( + i != j + and len(self.dict1[j]) >= self.minpts + and i in self.dict1[j] + ): + print("Noise ---> Border") + break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along @@ -173,39 +151,27 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - """ - data = ( - pd.read_csv(self.file) - if type(self.file) == type("str") - else pd.DataFrame(self.file) - ) + ''' + if type(self.file) is str: + data = pd.read_csv(self.file) + else: + data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() print("Plotted Successfully") From 4d76e8236bd9c7a7e5a2b7fd07f1fcba00ff3e85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:33:28 +0000 Subject: [PATCH 11/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 110 ++++++++++++++++++++++++------------- 1 file changed, 72 insertions(+), 38 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 16a3d9cb2..2ae3991d0 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -2,8 +2,10 @@ import math from typing import dict, list, optional import matplotlib.pyplot as plt import pandas as pd + + class DbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -23,14 +25,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : optional[str] = - ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: optional[str] = ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ), + ) -> None: + """ Constructor Args: @@ -58,13 +74,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Args: ----------- None @@ -90,25 +107,30 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -126,24 +148,25 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -151,27 +174,38 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") From ab2822788e2b5bd2f647a34805d7a395f7a34550 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 16:10:40 +0530 Subject: [PATCH 12/27] Update Final dbscan.py --- machine_learning/dbscan.py | 114 +++++++++++++------------------------ 1 file changed, 41 insertions(+), 73 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 2ae3991d0..0a396a866 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,11 +1,11 @@ import math -from typing import dict, list, optional + import matplotlib.pyplot as plt import pandas as pd - +from typing import Dict, List, Optional class DbScan: - """ + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -25,28 +25,14 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__( - self, - minpts: int, - radius: int, - file: optional[str] = ( - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ), - ) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : Optional[str] = + ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) + ) -> None: + ''' Constructor Args: @@ -74,14 +60,13 @@ class DbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - - def perform_dbscan(self) -> dict[int, list[int]]: - """ + def perform_dbscan(self) -> Dict[int, List[int]]: + ''' Args: ----------- None @@ -107,30 +92,25 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -148,25 +128,24 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along @@ -174,38 +153,27 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() print("Plotted Successfully") From 67ccda1f0ec510fe03f88a240c68796db9a204d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:41:25 +0000 Subject: [PATCH 13/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 109 ++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 0a396a866..1501724d5 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -4,8 +4,9 @@ import matplotlib.pyplot as plt import pandas as pd from typing import Dict, List, Optional + class DbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -25,14 +26,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : Optional[str] = - ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: Optional[str] = ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ), + ) -> None: + """ Constructor Args: @@ -60,13 +75,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> Dict[int, List[int]]: - ''' + """ Args: ----------- None @@ -92,25 +108,30 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -128,24 +149,25 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -153,27 +175,38 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") From 8b4d5e8338d568ec3e2bc0b660b2dda06bf70ec7 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 16:24:52 +0530 Subject: [PATCH 14/27] Update final2 dbscan.py --- machine_learning/dbscan.py | 124 +++++++++++++++---------------------- 1 file changed, 51 insertions(+), 73 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 1501724d5..34285a9cd 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,12 +1,18 @@ +''' + +Author : Gowtham Kamalasekar +LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ + +''' + import math import matplotlib.pyplot as plt import pandas as pd -from typing import Dict, List, Optional - +from typing import dict, list class DbScan: - """ + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -26,28 +32,14 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__( - self, - minpts: int, - radius: int, - file: Optional[str] = ( - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ), - ) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : str = + ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) + ) -> None: + ''' Constructor Args: @@ -75,14 +67,13 @@ class DbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - - def perform_dbscan(self) -> Dict[int, List[int]]: - """ + def perform_dbscan(self) -> dict[int, list[int]]: + ''' Args: ----------- None @@ -108,30 +99,25 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -149,25 +135,24 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along @@ -175,38 +160,31 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() print("Plotted Successfully") + +if __name__ == "__main__": + import doctest + doctest.testmod() From 0b6579460e3b495a4bf2c7976e7c8c3ce2bfaaa6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:55:13 +0000 Subject: [PATCH 15/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 115 ++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 40 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 34285a9cd..e14e5e4b0 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,9 +1,9 @@ -''' +""" Author : Gowtham Kamalasekar LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ -''' +""" import math @@ -11,8 +11,9 @@ import matplotlib.pyplot as plt import pandas as pd from typing import dict, list + class DbScan: - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -32,14 +33,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : str = - ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: str = ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ), + ) -> None: + """ Constructor Args: @@ -67,13 +82,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Args: ----------- None @@ -99,25 +115,30 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -135,24 +156,25 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -160,31 +182,44 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") + if __name__ == "__main__": import doctest + doctest.testmod() From e13b9d9ef202ad14faec90a471035d87d2f3dd69 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 16:33:05 +0530 Subject: [PATCH 16/27] Update final3 dbscan.py --- machine_learning/dbscan.py | 127 ++++++++++++++----------------------- 1 file changed, 46 insertions(+), 81 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index e14e5e4b0..b260d2ceb 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,19 +1,18 @@ -""" +''' Author : Gowtham Kamalasekar LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ -""" - -import math - -import matplotlib.pyplot as plt -import pandas as pd -from typing import dict, list - +''' class DbScan: - """ + import math + + import matplotlib.pyplot as plt + import pandas as pd + from typing import dict, list + + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -33,28 +32,14 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__( - self, - minpts: int, - radius: int, - file: str = ( - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ), - ) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : str = + ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) + ) -> None: + ''' Constructor Args: @@ -82,14 +67,13 @@ class DbScan: 6 | 4 7 | 3 ----- - """ + ''' self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> dict[int, list[int]]: - """ + ''' Args: ----------- None @@ -115,30 +99,25 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -156,25 +135,24 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") + print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- A matplotlib plot that show points as core and noise along @@ -182,44 +160,31 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - """ + ''' if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show() print("Plotted Successfully") - if __name__ == "__main__": import doctest - doctest.testmod() From 61beb794377ad6c075c177d280b1aedaa217c2ec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:03:27 +0000 Subject: [PATCH 17/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 117 ++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 41 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index b260d2ceb..62f22af74 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,9 +1,10 @@ -''' +""" Author : Gowtham Kamalasekar LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ -''' +""" + class DbScan: import math @@ -11,8 +12,8 @@ class DbScan: import matplotlib.pyplot as plt import pandas as pd from typing import dict, list - - ''' + + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -32,14 +33,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : str = - ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: str = ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ), + ) -> None: + """ Constructor Args: @@ -67,13 +82,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Args: ----------- None @@ -99,25 +115,30 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -135,24 +156,25 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -160,31 +182,44 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") + if __name__ == "__main__": import doctest + doctest.testmod() From 0708d4b851943565bbf8dd4fa39fe0be0bba2e06 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 16:36:03 +0530 Subject: [PATCH 18/27] Update final5 dbscan.py --- machine_learning/dbscan.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 62f22af74..3b25bdcb4 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -7,12 +7,6 @@ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ class DbScan: - import math - - import matplotlib.pyplot as plt - import pandas as pd - from typing import dict, list - """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise @@ -34,7 +28,13 @@ class DbScan: obj.print_dbscan() obj.plot_dbscan() """ + + import math + import matplotlib.pyplot as plt + import pandas as pd + from typing import dict, list + def __init__( self, minpts: int, @@ -217,8 +217,7 @@ class DbScan: plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") - - + if __name__ == "__main__": import doctest From d2dbdc1136a5a82917e66abc0645be6128a671e2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 11:07:01 +0000 Subject: [PATCH 19/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 3b25bdcb4..1fd2174b0 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -28,13 +28,13 @@ class DbScan: obj.print_dbscan() obj.plot_dbscan() """ - + import math import matplotlib.pyplot as plt import pandas as pd from typing import dict, list - + def __init__( self, minpts: int, @@ -217,7 +217,8 @@ class DbScan: plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") - + + if __name__ == "__main__": import doctest From 59f4a0e0462749aa89ffe4850958f59475c1033e Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 19:41:32 +0530 Subject: [PATCH 20/27] Delete machine_learning/dbscan.py --- machine_learning/dbscan.py | 225 ------------------------------------- 1 file changed, 225 deletions(-) delete mode 100644 machine_learning/dbscan.py diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py deleted file mode 100644 index 1fd2174b0..000000000 --- a/machine_learning/dbscan.py +++ /dev/null @@ -1,225 +0,0 @@ -""" - -Author : Gowtham Kamalasekar -LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ - -""" - - -class DbScan: - """ - DBSCAN Algorithm : - Density-Based Spatial Clustering Of Applications With Noise - Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN - - Functions: - ---------- - __init__() : Constructor that sets minPts, radius and file - perform_dbscan() : Invoked by constructor and calculates the core - and noise points and returns a dictionary. - print_dbscan() : Prints the core and noise points along - with stating if the noise are border points or not. - plot_dbscan() : Plots the points to show the core and noise point. - - To create a object - ------------------ - import dbscan - obj = dbscan.DbScan(minpts, radius, file) - obj.print_dbscan() - obj.plot_dbscan() - """ - - import math - - import matplotlib.pyplot as plt - import pandas as pd - from typing import dict, list - - def __init__( - self, - minpts: int, - radius: int, - file: str = ( - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ), - ) -> None: - """ - Constructor - - Args: - ----------- - minpts (int) : Minimum number of points needed to be - within the radius to considered as core - radius (int) : The radius from a given core point where - other core points can be considered as core - file (csv) : CSV file location. Should contain x and y - coordinate value for each point. - - Example : - minPts = 4 - radius = 1.9 - file = 'data_dbscan.csv' - - File Structure of CSV Data: - --------------------------- - _____ - x | y - ----- - 3 | 7 - 4 | 6 - 5 | 5 - 6 | 4 - 7 | 3 - ----- - """ - self.minpts = minpts - self.radius = radius - self.file = file - self.dict1 = self.perform_dbscan() - - def perform_dbscan(self) -> dict[int, list[int]]: - """ - Args: - ----------- - None - - Return: - -------- - Dictionary with points and the list - of points that lie in its radius - - >>> result = DbScan(4, 1.9).perform_dbscan() - >>> for key in sorted(result): - ... print(key, sorted(result[key])) - 1 [1, 2, 10] - 2 [1, 2, 3, 11] - 3 [2, 3, 4] - 4 [3, 4, 5] - 5 [4, 5, 6, 7, 8] - 6 [5, 6, 7] - 7 [5, 6, 7] - 8 [5, 8] - 9 [9, 12] - 10 [1, 10, 11] - 11 [2, 10, 11, 12] - 12 [9, 11, 12] - - """ - if type(self.file) is str: - data = pd.read_csv(self.file) - else: - data = pd.DataFrame(list(self.file)) - e = self.radius - dict1 = {} - for i in range(len(data)): - for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) - if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) - else: - dict1[i + 1] = [ - j + 1, - ] - return dict1 - - def print_dbscan(self) -> None: - """ - Outputs: - -------- - Prints each point and if it is a core or a noise (w/ border) - - >>> DbScan(4,1.9).print_dbscan() - 1 [1, 2, 10] ---> Noise ---> Border - 2 [1, 2, 3, 11] ---> Core - 3 [2, 3, 4] ---> Noise ---> Border - 4 [3, 4, 5] ---> Noise ---> Border - 5 [4, 5, 6, 7, 8] ---> Core - 6 [5, 6, 7] ---> Noise ---> Border - 7 [5, 6, 7] ---> Noise ---> Border - 8 [5, 8] ---> Noise ---> Border - 9 [9, 12] ---> Noise - 10 [1, 10, 11] ---> Noise ---> Border - 11 [2, 10, 11, 12] ---> Core - 12 [9, 11, 12] ---> Noise ---> Border - """ - for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") - if len(self.dict1[i]) >= self.minpts: - print("Core") - else: - for j in self.dict1: - if ( - i != j - and len(self.dict1[j]) >= self.minpts - and i in self.dict1[j] - ): - print("Noise ---> Border") - break - else: - print("Noise") - - def plot_dbscan(self) -> None: - """ - Output: - ------- - A matplotlib plot that show points as core and noise along - with the circle that lie within it. - - >>> DbScan(4,1.9).plot_dbscan() - Plotted Successfully - """ - if type(self.file) is str: - data = pd.read_csv(self.file) - else: - data = pd.DataFrame(list(self.file)) - e = self.radius - for i in self.dict1: - if len(self.dict1[i]) >= self.minpts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) - plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) - plt.show() - print("Plotted Successfully") - - -if __name__ == "__main__": - import doctest - - doctest.testmod() From cd539e378dce200fd96cf4cb7fe0386722c41674 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 19:42:19 +0530 Subject: [PATCH 21/27] Final Update of DBSCAN --- machine_learning/dbscan.py | 190 +++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 machine_learning/dbscan.py diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py new file mode 100644 index 000000000..f55037896 --- /dev/null +++ b/machine_learning/dbscan.py @@ -0,0 +1,190 @@ +''' + +Author : Gowtham Kamalasekar +LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ + +''' +import math + +import matplotlib.pyplot as plt +import pandas as pd +from typing import dict, list + +class DbScan: + + ''' + DBSCAN Algorithm : + Density-Based Spatial Clustering Of Applications With Noise + Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN + + Functions: + ---------- + __init__() : Constructor that sets minPts, radius and file + perform_dbscan() : Invoked by constructor and calculates the core + and noise points and returns a dictionary. + print_dbscan() : Prints the core and noise points along + with stating if the noise are border points or not. + plot_dbscan() : Plots the points to show the core and noise point. + + To create a object + ------------------ + import dbscan + obj = dbscan.DbScan(minpts, radius, file) + obj.print_dbscan() + obj.plot_dbscan() + ''' + def __init__(self, minpts : int, radius : int, file : str = + ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, + {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, + {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, + {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) + ) -> None: + ''' + Constructor + + Args: + ----------- + minpts (int) : Minimum number of points needed to be + within the radius to considered as core + radius (int) : The radius from a given core point where + other core points can be considered as core + file (csv) : CSV file location. Should contain x and y + coordinate value for each point. + + Example : + minPts = 4 + radius = 1.9 + file = 'data_dbscan.csv' + + File Structure of CSV Data: + --------------------------- + _____ + x | y + ----- + 3 | 7 + 4 | 6 + 5 | 5 + 6 | 4 + 7 | 3 + ----- + ''' + self.minpts = minpts + self.radius = radius + self.file = file + self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: + ''' + Args: + ----------- + None + + Return: + -------- + Dictionary with points and the list + of points that lie in its radius + + >>> result = DbScan(4, 1.9).perform_dbscan() + >>> for key in sorted(result): + ... print(key, sorted(result[key])) + 1 [1, 2, 10] + 2 [1, 2, 3, 11] + 3 [2, 3, 4] + 4 [3, 4, 5] + 5 [4, 5, 6, 7, 8] + 6 [5, 6, 7] + 7 [5, 6, 7] + 8 [5, 8] + 9 [9, 12] + 10 [1, 10, 11] + 11 [2, 10, 11, 12] + 12 [9, 11, 12] + + ''' + if type(self.file) is str: + data = pd.read_csv(self.file) + else: + data = pd.DataFrame(list(self.file)) + e = self.radius + dict1 = {} + for i in range(len(data)): + for j in range(len(data)): + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + + pow(data['y'][j] - data['y'][i],2)) + if dist < e: + if i+1 in dict1: + dict1[i+1].append(j+1) + else: + dict1[i+1] = [j+1,] + return dict1 + def print_dbscan(self) -> None: + ''' + Outputs: + -------- + Prints each point and if it is a core or a noise (w/ border) + + >>> DbScan(4,1.9).print_dbscan() + 1 [1, 2, 10] ---> Noise ---> Border + 2 [1, 2, 3, 11] ---> Core + 3 [2, 3, 4] ---> Noise ---> Border + 4 [3, 4, 5] ---> Noise ---> Border + 5 [4, 5, 6, 7, 8] ---> Core + 6 [5, 6, 7] ---> Noise ---> Border + 7 [5, 6, 7] ---> Noise ---> Border + 8 [5, 8] ---> Noise ---> Border + 9 [9, 12] ---> Noise + 10 [1, 10, 11] ---> Noise ---> Border + 11 [2, 10, 11, 12] ---> Core + 12 [9, 11, 12] ---> Noise ---> Border + ''' + for i in self.dict1: + print(i," ",self.dict1[i], end=' ---> ') + if len(self.dict1[i]) >= self.minpts: + print("Core") + else: + for j in self.dict1: + if ( + i != j + and len(self.dict1[j]) >= self.minpts + and i in self.dict1[j] + ): + print("Noise ---> Border") + break + else: + print("Noise") + def plot_dbscan(self) -> None: + ''' + Output: + ------- + A matplotlib plot that show points as core and noise along + with the circle that lie within it. + + >>> DbScan(4,1.9).plot_dbscan() + Plotted Successfully + ''' + if type(self.file) is str: + data = pd.read_csv(self.file) + else: + data = pd.DataFrame(list(self.file)) + e = self.radius + for i in self.dict1: + if len(self.dict1[i]) >= self.minpts: + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), + e, color='blue', fill=False) + plt.gca().add_artist(circle) + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + else: + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], + 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) + plt.show() + print("Plotted Successfully") + +if __name__ == "__main__": + import doctest + doctest.testmod() From 5e148f524dad30f5f05b921f796201977cf7c706 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2024 14:12:56 +0000 Subject: [PATCH 22/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/dbscan.py | 117 ++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 41 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index f55037896..e14e5e4b0 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,18 +1,19 @@ -''' +""" Author : Gowtham Kamalasekar LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ -''' +""" + import math import matplotlib.pyplot as plt import pandas as pd from typing import dict, list + class DbScan: - - ''' + """ DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -32,14 +33,28 @@ class DbScan: obj = dbscan.DbScan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - ''' - def __init__(self, minpts : int, radius : int, file : str = - ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5}, - {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2}, - {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3}, - {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}) - ) -> None: - ''' + """ + + def __init__( + self, + minpts: int, + radius: int, + file: str = ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ), + ) -> None: + """ Constructor Args: @@ -67,13 +82,14 @@ class DbScan: 6 | 4 7 | 3 ----- - ''' + """ self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: - ''' + """ Args: ----------- None @@ -99,25 +115,30 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) - + pow(data['y'][j] - data['y'][i],2)) + dist = math.sqrt( + pow(data["x"][j] - data["x"][i], 2) + + pow(data["y"][j] - data["y"][i], 2) + ) if dist < e: - if i+1 in dict1: - dict1[i+1].append(j+1) + if i + 1 in dict1: + dict1[i + 1].append(j + 1) else: - dict1[i+1] = [j+1,] + dict1[i + 1] = [ + j + 1, + ] return dict1 + def print_dbscan(self) -> None: - ''' + """ Outputs: -------- Prints each point and if it is a core or a noise (w/ border) @@ -135,24 +156,25 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border - ''' + """ for i in self.dict1: - print(i," ",self.dict1[i], end=' ---> ') + print(i, " ", self.dict1[i], end=" ---> ") if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: if ( - i != j - and len(self.dict1[j]) >= self.minpts + i != j + and len(self.dict1[j]) >= self.minpts and i in self.dict1[j] ): print("Noise ---> Border") break else: print("Noise") + def plot_dbscan(self) -> None: - ''' + """ Output: ------- A matplotlib plot that show points as core and noise along @@ -160,31 +182,44 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully - ''' + """ if type(self.file) is str: - data = pd.read_csv(self.file) + data = pd.read_csv(self.file) else: data = pd.DataFrame(list(self.file)) e = self.radius for i in self.dict1: if len(self.dict1[i]) >= self.minpts: - plt.scatter(data['x'][i-1], data['y'][i-1], color='red') - circle = plt.Circle((data['x'][i-1], data['y'][i-1]), - e, color='blue', fill=False) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") + circle = plt.Circle( + (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False + ) plt.gca().add_artist(circle) - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) else: - plt.scatter(data['x'][i-1], data['y'][i-1], color='green') - plt.text(data['x'][i-1], data['y'][i-1], - 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') - plt.ylabel('Y') - plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) + plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") + plt.text( + data["x"][i - 1], + data["y"][i - 1], + "P" + str(i), + ha="center", + va="bottom", + ) + plt.xlabel("X") + plt.ylabel("Y") + plt.title("DBSCAN Clustering") + plt.legend(["Core", "Noise"]) plt.show() print("Plotted Successfully") + if __name__ == "__main__": import doctest + doctest.testmod() From 7c76e5c99251877bcaa36d13933167e93e97c0c5 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 19:53:08 +0530 Subject: [PATCH 23/27] full final update of dbscan --- machine_learning/dbscan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index e14e5e4b0..1216c20b5 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -6,10 +6,10 @@ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ """ import math +from typing import dict, list import matplotlib.pyplot as plt import pandas as pd -from typing import dict, list class DbScan: From d49fea0cc6cb3d3e54f153b1d764d1c606690472 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 20:03:34 +0530 Subject: [PATCH 24/27] Changed typing accordingly --- machine_learning/dbscan.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 1216c20b5..2cc02cf89 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -6,7 +6,7 @@ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ """ import math -from typing import dict, list +from typing import Dict, List import matplotlib.pyplot as plt import pandas as pd @@ -39,20 +39,7 @@ class DbScan: self, minpts: int, radius: int, - file: str = ( - {"x": 3, "y": 7}, - {"x": 4, "y": 6}, - {"x": 5, "y": 5}, - {"x": 6, "y": 4}, - {"x": 7, "y": 3}, - {"x": 6, "y": 2}, - {"x": 7, "y": 2}, - {"x": 8, "y": 4}, - {"x": 3, "y": 3}, - {"x": 2, "y": 6}, - {"x": 3, "y": 5}, - {"x": 2, "y": 4}, - ), + file: str = "None", ) -> None: """ Constructor @@ -85,10 +72,27 @@ class DbScan: """ self.minpts = minpts self.radius = radius - self.file = file + self.file = ( + file + if file != "None" + else ( + {"x": 3, "y": 7}, + {"x": 4, "y": 6}, + {"x": 5, "y": 5}, + {"x": 6, "y": 4}, + {"x": 7, "y": 3}, + {"x": 6, "y": 2}, + {"x": 7, "y": 2}, + {"x": 8, "y": 4}, + {"x": 3, "y": 3}, + {"x": 2, "y": 6}, + {"x": 3, "y": 5}, + {"x": 2, "y": 4}, + ) + ) self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> dict[int, list[int]]: + def perform_dbscan(self) -> Dict[int, List[int]]: """ Args: ----------- From 3fa1d184267e3bfb4a208168a9062ba0d3ad3d7f Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 20:07:45 +0530 Subject: [PATCH 25/27] Update dbscan.py removed typing --- machine_learning/dbscan.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 2cc02cf89..262b46b11 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -6,7 +6,6 @@ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ """ import math -from typing import Dict, List import matplotlib.pyplot as plt import pandas as pd @@ -92,7 +91,7 @@ class DbScan: ) self.dict1 = self.perform_dbscan() - def perform_dbscan(self) -> Dict[int, List[int]]: + def perform_dbscan(self) -> dict[int, list[int]]: """ Args: ----------- From 94caecf36e6ad1ac004f0b613d344f76c51fa0cc Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 20:12:01 +0530 Subject: [PATCH 26/27] Update dbscan.py with annotation for dict1 --- machine_learning/dbscan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 262b46b11..90fec1c0a 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -124,7 +124,7 @@ class DbScan: else: data = pd.DataFrame(list(self.file)) e = self.radius - dict1 = {} + dict1: dict[int, list[int]] = {} for i in range(len(data)): for j in range(len(data)): dist = math.sqrt( From 36700a13ee46d2f4bb01d00df2d66f72393d96a9 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Wed, 2 Oct 2024 21:14:43 +0530 Subject: [PATCH 27/27] Update dbscan.py with more test cases --- machine_learning/dbscan.py | 73 +++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 90fec1c0a..744ef69b8 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -7,6 +7,7 @@ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ import math +import matplotlib.patches as mpatches import matplotlib.pyplot as plt import pandas as pd @@ -118,6 +119,38 @@ class DbScan: 11 [2, 10, 11, 12] 12 [9, 11, 12] + >>> result = DbScan(3, 2.5).perform_dbscan() + >>> for key in sorted(result): + ... print(key, sorted(result[key])) + 1 [1, 2, 10, 11] + 2 [1, 2, 3, 10, 11] + 3 [2, 3, 4, 11] + 4 [3, 4, 5, 6, 7, 8] + 5 [4, 5, 6, 7, 8] + 6 [4, 5, 6, 7] + 7 [4, 5, 6, 7, 8] + 8 [4, 5, 7, 8] + 9 [9, 11, 12] + 10 [1, 2, 10, 11, 12] + 11 [1, 2, 3, 9, 10, 11, 12] + 12 [9, 10, 11, 12] + + >>> result = DbScan(5, 2.5).perform_dbscan() + >>> for key in sorted(result): + ... print(key, sorted(result[key])) + 1 [1, 2, 10, 11] + 2 [1, 2, 3, 10, 11] + 3 [2, 3, 4, 11] + 4 [3, 4, 5, 6, 7, 8] + 5 [4, 5, 6, 7, 8] + 6 [4, 5, 6, 7] + 7 [4, 5, 6, 7, 8] + 8 [4, 5, 7, 8] + 9 [9, 11, 12] + 10 [1, 2, 10, 11, 12] + 11 [1, 2, 3, 9, 10, 11, 12] + 12 [9, 10, 11, 12] + """ if type(self.file) is str: data = pd.read_csv(self.file) @@ -159,6 +192,35 @@ class DbScan: 10 [1, 10, 11] ---> Noise ---> Border 11 [2, 10, 11, 12] ---> Core 12 [9, 11, 12] ---> Noise ---> Border + + >>> DbScan(5,2.5).print_dbscan() + 1 [1, 2, 10, 11] ---> Noise ---> Border + 2 [1, 2, 3, 10, 11] ---> Core + 3 [2, 3, 4, 11] ---> Noise ---> Border + 4 [3, 4, 5, 6, 7, 8] ---> Core + 5 [4, 5, 6, 7, 8] ---> Core + 6 [4, 5, 6, 7] ---> Noise ---> Border + 7 [4, 5, 6, 7, 8] ---> Core + 8 [4, 5, 7, 8] ---> Noise ---> Border + 9 [9, 11, 12] ---> Noise ---> Border + 10 [1, 2, 10, 11, 12] ---> Core + 11 [1, 2, 3, 9, 10, 11, 12] ---> Core + 12 [9, 10, 11, 12] ---> Noise ---> Border + + >>> DbScan(2,0.5).print_dbscan() + 1 [1] ---> Noise + 2 [2] ---> Noise + 3 [3] ---> Noise + 4 [4] ---> Noise + 5 [5] ---> Noise + 6 [6] ---> Noise + 7 [7] ---> Noise + 8 [8] ---> Noise + 9 [9] ---> Noise + 10 [10] ---> Noise + 11 [11] ---> Noise + 12 [12] ---> Noise + """ for i in self.dict1: print(i, " ", self.dict1[i], end=" ---> ") @@ -185,6 +247,13 @@ class DbScan: >>> DbScan(4,1.9).plot_dbscan() Plotted Successfully + + >>> DbScan(5,2.5).plot_dbscan() + Plotted Successfully + + >>> DbScan(5,2.5).plot_dbscan() + Plotted Successfully + """ if type(self.file) is str: data = pd.read_csv(self.file) @@ -214,10 +283,12 @@ class DbScan: ha="center", va="bottom", ) + core_legend = mpatches.Patch(color="red", label="Core") + noise_legend = mpatches.Patch(color="green", label="Noise") plt.xlabel("X") plt.ylabel("Y") plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.legend(handles=[core_legend, noise_legend]) plt.show() print("Plotted Successfully")