From 49e9f614f56278ea65f089737a5a956116baa5ce Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 21:13:40 +0530 Subject: [PATCH] Update dbscan.py --- machine_learning/dbscan.py | 112 +++++++++++++++---------------------- 1 file changed, 45 insertions(+), 67 deletions(-) diff --git a/machine_learning/dbscan.py b/machine_learning/dbscan.py index 566f8e217..d83bdbaba 100644 --- a/machine_learning/dbscan.py +++ b/machine_learning/dbscan.py @@ -1,11 +1,9 @@ import pandas as pd import math import matplotlib.pyplot as plt -from typing import Dict, List - - -class dbscan: - """ +from typing import dict, list +class dbScan: + ''' DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN @@ -20,20 +18,22 @@ class dbscan: To create a object ------------------ import dbscan - obj = dbscan.dbscan(minPts, radius, file) + obj = dbscan.dbscan(minpts, radius, file) obj.print_dbscan() obj.plot_dbscan() - """ - - def __init__(self, minPts: int, radius: int, file: str) -> None: - """ + ''' + def __init__(self, minpts : int, radius : int, file : str) -> None: + ''' Constructor Attributes: ----------- - minPts (int) : Minimum number of points needed to be within the radius to considered as core - radius (int) : The radius from a given core point where other core points can be considered as core - file (csv) : CSV file location. Should contain x and y coordinate value for each point. + minpts (int) : Minimum number of points needed to be + within the radius to considered as core + radius (int) : The radius from a given core point where + other core points can be considered as core + file (csv) : CSV file location. Should contain x and y + coordinate value for each point. Example : minPts = 4 @@ -51,99 +51,77 @@ class dbscan: 6 | 4 7 | 3 ----- - """ - self.minPts = minPts + ''' + self.minpts = minpts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> dict[int, list[int]]: + ''' + >>>perform_dbscan() - def perform_dbscan(self) -> Dict[int, List[int]]: - """ Parameters: ----------- None Return: -------- - Dictionary with points and the list of points that lie in its radius - """ + Dictionary with points and the list of points + that lie in its radius + ''' data = pd.read_csv(self.file) - minPts = self.minPts + minpts = self.minpts e = self.radius dict1 = {} for i in range(len(data)): for j in range(len(data)): - dist = math.sqrt( - pow(data["x"][j] - data["x"][i], 2) - + pow(data["y"][j] - data["y"][i], 2) - ) + dist = math.sqrt(pow(data['x'][j] - data['x'][i],2) + pow(data['y'][j] - data['y'][i],2)) if dist < e: - if i + 1 in dict1: - dict1[i + 1].append(j + 1) + if i+1 in dict1: + dict1[i+1].append(j+1) else: - dict1[i + 1] = [ - j + 1, - ] + dict1[i+1] = [j+1,] return dict1 - def print_dbscan(self) -> None: - """ + ''' Outputs: -------- Prints each point and if it is a core or a noise (w/ border) - """ + ''' for i in self.dict1: - print(i, " ", self.dict1[i], end=" ---> ") - if len(self.dict1[i]) >= self.minPts: + print(i," ",self.dict1[i], end=' ---> ') + if len(self.dict1[i]) >= self.minpts: print("Core") else: for j in self.dict1: - if ( - i != j - and len(self.dict1[j]) >= self.minPts - and i in self.dict1[j] - ): + if i != j and len(self.dict1[j]) >= self.minpts and i in self.dict1[j]: print("Noise ---> Border") break else: print("Noise") - def plot_dbscan(self) -> None: - """ + ''' Output: ------- - A matplotlib plot that show points as core and noise along with the circle that lie within it. - """ + A matplotlib plot that show points as core and noise along + with the circle that lie within it. + ''' data = pd.read_csv(self.file) e = self.radius for i in self.dict1: - if len(self.dict1[i]) >= self.minPts: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") - circle = plt.Circle( - (data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False - ) + if len(self.dict1[i]) >= self.minpts: + plt.scatter(data['x'][i-1], data['y'][i-1], color='red') + circle = plt.Circle((data['x'][i-1], data['y'][i-1]), e, color='blue', fill=False) plt.gca().add_artist(circle) - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') else: - plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") - plt.text( - data["x"][i - 1], - data["y"][i - 1], - "P" + str(i), - ha="center", - va="bottom", - ) - plt.xlabel("X") - plt.ylabel("Y") - plt.title("DBSCAN Clustering") - plt.legend(["Core", "Noise"]) + plt.scatter(data['x'][i-1], data['y'][i-1], color='green') + plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') + plt.xlabel('X') + plt.ylabel('Y') + plt.title('DBSCAN Clustering') + plt.legend(['Core','Noise']) plt.show()