From da81c073ebe149ea110111bb2dce3457574eb499 Mon Sep 17 00:00:00 2001 From: tkgowtham Date: Tue, 1 Oct 2024 20:49:14 +0530 Subject: [PATCH] Update and rename DBSCAN.py to dbscan.py --- machine_learning/{DBSCAN.py => dbscan.py} | 92 +++++++++++++---------- 1 file changed, 52 insertions(+), 40 deletions(-) rename machine_learning/{DBSCAN.py => dbscan.py} (63%) diff --git a/machine_learning/DBSCAN.py b/machine_learning/dbscan.py similarity index 63% rename from machine_learning/DBSCAN.py rename to machine_learning/dbscan.py index c746161e8..3e044d8f6 100644 --- a/machine_learning/DBSCAN.py +++ b/machine_learning/dbscan.py @@ -1,40 +1,13 @@ import pandas as pd -import numpy as np import math import matplotlib.pyplot as plt - -class DBSCAN: +from typing import Dict, List +class dbscan: ''' - Author : Gowtham Kamalasekar - LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ - DBSCAN Algorithm : Density-Based Spatial Clustering Of Applications With Noise Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN - Attributes: - ----------- - minPts (int) : Minimum number of points needed to be within the radius to considered as core - radius (int) : The radius from a given core point where other core points can be considered as core - file (csv) : CSV file location. Should contain x and y coordinate value for each point. 
- - Example : - minPts = 4 - radius = 1.9 - file = 'data_dbscan.csv' - - File Structure of CSV Data: - --------------------------- - _____ - x | y - ----- - 3 | 7 - 4 | 6 - 5 | 5 - 6 | 4 - 7 | 3 - ----- - Functions: ---------- __init__() : Constructor that sets minPts, radius and file @@ -44,19 +17,52 @@ class DBSCAN: To create a object ------------------ - import DBSCAN - obj = DBSCAN.DBSCAN(minPts, radius, file) + import dbscan + obj = dbscan.dbscan(minPts, radius, file) obj.print_dbscan() obj.plot_dbscan() ''' - - def __init__(self, minPts, radius, file): + def __init__(self, minPts : int, radius : int, file : str) -> None: + ''' + Constructor + + Attributes: + ----------- + minPts (int) : Minimum number of points needed to be within the radius to be considered as core + radius (int) : The radius from a given core point where other core points can be considered as core + file (csv) : CSV file location. Should contain x and y coordinate values for each point. + + Example : + minPts = 4 + radius = 1.9 + file = 'data_dbscan.csv' + + File Structure of CSV Data: + --------------------------- + _____ + x | y + ----- + 3 | 7 + 4 | 6 + 5 | 5 + 6 | 4 + 7 | 3 + ----- + ''' self.minPts = minPts self.radius = radius self.file = file self.dict1 = self.perform_dbscan() + def perform_dbscan(self) -> Dict[int, List[int]]: + ''' + Parameters: + ----------- + None - def perform_dbscan(self): + Return: + -------- + Dictionary mapping each point to the list of points that lie within its radius + ''' data = pd.read_csv(self.file) minPts = self.minPts @@ -73,8 +79,12 @@ class DBSCAN: dict1[i+1] = [j+1,] return dict1 - - def print_dbscan(self): + def print_dbscan(self) -> None: + ''' + Outputs: + -------- + Prints each point and whether it is a core or a noise (w/ border) + ''' for i in self.dict1: print(i," ",self.dict1[i], end=' ---> ') if len(self.dict1[i]) >= self.minPts: @@ -86,8 +96,12 @@ class DBSCAN: break else: print("Noise") - - def plot_dbscan(self): + def plot_dbscan(self) -> None: + ''' + 
Output: + ------- + A matplotlib plot that shows points as core and noise along with the circles that lie within it. + ''' data = pd.read_csv(self.file) e = self.radius for i in self.dict1: @@ -99,10 +113,8 @@ class DBSCAN: else: plt.scatter(data['x'][i-1], data['y'][i-1], color='green') plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') - plt.xlabel('X') plt.ylabel('Y') plt.title('DBSCAN Clustering') - plt.legend(['Core','Noise']) plt.show()