Update and rename DBSCAN.py to dbscan.py

This commit is contained in:
tkgowtham 2024-10-01 20:49:14 +05:30 committed by GitHub
parent e107d6d5d0
commit da81c073eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,40 +1,13 @@
import pandas as pd import pandas as pd
import numpy as np
import math import math
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from typing import Dict, List
class DBSCAN: class dbscan:
''' '''
Author : Gowtham Kamalasekar
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
DBSCAN Algorithm : DBSCAN Algorithm :
Density-Based Spatial Clustering Of Applications With Noise Density-Based Spatial Clustering Of Applications With Noise
Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN
Attributes:
-----------
minPts (int) : Minimum number of points that must lie within the radius of a point for it to be considered a core point
radius (int or float) : The radius around a given point within which other points are counted as lying in its neighbourhood
file (csv) : CSV file location. Should contain x and y coordinate value for each point.
Example :
minPts = 4
radius = 1.9
file = 'data_dbscan.csv'
File Structure of CSV Data:
---------------------------
_____
x | y
-----
3 | 7
4 | 6
5 | 5
6 | 4
7 | 3
-----
Functions: Functions:
---------- ----------
__init__() : Constructor that sets minPts, radius and file __init__() : Constructor that sets minPts, radius and file
@ -44,19 +17,52 @@ class DBSCAN:
To create an object To create an object
------------------ ------------------
import DBSCAN import dbscan
obj = DBSCAN.DBSCAN(minPts, radius, file) obj = dbscan.dbscan(minPts, radius, file)
obj.print_dbscan() obj.print_dbscan()
obj.plot_dbscan() obj.plot_dbscan()
''' '''
def __init__(self, minPts : int, radius : int, file : str) -> None:
'''
Constructor
def __init__(self, minPts, radius, file): Attributes:
-----------
minPts (int) : Minimum number of points that must lie within the radius of a point for it to be considered a core point
radius (int or float) : The radius around a given point within which other points are counted as lying in its neighbourhood
file (csv) : CSV file location. Should contain x and y coordinate value for each point.
Example :
minPts = 4
radius = 1.9
file = 'data_dbscan.csv'
File Structure of CSV Data:
---------------------------
_____
x | y
-----
3 | 7
4 | 6
5 | 5
6 | 4
7 | 3
-----
'''
self.minPts = minPts self.minPts = minPts
self.radius = radius self.radius = radius
self.file = file self.file = file
self.dict1 = self.perform_dbscan() self.dict1 = self.perform_dbscan()
def perform_dbscan(self) -> Dict[int, List[int]]:
'''
Parameters:
-----------
None
def perform_dbscan(self): Return:
--------
Dictionary with points and the list of points that lie in its radius
'''
data = pd.read_csv(self.file) data = pd.read_csv(self.file)
minPts = self.minPts minPts = self.minPts
@ -73,8 +79,12 @@ class DBSCAN:
dict1[i+1] = [j+1,] dict1[i+1] = [j+1,]
return dict1 return dict1
def print_dbscan(self) -> None:
def print_dbscan(self): '''
Outputs:
--------
Prints each point and whether it is a core point or noise (w/ border)
'''
for i in self.dict1: for i in self.dict1:
print(i," ",self.dict1[i], end=' ---> ') print(i," ",self.dict1[i], end=' ---> ')
if len(self.dict1[i]) >= self.minPts: if len(self.dict1[i]) >= self.minPts:
@ -86,8 +96,12 @@ class DBSCAN:
break break
else: else:
print("Noise") print("Noise")
def plot_dbscan(self) -> None:
def plot_dbscan(self): '''
Output:
-------
A matplotlib plot that shows points as core and noise along with the circle that lies within it.
'''
data = pd.read_csv(self.file) data = pd.read_csv(self.file)
e = self.radius e = self.radius
for i in self.dict1: for i in self.dict1:
@ -99,10 +113,8 @@ class DBSCAN:
else: else:
plt.scatter(data['x'][i-1], data['y'][i-1], color='green') plt.scatter(data['x'][i-1], data['y'][i-1], color='green')
plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom') plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom')
plt.xlabel('X') plt.xlabel('X')
plt.ylabel('Y') plt.ylabel('Y')
plt.title('DBSCAN Clustering') plt.title('DBSCAN Clustering')
plt.legend(['Core','Noise']) plt.legend(['Core','Noise'])
plt.show() plt.show()