Update and rename DBSCAN.py to dbscan.py

This commit is contained in:
tkgowtham 2024-10-01 20:49:14 +05:30 committed by GitHub
parent e107d6d5d0
commit da81c073eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,40 +1,13 @@
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
class DBSCAN:
from typing import Dict, List
class dbscan:
'''
Author : Gowtham Kamalasekar
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
DBSCAN Algorithm :
Density-Based Spatial Clustering Of Applications With Noise
Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN
Attributes:
-----------
minPts (int) : Minimum number of points that must lie within the radius of a point for it to be considered a core point
radius (int or float) : Maximum distance from a point within which other points are counted as lying in its radius
file (csv) : CSV file location. Should contain x and y coordinate value for each point.
Example :
minPts = 4
radius = 1.9
file = 'data_dbscan.csv'
File Structure of CSV Data:
---------------------------
_____
x | y
-----
3 | 7
4 | 6
5 | 5
6 | 4
7 | 3
-----
Functions:
----------
__init__() : Constructor that sets minPts, radius and file
@ -44,19 +17,52 @@ class DBSCAN:
To create an object
------------------
import DBSCAN
obj = DBSCAN.DBSCAN(minPts, radius, file)
import dbscan
obj = dbscan.dbscan(minPts, radius, file)
obj.print_dbscan()
obj.plot_dbscan()
'''
def __init__(self, minPts, radius, file):
def __init__(self, minPts : int, radius : int, file : str) -> None:
'''
Constructor
Attributes:
-----------
minPts (int) : Minimum number of points that must lie within the radius of a point for it to be considered a core point
radius (int or float) : Maximum distance from a point within which other points are counted as lying in its radius
file (csv) : CSV file location. Should contain x and y coordinate value for each point.
Example :
minPts = 4
radius = 1.9
file = 'data_dbscan.csv'
File Structure of CSV Data:
---------------------------
_____
x | y
-----
3 | 7
4 | 6
5 | 5
6 | 4
7 | 3
-----
'''
self.minPts = minPts
self.radius = radius
self.file = file
self.dict1 = self.perform_dbscan()
def perform_dbscan(self) -> Dict[int, List[int]]:
'''
Parameters:
-----------
None
def perform_dbscan(self):
Return:
--------
Dictionary with points and the list of points that lie in its radius
'''
data = pd.read_csv(self.file)
minPts = self.minPts
@ -73,8 +79,12 @@ class DBSCAN:
dict1[i+1] = [j+1,]
return dict1
def print_dbscan(self):
def print_dbscan(self) -> None:
'''
Outputs:
--------
Prints each point, the list of points that lie in its radius, and whether the point is core or noise (with border)
'''
for i in self.dict1:
print(i," ",self.dict1[i], end=' ---> ')
if len(self.dict1[i]) >= self.minPts:
@ -86,8 +96,12 @@ class DBSCAN:
break
else:
print("Noise")
def plot_dbscan(self):
def plot_dbscan(self) -> None:
'''
Output:
-------
A matplotlib plot that shows the points as core or noise, along with the circles that the points lie within.
'''
data = pd.read_csv(self.file)
e = self.radius
for i in self.dict1:
@ -99,10 +113,8 @@ class DBSCAN:
else:
plt.scatter(data['x'][i-1], data['y'][i-1], color='green')
plt.text(data['x'][i-1], data['y'][i-1], 'P'+str(i), ha='center', va='bottom')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('DBSCAN Clustering')
plt.legend(['Core','Noise'])
plt.show()