Update final3 dbscan.py

This commit is contained in:
tkgowtham 2024-10-02 16:33:05 +05:30 committed by GitHub
parent 0b6579460e
commit e13b9d9ef2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,19 +1,18 @@
""" '''
Author : Gowtham Kamalasekar Author : Gowtham Kamalasekar
LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/ LinkedIn : https://www.linkedin.com/in/gowtham-kamalasekar/
""" '''
import math
import matplotlib.pyplot as plt
import pandas as pd
from typing import dict, list
class DbScan: class DbScan:
""" import math
import matplotlib.pyplot as plt
import pandas as pd
from typing import dict, list
'''
DBSCAN Algorithm : DBSCAN Algorithm :
Density-Based Spatial Clustering Of Applications With Noise Density-Based Spatial Clustering Of Applications With Noise
Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN Refer to this website for more details : https://en.wikipedia.org/wiki/DBSCAN
@ -33,28 +32,14 @@ class DbScan:
obj = dbscan.DbScan(minpts, radius, file) obj = dbscan.DbScan(minpts, radius, file)
obj.print_dbscan() obj.print_dbscan()
obj.plot_dbscan() obj.plot_dbscan()
""" '''
def __init__(self, minpts : int, radius : int, file : str =
def __init__( ({'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5},
self, {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2},
minpts: int, {'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3},
radius: int, {'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4})
file: str = ( ) -> None:
{"x": 3, "y": 7}, '''
{"x": 4, "y": 6},
{"x": 5, "y": 5},
{"x": 6, "y": 4},
{"x": 7, "y": 3},
{"x": 6, "y": 2},
{"x": 7, "y": 2},
{"x": 8, "y": 4},
{"x": 3, "y": 3},
{"x": 2, "y": 6},
{"x": 3, "y": 5},
{"x": 2, "y": 4},
),
) -> None:
"""
Constructor Constructor
Args: Args:
@ -82,14 +67,13 @@ class DbScan:
6 | 4 6 | 4
7 | 3 7 | 3
----- -----
""" '''
self.minpts = minpts self.minpts = minpts
self.radius = radius self.radius = radius
self.file = file self.file = file
self.dict1 = self.perform_dbscan() self.dict1 = self.perform_dbscan()
def perform_dbscan(self) -> dict[int, list[int]]: def perform_dbscan(self) -> dict[int, list[int]]:
""" '''
Args: Args:
----------- -----------
None None
@ -115,7 +99,7 @@ class DbScan:
11 [2, 10, 11, 12] 11 [2, 10, 11, 12]
12 [9, 11, 12] 12 [9, 11, 12]
""" '''
if type(self.file) is str: if type(self.file) is str:
data = pd.read_csv(self.file) data = pd.read_csv(self.file)
else: else:
@ -124,21 +108,16 @@ class DbScan:
dict1 = {} dict1 = {}
for i in range(len(data)): for i in range(len(data)):
for j in range(len(data)): for j in range(len(data)):
dist = math.sqrt( dist = math.sqrt(pow(data['x'][j] - data['x'][i],2)
pow(data["x"][j] - data["x"][i], 2) + pow(data['y'][j] - data['y'][i],2))
+ pow(data["y"][j] - data["y"][i], 2)
)
if dist < e: if dist < e:
if i + 1 in dict1: if i+1 in dict1:
dict1[i + 1].append(j + 1) dict1[i+1].append(j+1)
else: else:
dict1[i + 1] = [ dict1[i+1] = [j+1,]
j + 1,
]
return dict1 return dict1
def print_dbscan(self) -> None: def print_dbscan(self) -> None:
""" '''
Outputs: Outputs:
-------- --------
Prints each point and whether it is a core or a noise (w/ border) Prints each point and whether it is a core or a noise (w/ border)
@ -156,9 +135,9 @@ class DbScan:
10 [1, 10, 11] ---> Noise ---> Border 10 [1, 10, 11] ---> Noise ---> Border
11 [2, 10, 11, 12] ---> Core 11 [2, 10, 11, 12] ---> Core
12 [9, 11, 12] ---> Noise ---> Border 12 [9, 11, 12] ---> Noise ---> Border
""" '''
for i in self.dict1: for i in self.dict1:
print(i, " ", self.dict1[i], end=" ---> ") print(i," ",self.dict1[i], end=' ---> ')
if len(self.dict1[i]) >= self.minpts: if len(self.dict1[i]) >= self.minpts:
print("Core") print("Core")
else: else:
@ -172,9 +151,8 @@ class DbScan:
break break
else: else:
print("Noise") print("Noise")
def plot_dbscan(self) -> None: def plot_dbscan(self) -> None:
""" '''
Output: Output:
------- -------
A matplotlib plot that shows points as core and noise along A matplotlib plot that shows points as core and noise along
@ -182,7 +160,7 @@ class DbScan:
>>> DbScan(4,1.9).plot_dbscan() >>> DbScan(4,1.9).plot_dbscan()
Plotted Successfully Plotted Successfully
""" '''
if type(self.file) is str: if type(self.file) is str:
data = pd.read_csv(self.file) data = pd.read_csv(self.file)
else: else:
@ -190,36 +168,23 @@ class DbScan:
e = self.radius e = self.radius
for i in self.dict1: for i in self.dict1:
if len(self.dict1[i]) >= self.minpts: if len(self.dict1[i]) >= self.minpts:
plt.scatter(data["x"][i - 1], data["y"][i - 1], color="red") plt.scatter(data['x'][i-1], data['y'][i-1], color='red')
circle = plt.Circle( circle = plt.Circle((data['x'][i-1], data['y'][i-1]),
(data["x"][i - 1], data["y"][i - 1]), e, color="blue", fill=False e, color='blue', fill=False)
)
plt.gca().add_artist(circle) plt.gca().add_artist(circle)
plt.text( plt.text(data['x'][i-1], data['y'][i-1],
data["x"][i - 1], 'P'+str(i), ha='center', va='bottom')
data["y"][i - 1],
"P" + str(i),
ha="center",
va="bottom",
)
else: else:
plt.scatter(data["x"][i - 1], data["y"][i - 1], color="green") plt.scatter(data['x'][i-1], data['y'][i-1], color='green')
plt.text( plt.text(data['x'][i-1], data['y'][i-1],
data["x"][i - 1], 'P'+str(i), ha='center', va='bottom')
data["y"][i - 1], plt.xlabel('X')
"P" + str(i), plt.ylabel('Y')
ha="center", plt.title('DBSCAN Clustering')
va="bottom", plt.legend(['Core','Noise'])
)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("DBSCAN Clustering")
plt.legend(["Core", "Noise"])
plt.show() plt.show()
print("Plotted Successfully") print("Plotted Successfully")
if __name__ == "__main__": if __name__ == "__main__":
import doctest import doctest
doctest.testmod() doctest.testmod()