Update dbscan.py

This commit is contained in:
tkgowtham 2024-10-02 15:43:16 +05:30 committed by GitHub
parent 8d86c62d56
commit 254854e832
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,20 +1,21 @@
import pandas as pd
import math import math
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from typing import dict, list import pandas as pd
from typing import dict, list, optional
class DbScan: class DbScan:
""" '''
DBSCAN Algorithm : DBSCAN Algorithm :
Density-Based Spatial Clustering Of Applications With Noise Density-Based Spatial Clustering Of Applications With Noise
Refer this website for more details : https://en.wikipedia.org/wiki/DBSCAN Reference Website : https://en.wikipedia.org/wiki/DBSCAN
Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU
Functions: Functions:
---------- ----------
__init__() : Constructor that sets minPts, radius and file __init__() : Constructor that sets minPts, radius and file
perform_dbscan() : Invoked by constructor and calculates the core and noise points and returns a dictionary. perform_dbscan() : Invoked by constructor and calculates the core
print_dbscan() : Prints the core and noise points along with stating if the noise are border points or not. and noise points and returns a dictionary.
print_dbscan() : Prints the core and noise points along
with stating if the noise are border points or not.
plot_dbscan() : Plots the points to show the core and noise point. plot_dbscan() : Plots the points to show the core and noise point.
To create an object To create an object
@ -23,13 +24,17 @@ class DbScan:
obj = dbscan.DbScan(minpts, radius, file) obj = dbscan.DbScan(minpts, radius, file)
obj.print_dbscan() obj.print_dbscan()
obj.plot_dbscan() obj.plot_dbscan()
""" '''
def __init__(self, minpts : int, radius : int, file : optional[str] =
def __init__(self, minpts: int, radius: int, file: str) -> None: [{'x': 3, 'y': 7}, {'x': 4, 'y': 6}, {'x': 5, 'y': 5},
""" {'x': 6, 'y': 4},{'x': 7, 'y': 3}, {'x': 6, 'y': 2},
{'x': 7, 'y': 2}, {'x': 8, 'y': 4},{'x': 3, 'y': 3},
{'x': 2, 'y': 6}, {'x': 3, 'y': 5}, {'x': 2, 'y': 4}]
) -> None:
'''
Constructor Constructor
Attributes: Args:
----------- -----------
minpts (int) : Minimum number of points needed to be minpts (int) : Minimum number of points needed to be
within the radius to considered as core within the radius to considered as core
@ -54,97 +59,111 @@ class DbScan:
6 | 4 6 | 4
7 | 3 7 | 3
----- -----
""" '''
self.minpts = minpts self.minpts = minpts
self.radius = radius self.radius = radius
self.file = file self.file = file
self.dict1 = self.perform_dbscan() self.dict1 = self.perform_dbscan()
def perform_dbscan(self) -> dict[int, list[int]]:
    """
    Build the neighbourhood map used by the other DBSCAN methods.

    The points are read from ``self.file``: a CSV path (``str`` or any
    ``os.PathLike``) is loaded with ``pandas.read_csv``; anything else is
    handed to ``pandas.DataFrame``. The data must expose ``x`` and ``y``
    columns.

    Args:
    -----------
    None

    Return:
    --------
    Dictionary that maps each 1-based point number to the list of
    1-based point numbers lying strictly closer than ``self.radius``
    (the point itself included). A point with no such neighbour, which
    only happens when the radius is not positive, gets no entry.

    >>> result = DbScan(4, 1.9).perform_dbscan()
    >>> for key in sorted(result):
    ...     print(key, sorted(result[key]))
    1 [1, 2, 10]
    2 [1, 2, 3, 11]
    3 [2, 3, 4]
    4 [3, 4, 5]
    5 [4, 5, 6, 7, 8]
    6 [5, 6, 7]
    7 [5, 6, 7]
    8 [5, 8]
    9 [9, 12]
    10 [1, 10, 11]
    11 [2, 10, 11, 12]
    12 [9, 11, 12]
    """
    import os

    source = self.file
    # isinstance also accepts pathlib.Path and str subclasses, which the
    # exact type comparison used to send down the DataFrame branch.
    if isinstance(source, (str, os.PathLike)):
        data = pd.read_csv(source)
    else:
        data = pd.DataFrame(source)

    # Pull the coordinates out once: positional access on plain lists does
    # not depend on the frame's index labels and avoids a pandas scalar
    # lookup for every pair of points.
    points = list(zip(data["x"].tolist(), data["y"].tolist()))
    eps = self.radius

    neighbours: dict[int, list[int]] = {}
    for i, (xi, yi) in enumerate(points, start=1):
        close = [
            j
            for j, (xj, yj) in enumerate(points, start=1)
            if math.hypot(xj - xi, yj - yi) < eps
        ]
        if close:
            neighbours[i] = close
    return neighbours
def print_dbscan(self) -> None:
    """
    Print every point with its neighbour list and its classification.

    A point is "Core" when its neighbour list in ``self.dict1`` holds at
    least ``self.minpts`` entries. A non-core point that appears in the
    neighbour list of some core point is "Noise ---> Border"; any other
    point is plain "Noise".

    Outputs:
    --------
    One line per point, in the iteration order of ``self.dict1``.

    >>> DbScan(4,1.9).print_dbscan()
    1   [1, 2, 10] ---> Noise ---> Border
    2   [1, 2, 3, 11] ---> Core
    3   [2, 3, 4] ---> Noise ---> Border
    4   [3, 4, 5] ---> Noise ---> Border
    5   [4, 5, 6, 7, 8] ---> Core
    6   [5, 6, 7] ---> Noise ---> Border
    7   [5, 6, 7] ---> Noise ---> Border
    8   [5, 8] ---> Noise ---> Border
    9   [9, 12] ---> Noise
    10   [1, 10, 11] ---> Noise ---> Border
    11   [2, 10, 11, 12] ---> Core
    12   [9, 11, 12] ---> Noise ---> Border
    """
    # Classify once up front instead of rescanning the whole dictionary
    # for every non-core point.
    core_points = {
        point
        for point, near in self.dict1.items()
        if len(near) >= self.minpts
    }
    # Everything that lies inside the radius of at least one core point.
    reachable_from_core = set()
    for point in core_points:
        reachable_from_core.update(self.dict1[point])

    for point, near in self.dict1.items():
        print(point, " ", near, end=" ---> ")
        if point in core_points:
            print("Core")
        elif point in reachable_from_core:
            print("Noise ---> Border")
        else:
            print("Noise")
def plot_dbscan(self) -> None:
    """
    Draw the clustering result with matplotlib.

    Core points (neighbour list of at least ``self.minpts`` entries in
    ``self.dict1``) are drawn in red with a blue circle of radius
    ``self.radius`` around them; all other points are drawn in green.
    Every point is annotated with ``P<number>``.

    Output:
    -------
    A matplotlib plot that shows points as core and noise along
    with the circle around each core point, followed by the
    message "Plotted Successfully" on standard output.

    >>> DbScan(4,1.9).plot_dbscan()  # doctest: +SKIP
    Plotted Successfully
    """
    import os

    source = self.file
    # isinstance also accepts pathlib.Path and str subclasses.
    if isinstance(source, (str, os.PathLike)):
        data = pd.read_csv(source)
    else:
        data = pd.DataFrame(source)
    xs = data["x"].tolist()
    ys = data["y"].tolist()
    eps = self.radius

    core: list[int] = []
    noise: list[int] = []
    for point, near in self.dict1.items():
        (core if len(near) >= self.minpts else noise).append(point)

    # One labelled scatter per class so the legend entries are tied to the
    # right colour; a positional legend(['Core', 'Noise']) labels whichever
    # two artists happen to be drawn first.
    for members, colour, label in (
        (core, "red", "Core"),
        (noise, "green", "Noise"),
    ):
        if members:
            plt.scatter(
                [xs[p - 1] for p in members],
                [ys[p - 1] for p in members],
                color=colour,
                label=label,
            )

    axes = plt.gca()
    for p in core:
        axes.add_artist(
            plt.Circle((xs[p - 1], ys[p - 1]), eps, color="blue", fill=False)
        )

    for p in self.dict1:
        plt.text(xs[p - 1], ys[p - 1], "P" + str(p), ha="center", va="bottom")

    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("DBSCAN Clustering")
    if core or noise:
        # legend() with no labelled artists only produces a warning.
        plt.legend()
    plt.show()
    print("Plotted Successfully")