Update dbscan.py

This commit is contained in:
tkgowtham 2024-10-02 16:03:06 +05:30 committed by GitHub
parent b7e5e9c112
commit 249b0e8871
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,15 +1,12 @@
import math
from typing import Dict, Iterable, List, Mapping, Optional, Union

import matplotlib.pyplot as plt
import pandas as pd
class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Reference Website : https://en.wikipedia.org/wiki/DBSCAN
    Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU

    Functions:
    ----------
    __init__ : Constructor that stores the parameters and runs the
               neighbourhood search
    perform_dbscan : Builds the neighbourhood of every point
    print_dbscan : Prints every point labelled as core / border / noise
    plot_dbscan : Plots the points with matplotlib

    To create an object
    -------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    # Sample data set used when the caller supplies no data. Kept as an
    # immutable tuple and copied per instance so that one instance mutating
    # its ``file`` attribute can never leak into another instance.
    _DEFAULT_POINTS = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    )

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: Optional[Union[str, Iterable[Mapping[str, float]]]] = None,
    ) -> None:
        """
        Constructor

        Args:
        -----------
        minpts : minimum neighbourhood size (the point itself included)
                 for a point to be a core point
        radius : neighbourhood radius; neighbours are strictly closer
                 than this value
        file : either the path of a CSV file with 'x' and 'y' columns or
               in-memory points (e.g. a list of {'x': ..., 'y': ...}
               dicts); when omitted the built-in 12 point sample is used

        Attributes:
        -----------
        minpts, radius, file : the (resolved) constructor arguments
        dict1 : result of perform_dbscan(), computed once here
        """
        self.minpts = minpts
        self.radius = radius
        # ``None`` sentinel instead of a mutable default argument.
        if file is None:
            file = [dict(point) for point in self._DEFAULT_POINTS]
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_points(self) -> "pd.DataFrame":
        """Return the input as a DataFrame holding 'x' and 'y' columns."""
        source = self.file
        if isinstance(source, str):
            return pd.read_csv(source)
        if isinstance(source, tuple):
            # Normalise tuples of records to a list before handing to pandas.
            source = list(source)
        return pd.DataFrame(source)

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Build the neighbourhood of every point.

        Args:
        -----------
        None

        Return:
        --------
        Dictionary mapping each 1-based point number to the list of
        1-based point numbers lying strictly within ``radius`` of it.
        A point is its own neighbour whenever ``radius`` is positive.

        >>> DbScan(4, 1.9).perform_dbscan()[11]
        [2, 10, 11, 12]
        """
        data = self._load_points()
        # Pull the coordinates out positionally once: this avoids a pandas
        # scalar lookup per pair and works whatever index the frame carries.
        xs = data["x"].tolist()
        ys = data["y"].tolist()
        eps = self.radius

        neighbourhoods: dict[int, list[int]] = {}
        for i, (xi, yi) in enumerate(zip(xs, ys), start=1):
            for j, (xj, yj) in enumerate(zip(xs, ys), start=1):
                dx = xj - xi
                dy = yj - yi
                if math.sqrt(dx * dx + dy * dy) < eps:
                    neighbourhoods.setdefault(i, []).append(j)
        return neighbourhoods

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
        Prints each point with its neighbourhood and whether it is a core
        point, a noise point reachable from a core point (border), or
        plain noise, e.g.

        10   [1, 10, 11] ---> Noise ---> Border
        11   [2, 10, 11, 12] ---> Core
        """
        core_points = [
            point
            for point, neighbours in self.dict1.items()
            if len(neighbours) >= self.minpts
        ]
        for point, neighbours in self.dict1.items():
            print(point, " ", neighbours, end=" ---> ")
            if len(neighbours) >= self.minpts:
                print("Core")
            elif any(point in self.dict1[core] for core in core_points):
                # Not dense enough itself, but inside a core point's radius.
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
        A matplotlib plot that shows core points in red (with their
        radius drawn as a blue circle) and all other points in green,
        each annotated with its point number. Prints
        'Plotted Successfully' once the figure has been shown.
        """
        data = self._load_points()
        xs = data["x"].tolist()
        ys = data["y"].tolist()
        eps = self.radius
        axes = plt.gca()

        # Label only the first marker of each category so the legend shows
        # exactly one correctly coloured entry per category that is present.
        core_labelled = False
        noise_labelled = False
        for point, neighbours in self.dict1.items():
            px, py = xs[point - 1], ys[point - 1]
            if len(neighbours) >= self.minpts:
                plt.scatter(
                    px, py, color="red", label=None if core_labelled else "Core"
                )
                core_labelled = True
                axes.add_artist(
                    plt.Circle((px, py), eps, color="blue", fill=False)
                )
            else:
                plt.scatter(
                    px, py, color="green", label=None if noise_labelled else "Noise"
                )
                noise_labelled = True
            plt.text(px, py, "P" + str(point), ha="center", va="bottom")

        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        if core_labelled or noise_labelled:
            plt.legend()
        plt.show()
        print("Plotted Successfully")