import math
import os
from typing import Optional, Union

import pandas as pd


class DbScan:
    """
    DBSCAN Algorithm :
    Density-Based Spatial Clustering Of Applications With Noise
    Reference Website : https://en.wikipedia.org/wiki/DBSCAN
    Reference YouTube Video : https://youtu.be/-p354tQsKrs?si=t1IxCFhrOB-RAcIU

    Functions:
    ----------
    __init__() : Constructor that sets minPts, radius and file
    perform_dbscan() : Builds the neighbourhood of every point
    print_dbscan() : Prints every point as core / border / noise
    plot_dbscan() : Plots the points with matplotlib

    To create an object
    -------------------
    import dbscan
    obj = dbscan.DbScan(minpts, radius, file)
    obj.print_dbscan()
    obj.plot_dbscan()
    """

    # Sample data set used when no ``file`` is supplied.  Stored as an
    # immutable tuple and copied per instance so that one instance can never
    # alter the data another instance sees.
    _DEFAULT_POINTS = (
        {"x": 3, "y": 7},
        {"x": 4, "y": 6},
        {"x": 5, "y": 5},
        {"x": 6, "y": 4},
        {"x": 7, "y": 3},
        {"x": 6, "y": 2},
        {"x": 7, "y": 2},
        {"x": 8, "y": 4},
        {"x": 3, "y": 3},
        {"x": 2, "y": 6},
        {"x": 3, "y": 5},
        {"x": 2, "y": 4},
    )

    def __init__(
        self,
        minpts: int,
        radius: float,
        file: Optional[Union[str, os.PathLike, list[dict[str, float]]]] = None,
    ) -> None:
        """
        Constructor

        Args:
        -----------
            minpts (int) : Minimum number of points (the point itself
                           included) inside the radius for a core point
            radius (float) : Neighbourhood radius; the comparison is strict,
                             i.e. a neighbour must be closer than ``radius``
            file : Path of a CSV file with ``x`` and ``y`` columns, or a list
                   of ``{"x": ..., "y": ...}`` dictionaries.  When omitted,
                   the built-in 12 point sample data set is used.

        Attributes:
        -----------
            dict1 : mapping of 1-based point number to the 1-based numbers of
                    all points in its neighbourhood (see perform_dbscan)

        >>> DbScan(4, 1.9).dict1[11]
        [2, 10, 11, 12]
        """
        self.minpts = minpts
        self.radius = radius
        # A fresh copy per instance: a literal list in the signature would be
        # shared (and mutable) across every call that relies on the default.
        if file is None:
            file = [dict(point) for point in self._DEFAULT_POINTS]
        self.file = file
        self.dict1 = self.perform_dbscan()

    def _load_points(self) -> list[tuple[float, float]]:
        """Return the data set referenced by ``self.file`` as (x, y) tuples."""
        source = self.file
        if isinstance(source, (str, os.PathLike)):
            frame = pd.read_csv(source)
        else:
            frame = pd.DataFrame(source)
        # An empty frame has no "x"/"y" columns to index; treat it as no data.
        if frame.empty:
            return []
        # Plain Python lists avoid slow pandas scalar indexing in the
        # quadratic neighbour search.
        return list(zip(frame["x"].tolist(), frame["y"].tolist()))

    def _is_core(self, label: int) -> bool:
        """Return True when point ``label`` has at least ``minpts`` neighbours."""
        return len(self.dict1.get(label, ())) >= self.minpts

    def perform_dbscan(self) -> dict[int, list[int]]:
        """
        Args:
        -----------
            None

        Return:
        --------
            Dictionary that maps every 1-based point number to the sorted
            list of 1-based point numbers lying strictly closer than
            ``radius`` (the point itself is part of its own neighbourhood
            whenever ``radius`` is positive).  Points without any neighbour
            get no entry.

        >>> DbScan(4, 1.9).perform_dbscan()[12]
        [9, 11, 12]
        """
        points = self._load_points()
        radius = self.radius
        neighbourhoods: dict[int, list[int]] = {}
        for i, (x_i, y_i) in enumerate(points, start=1):
            close = [
                j
                for j, (x_j, y_j) in enumerate(points, start=1)
                if math.sqrt((x_j - x_i) ** 2 + (y_j - y_i) ** 2) < radius
            ]
            if close:
                neighbourhoods[i] = close
        return neighbourhoods

    def print_dbscan(self) -> None:
        """
        Outputs:
        --------
            Prints each point with its neighbourhood followed by its kind:
            ``Core`` when it has at least ``minpts`` neighbours,
            ``Noise ---> Border`` when it is not core but has a core point in
            its neighbourhood, and ``Noise`` otherwise.

        >>> DbScan(4, 1.9).print_dbscan()  # doctest: +ELLIPSIS
        1   [1, 2, 10] ---> Noise ---> Border
        ...
        11   [2, 10, 11, 12] ---> Core
        12   [9, 11, 12] ---> Noise ---> Border
        """
        for label, neighbours in self.dict1.items():
            print(label, " ", neighbours, end=" ---> ")
            if self._is_core(label):
                print("Core")
            elif any(self._is_core(other) for other in neighbours):
                # Distances are symmetric, so "a core point is in my
                # neighbourhood" equals "I am in a core point's neighbourhood".
                print("Noise ---> Border")
            else:
                print("Noise")

    def plot_dbscan(self) -> None:
        """
        Output:
        -------
            A matplotlib plot that shows core points in red (with their
            neighbourhood circle in blue) and all other points in green,
            each annotated with its point number.  Prints
            ``Plotted Successfully`` once the plot window has been shown.

        >>> DbScan(4, 1.9).plot_dbscan()  # doctest: +SKIP
        Plotted Successfully
        """
        # Imported here so that clustering and printing keep working on
        # machines without matplotlib or without a display.
        import matplotlib.pyplot as plt

        points = self._load_points()
        core_points: list[tuple[float, float]] = []
        other_points: list[tuple[float, float]] = []
        axes = plt.gca()
        for label, neighbours in self.dict1.items():
            x, y = points[label - 1]
            if len(neighbours) >= self.minpts:
                core_points.append((x, y))
                axes.add_artist(
                    plt.Circle((x, y), self.radius, color="blue", fill=False)
                )
            else:
                other_points.append((x, y))
            plt.text(x, y, "P" + str(label), ha="center", va="bottom")

        # One labelled scatter per class keeps the legend correct; labelling
        # the first two individual scatter artists would tag them "Core" and
        # "Noise" regardless of what they really are.
        for group, colour, name in (
            (core_points, "red", "Core"),
            (other_points, "green", "Noise"),
        ):
            plt.scatter(
                [point[0] for point in group],
                [point[1] for point in group],
                color=colour,
                label=name,
            )
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.title("DBSCAN Clustering")
        plt.legend()
        plt.show()
        print("Plotted Successfully")