Transfer .ipynb files to TheAlgorithms/Jupyter (#1414)

This commit is contained in:
Christian Clauss 2019-10-22 08:45:03 +02:00 committed by GitHub
parent f93cce66a6
commit 4531ea425e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 0 additions and 3940 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,271 +0,0 @@
import warnings
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons
def euclidean_distance(q, p):
    """
    Return the Euclidean distance between the two-dimensional points q and p.

    Each point must be a sized iterable holding exactly two numbers, such as
    a list ``[x, y]`` or a tuple ``(x, y)``. The two points do not have to
    use the same container type.

    >>> euclidean_distance([0, 0], [3, 4])
    5.0
    >>> euclidean_distance([0, 0], (3, 4))
    5.0

    Distance can only be calculated between numeric values
    >>> euclidean_distance([1, 'a'], [1, 2])
    Traceback (most recent call last):
        ...
    ValueError: Non-numeric input detected

    Supports only two dimensional points
    >>> euclidean_distance([1, 1, 1], [1, 2])
    Traceback (most recent call last):
        ...
    ValueError: expected dimensions to be 2-d, instead got p:2 and q:3

    Input should be in the format [x,y] or (x,y)
    >>> euclidean_distance(1, 2)
    Traceback (most recent call last):
        ...
    TypeError: inputs must be iterable, either list [x,y] or tuple (x,y)

    Raises:
        TypeError: if either input is not iterable, or is a str.
        ValueError: if either point does not have exactly two coordinates,
            or if any coordinate is not numeric.
    """
    if not hasattr(q, "__iter__") or not hasattr(p, "__iter__"):
        raise TypeError("inputs must be iterable, either list [x,y] or tuple (x,y)")
    if isinstance(q, str) or isinstance(p, str):
        raise TypeError("inputs cannot be str")
    if len(q) != 2 or len(p) != 2:
        raise ValueError(
            "expected dimensions to be 2-d, instead got p:{} and q:{}".format(
                len(p), len(q)
            )
        )
    # Copy both points into lists before joining them so that a list can be
    # compared with a tuple (list + tuple raises TypeError).
    for num in list(q) + list(p):
        # int() happily parses "3" or b"3", but such values cannot be
        # subtracted below, so reject text explicitly.
        if isinstance(num, (str, bytes)):
            raise ValueError("Non-numeric input detected")
        try:
            int(num)
        except (TypeError, ValueError, OverflowError):
            raise ValueError("Non-numeric input detected") from None
    a = (q[0] - p[0]) ** 2
    b = (q[1] - p[1]) ** 2
    return (a + b) ** 0.5
def find_neighbors(db, q, eps):
    """
    Collect every point of db whose Euclidean distance to q is at most eps.

    db is a dict keyed by points; only its keys are inspected here. The
    matching keys are returned as a list, in the dict's iteration order.

    >>> db = {(0, 0): {'label': 'undefined'}, (3, 4): {'label': 'undefined'}}
    >>> find_neighbors(db, (0, 0), 1)
    [(0, 0)]

    eps value should be a number
    >>> find_neighbors(db, (2, 5), 'a')
    Traceback (most recent call last):
        ...
    ValueError: eps should be either int or float

    Q must be a 2-d point as list or tuple
    >>> find_neighbors(db, 2, 0.5)
    Traceback (most recent call last):
        ...
    TypeError: Q must a 2-dimentional point in the format (x,y) or [x,y]

    Points must be in correct format
    >>> find_neighbors([], (2, 2), 0.4)
    Traceback (most recent call last):
        ...
    TypeError: db must be a dict of points in the format {(x,y):{'label':'boolean/undefined'}}
    """
    eps_is_number = isinstance(eps, (int, float))
    if not eps_is_number:
        raise ValueError("eps should be either int or float")

    q_is_iterable = hasattr(q, "__iter__")
    if not q_is_iterable:
        raise TypeError("Q must a 2-dimentional point in the format (x,y) or [x,y]")

    if not isinstance(db, dict):
        message = (
            "db must be a dict of points in the format "
            "{(x,y):{'label':'boolean/undefined'}}"
        )
        raise TypeError(message)

    within_reach = []
    for candidate in db:
        if euclidean_distance(q, candidate) <= eps:
            within_reach.append(candidate)
    return within_reach
def plot_cluster(db, clusters, ax):
    """
    Plot the labelled points of db on ax, one colour per cluster.

    db maps each point (x, y) to a dict holding a "label" entry. Points
    whose label equals an entry of clusters are drawn in that cluster's
    colour, taken from matplotlib's rainbow colormap. Points labelled
    "noise" are drawn once, in black. Points with any other label are not
    drawn.

    db cannot be empty
    >>> fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7, 5))
    >>> plot_cluster({}, [1, 2], axes[1])
    Traceback (most recent call last):
        ...
    Exception: db is empty. No points to cluster

    clusters cannot be empty
    >>> db = {(1, 2): {'label': 'undefined'}, (2, 3): {'label': 'undefined'}}
    >>> plot_cluster(db, [], axes[1])
    Traceback (most recent call last):
        ...
    Exception: nothing to cluster. Empty clusters

    ax must be plottable
    >>> plot_cluster({(1, 2): {'label': '1'}, (2, 3): {'label': '2'}}, [1, 2], [])
    Traceback (most recent call last):
        ...
    TypeError: ax must be an slot in a matplotlib figure

    Raises:
        Exception: if db or clusters is empty.
        TypeError: if ax has no ``plot`` attribute.
    """
    if len(db) == 0:
        raise Exception("db is empty. No points to cluster")
    if len(clusters) == 0:
        raise Exception("nothing to cluster. Empty clusters")
    if not hasattr(ax, "plot"):
        raise TypeError("ax must be an slot in a matplotlib figure")

    # One evenly spaced rainbow colour per cluster id.
    colors = plt.cm.rainbow(np.linspace(0, 1, len(clusters)))
    for cluster_id, color in zip(clusters, colors):
        members = [pt for pt, info in db.items() if info["label"] == cluster_id]
        # Use a marker-only format string: a colour in the format string
        # ("ro") would be redundant with the explicit color keyword.
        ax.plot(
            [pt[0] for pt in members],
            [pt[1] for pt in members],
            "o",
            color=color,
        )

    # Gather noise in its own pass so each noise point is drawn exactly once
    # instead of once per cluster.
    noise = [pt for pt, info in db.items() if info["label"] == "noise"]
    ax.plot([pt[0] for pt in noise], [pt[1] for pt in noise], "o", color="0")
def dbscan(db, eps, min_pts):
    """
    Cluster the points of db with the DBSCAN algorithm.

    db maps each point (x, y) to a dict with a "label" entry. Only points
    whose label is "undefined" are processed. The labels are updated in
    place: each point ends up with either an integer cluster id (counting
    up from 1) or the string "noise".

    Args:
        db: dict of points in the format {(x, y): {"label": ...}}.
        eps: neighbourhood radius; int or float, not negative.
        min_pts: minimum number of points (the point itself included) that
            must lie within eps of a point for it to start or extend a
            cluster; int, not negative.

    Returns:
        A tuple ``(db, clusters)``: the same dict object that was passed in,
        and the list of cluster ids that were created.

    Raises:
        TypeError: if db is not a dict.
        Exception: if db is empty.
        ValueError: if eps or min_pts has the wrong type or is negative.

    >>> db = {pt: {'label': 'undefined'} for pt in [(0, 0), (0, 1), (10, 10)]}
    >>> labelled, clusters = dbscan(db, 1.5, 2)
    >>> clusters
    [1]
    >>> [labelled[pt]['label'] for pt in sorted(labelled)]
    [1, 1, 'noise']

    Points must be in correct format
    >>> dbscan([], (2, 2), 0.4)
    Traceback (most recent call last):
        ...
    TypeError: db must be a dict of points in the format {(x,y):{'label':'boolean/undefined'}}

    eps value should be a number
    >>> db = {(1, 2): {'label': 'undefined'}, (2, 3): {'label': 'undefined'}}
    >>> dbscan(db, 'a', 20)
    Traceback (most recent call last):
        ...
    ValueError: eps should be either int or float

    min_pts value should be an integer
    >>> dbscan(db, 0.4, 20.0)
    Traceback (most recent call last):
        ...
    ValueError: min_pts should be int

    db cannot be empty
    >>> dbscan({}, 0.4, 20.0)
    Traceback (most recent call last):
        ...
    Exception: db is empty, nothing to cluster

    min_pts cannot be negative
    >>> dbscan(db, 0.4, -20)
    Traceback (most recent call last):
        ...
    ValueError: min_pts or eps cannot be negative

    eps cannot be negative
    >>> dbscan(db, -0.4, 20)
    Traceback (most recent call last):
        ...
    ValueError: min_pts or eps cannot be negative
    """
    if not isinstance(db, dict):
        raise TypeError(
            "db must be a dict of points in the format {(x,y):{'label':'boolean/undefined'}}"
        )
    if len(db) == 0:
        raise Exception("db is empty, nothing to cluster")
    if not isinstance(eps, (int, float)):
        raise ValueError("eps should be either int or float")
    if not isinstance(min_pts, int):
        raise ValueError("min_pts should be int")
    if min_pts < 0 or eps < 0:
        raise ValueError("min_pts or eps cannot be negative")
    # stacklevel=2 attributes the warning to the caller's line.
    if min_pts == 0:
        warnings.warn("min_pts is 0. Are you sure you want this ?", stacklevel=2)
    if eps == 0:
        warnings.warn("eps is 0. Are you sure you want this ?", stacklevel=2)

    clusters = []
    cluster_id = 0
    for p in db:
        if db[p]["label"] != "undefined":
            continue  # already assigned to a cluster or marked as noise
        neighbors = find_neighbors(db, p, eps)
        if len(neighbors) < min_pts:
            # Not dense enough to start a cluster. The point may still be
            # relabelled later if it lies within eps of a dense point.
            db[p]["label"] = "noise"
            continue
        cluster_id += 1
        clusters.append(cluster_id)
        db[p]["label"] = cluster_id
        neighbors.remove(p)
        # FIFO queue of points still to visit for this cluster. A deque
        # gives O(1) pops from the left and in-place extension.
        seeds = deque(neighbors)
        while seeds:
            q = seeds.popleft()
            if db[q]["label"] == "noise":
                # Former noise point reachable from the cluster: adopt it,
                # but do not expand the cluster through it.
                db[q]["label"] = cluster_id
                continue
            if db[q]["label"] != "undefined":
                continue
            db[q]["label"] = cluster_id
            neighbors_q = find_neighbors(db, q, eps)
            if len(neighbors_q) >= min_pts:
                # q is dense as well, so its neighbours join the queue.
                seeds.extend(neighbors_q)
    return db, clusters
if __name__ == "__main__":
    # Demo: cluster a noisy two-moons sample and show the raw points (left
    # panel) next to the DBSCAN result (right panel).
    eps = 0.25
    min_pts = 12

    x, label = make_moons(n_samples=200, noise=0.1, random_state=19)
    # Every sample starts out unlabelled; dbscan fills the labels in.
    points = {(px, py): {"label": "undefined"} for px, py in x}

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(7, 5))
    raw_axis, clustered_axis = axes[0], axes[1]
    raw_axis.plot(x[:, 0], x[:, 1], "ro")

    db, clusters = dbscan(points, eps, min_pts)
    plot_cluster(db, clusters, clustered_axis)
    plt.show()

File diff suppressed because it is too large Load Diff

View File

@ -1,401 +0,0 @@
User ID,Gender,Age,EstimatedSalary,Purchased
15624510,Male,19,19000,0
15810944,Male,35,20000,0
15668575,Female,26,43000,0
15603246,Female,27,57000,0
15804002,Male,19,76000,0
15728773,Male,27,58000,0
15598044,Female,27,84000,0
15694829,Female,32,150000,1
15600575,Male,25,33000,0
15727311,Female,35,65000,0
15570769,Female,26,80000,0
15606274,Female,26,52000,0
15746139,Male,20,86000,0
15704987,Male,32,18000,0
15628972,Male,18,82000,0
15697686,Male,29,80000,0
15733883,Male,47,25000,1
15617482,Male,45,26000,1
15704583,Male,46,28000,1
15621083,Female,48,29000,1
15649487,Male,45,22000,1
15736760,Female,47,49000,1
15714658,Male,48,41000,1
15599081,Female,45,22000,1
15705113,Male,46,23000,1
15631159,Male,47,20000,1
15792818,Male,49,28000,1
15633531,Female,47,30000,1
15744529,Male,29,43000,0
15669656,Male,31,18000,0
15581198,Male,31,74000,0
15729054,Female,27,137000,1
15573452,Female,21,16000,0
15776733,Female,28,44000,0
15724858,Male,27,90000,0
15713144,Male,35,27000,0
15690188,Female,33,28000,0
15689425,Male,30,49000,0
15671766,Female,26,72000,0
15782806,Female,27,31000,0
15764419,Female,27,17000,0
15591915,Female,33,51000,0
15772798,Male,35,108000,0
15792008,Male,30,15000,0
15715541,Female,28,84000,0
15639277,Male,23,20000,0
15798850,Male,25,79000,0
15776348,Female,27,54000,0
15727696,Male,30,135000,1
15793813,Female,31,89000,0
15694395,Female,24,32000,0
15764195,Female,18,44000,0
15744919,Female,29,83000,0
15671655,Female,35,23000,0
15654901,Female,27,58000,0
15649136,Female,24,55000,0
15775562,Female,23,48000,0
15807481,Male,28,79000,0
15642885,Male,22,18000,0
15789109,Female,32,117000,0
15814004,Male,27,20000,0
15673619,Male,25,87000,0
15595135,Female,23,66000,0
15583681,Male,32,120000,1
15605000,Female,59,83000,0
15718071,Male,24,58000,0
15679760,Male,24,19000,0
15654574,Female,23,82000,0
15577178,Female,22,63000,0
15595324,Female,31,68000,0
15756932,Male,25,80000,0
15726358,Female,24,27000,0
15595228,Female,20,23000,0
15782530,Female,33,113000,0
15592877,Male,32,18000,0
15651983,Male,34,112000,1
15746737,Male,18,52000,0
15774179,Female,22,27000,0
15667265,Female,28,87000,0
15655123,Female,26,17000,0
15595917,Male,30,80000,0
15668385,Male,39,42000,0
15709476,Male,20,49000,0
15711218,Male,35,88000,0
15798659,Female,30,62000,0
15663939,Female,31,118000,1
15694946,Male,24,55000,0
15631912,Female,28,85000,0
15768816,Male,26,81000,0
15682268,Male,35,50000,0
15684801,Male,22,81000,0
15636428,Female,30,116000,0
15809823,Male,26,15000,0
15699284,Female,29,28000,0
15786993,Female,29,83000,0
15709441,Female,35,44000,0
15710257,Female,35,25000,0
15582492,Male,28,123000,1
15575694,Male,35,73000,0
15756820,Female,28,37000,0
15766289,Male,27,88000,0
15593014,Male,28,59000,0
15584545,Female,32,86000,0
15675949,Female,33,149000,1
15672091,Female,19,21000,0
15801658,Male,21,72000,0
15706185,Female,26,35000,0
15789863,Male,27,89000,0
15720943,Male,26,86000,0
15697997,Female,38,80000,0
15665416,Female,39,71000,0
15660200,Female,37,71000,0
15619653,Male,38,61000,0
15773447,Male,37,55000,0
15739160,Male,42,80000,0
15689237,Male,40,57000,0
15679297,Male,35,75000,0
15591433,Male,36,52000,0
15642725,Male,40,59000,0
15701962,Male,41,59000,0
15811613,Female,36,75000,0
15741049,Male,37,72000,0
15724423,Female,40,75000,0
15574305,Male,35,53000,0
15678168,Female,41,51000,0
15697020,Female,39,61000,0
15610801,Male,42,65000,0
15745232,Male,26,32000,0
15722758,Male,30,17000,0
15792102,Female,26,84000,0
15675185,Male,31,58000,0
15801247,Male,33,31000,0
15725660,Male,30,87000,0
15638963,Female,21,68000,0
15800061,Female,28,55000,0
15578006,Male,23,63000,0
15668504,Female,20,82000,0
15687491,Male,30,107000,1
15610403,Female,28,59000,0
15741094,Male,19,25000,0
15807909,Male,19,85000,0
15666141,Female,18,68000,0
15617134,Male,35,59000,0
15783029,Male,30,89000,0
15622833,Female,34,25000,0
15746422,Female,24,89000,0
15750839,Female,27,96000,1
15749130,Female,41,30000,0
15779862,Male,29,61000,0
15767871,Male,20,74000,0
15679651,Female,26,15000,0
15576219,Male,41,45000,0
15699247,Male,31,76000,0
15619087,Female,36,50000,0
15605327,Male,40,47000,0
15610140,Female,31,15000,0
15791174,Male,46,59000,0
15602373,Male,29,75000,0
15762605,Male,26,30000,0
15598840,Female,32,135000,1
15744279,Male,32,100000,1
15670619,Male,25,90000,0
15599533,Female,37,33000,0
15757837,Male,35,38000,0
15697574,Female,33,69000,0
15578738,Female,18,86000,0
15762228,Female,22,55000,0
15614827,Female,35,71000,0
15789815,Male,29,148000,1
15579781,Female,29,47000,0
15587013,Male,21,88000,0
15570932,Male,34,115000,0
15794661,Female,26,118000,0
15581654,Female,34,43000,0
15644296,Female,34,72000,0
15614420,Female,23,28000,0
15609653,Female,35,47000,0
15594577,Male,25,22000,0
15584114,Male,24,23000,0
15673367,Female,31,34000,0
15685576,Male,26,16000,0
15774727,Female,31,71000,0
15694288,Female,32,117000,1
15603319,Male,33,43000,0
15759066,Female,33,60000,0
15814816,Male,31,66000,0
15724402,Female,20,82000,0
15571059,Female,33,41000,0
15674206,Male,35,72000,0
15715160,Male,28,32000,0
15730448,Male,24,84000,0
15662067,Female,19,26000,0
15779581,Male,29,43000,0
15662901,Male,19,70000,0
15689751,Male,28,89000,0
15667742,Male,34,43000,0
15738448,Female,30,79000,0
15680243,Female,20,36000,0
15745083,Male,26,80000,0
15708228,Male,35,22000,0
15628523,Male,35,39000,0
15708196,Male,49,74000,0
15735549,Female,39,134000,1
15809347,Female,41,71000,0
15660866,Female,58,101000,1
15766609,Female,47,47000,0
15654230,Female,55,130000,1
15794566,Female,52,114000,0
15800890,Female,40,142000,1
15697424,Female,46,22000,0
15724536,Female,48,96000,1
15735878,Male,52,150000,1
15707596,Female,59,42000,0
15657163,Male,35,58000,0
15622478,Male,47,43000,0
15779529,Female,60,108000,1
15636023,Male,49,65000,0
15582066,Male,40,78000,0
15666675,Female,46,96000,0
15732987,Male,59,143000,1
15789432,Female,41,80000,0
15663161,Male,35,91000,1
15694879,Male,37,144000,1
15593715,Male,60,102000,1
15575002,Female,35,60000,0
15622171,Male,37,53000,0
15795224,Female,36,126000,1
15685346,Male,56,133000,1
15691808,Female,40,72000,0
15721007,Female,42,80000,1
15794253,Female,35,147000,1
15694453,Male,39,42000,0
15813113,Male,40,107000,1
15614187,Male,49,86000,1
15619407,Female,38,112000,0
15646227,Male,46,79000,1
15660541,Male,40,57000,0
15753874,Female,37,80000,0
15617877,Female,46,82000,0
15772073,Female,53,143000,1
15701537,Male,42,149000,1
15736228,Male,38,59000,0
15780572,Female,50,88000,1
15769596,Female,56,104000,1
15586996,Female,41,72000,0
15722061,Female,51,146000,1
15638003,Female,35,50000,0
15775590,Female,57,122000,1
15730688,Male,41,52000,0
15753102,Female,35,97000,1
15810075,Female,44,39000,0
15723373,Male,37,52000,0
15795298,Female,48,134000,1
15584320,Female,37,146000,1
15724161,Female,50,44000,0
15750056,Female,52,90000,1
15609637,Female,41,72000,0
15794493,Male,40,57000,0
15569641,Female,58,95000,1
15815236,Female,45,131000,1
15811177,Female,35,77000,0
15680587,Male,36,144000,1
15672821,Female,55,125000,1
15767681,Female,35,72000,0
15600379,Male,48,90000,1
15801336,Female,42,108000,1
15721592,Male,40,75000,0
15581282,Male,37,74000,0
15746203,Female,47,144000,1
15583137,Male,40,61000,0
15680752,Female,43,133000,0
15688172,Female,59,76000,1
15791373,Male,60,42000,1
15589449,Male,39,106000,1
15692819,Female,57,26000,1
15727467,Male,57,74000,1
15734312,Male,38,71000,0
15764604,Male,49,88000,1
15613014,Female,52,38000,1
15759684,Female,50,36000,1
15609669,Female,59,88000,1
15685536,Male,35,61000,0
15750447,Male,37,70000,1
15663249,Female,52,21000,1
15638646,Male,48,141000,0
15734161,Female,37,93000,1
15631070,Female,37,62000,0
15761950,Female,48,138000,1
15649668,Male,41,79000,0
15713912,Female,37,78000,1
15586757,Male,39,134000,1
15596522,Male,49,89000,1
15625395,Male,55,39000,1
15760570,Male,37,77000,0
15566689,Female,35,57000,0
15725794,Female,36,63000,0
15673539,Male,42,73000,1
15705298,Female,43,112000,1
15675791,Male,45,79000,0
15747043,Male,46,117000,1
15736397,Female,58,38000,1
15678201,Male,48,74000,1
15720745,Female,37,137000,1
15637593,Male,37,79000,1
15598070,Female,40,60000,0
15787550,Male,42,54000,0
15603942,Female,51,134000,0
15733973,Female,47,113000,1
15596761,Male,36,125000,1
15652400,Female,38,50000,0
15717893,Female,42,70000,0
15622585,Male,39,96000,1
15733964,Female,38,50000,0
15753861,Female,49,141000,1
15747097,Female,39,79000,0
15594762,Female,39,75000,1
15667417,Female,54,104000,1
15684861,Male,35,55000,0
15742204,Male,45,32000,1
15623502,Male,36,60000,0
15774872,Female,52,138000,1
15611191,Female,53,82000,1
15674331,Male,41,52000,0
15619465,Female,48,30000,1
15575247,Female,48,131000,1
15695679,Female,41,60000,0
15713463,Male,41,72000,0
15785170,Female,42,75000,0
15796351,Male,36,118000,1
15639576,Female,47,107000,1
15693264,Male,38,51000,0
15589715,Female,48,119000,1
15769902,Male,42,65000,0
15587177,Male,40,65000,0
15814553,Male,57,60000,1
15601550,Female,36,54000,0
15664907,Male,58,144000,1
15612465,Male,35,79000,0
15810800,Female,38,55000,0
15665760,Male,39,122000,1
15588080,Female,53,104000,1
15776844,Male,35,75000,0
15717560,Female,38,65000,0
15629739,Female,47,51000,1
15729908,Male,47,105000,1
15716781,Female,41,63000,0
15646936,Male,53,72000,1
15768151,Female,54,108000,1
15579212,Male,39,77000,0
15721835,Male,38,61000,0
15800515,Female,38,113000,1
15591279,Male,37,75000,0
15587419,Female,42,90000,1
15750335,Female,37,57000,0
15699619,Male,36,99000,1
15606472,Male,60,34000,1
15778368,Male,54,70000,1
15671387,Female,41,72000,0
15573926,Male,40,71000,1
15709183,Male,42,54000,0
15577514,Male,43,129000,1
15778830,Female,53,34000,1
15768072,Female,47,50000,1
15768293,Female,42,79000,0
15654456,Male,42,104000,1
15807525,Female,59,29000,1
15574372,Female,58,47000,1
15671249,Male,46,88000,1
15779744,Male,38,71000,0
15624755,Female,54,26000,1
15611430,Female,60,46000,1
15774744,Male,60,83000,1
15629885,Female,39,73000,0
15708791,Male,59,130000,1
15793890,Female,37,80000,0
15646091,Female,46,32000,1
15596984,Female,46,74000,0
15800215,Female,42,53000,0
15577806,Male,41,87000,1
15749381,Female,58,23000,1
15683758,Male,42,64000,0
15670615,Male,48,33000,1
15715622,Female,44,139000,1
15707634,Male,49,28000,1
15806901,Female,57,33000,1
15775335,Male,56,60000,1
15724150,Female,49,39000,1
15627220,Male,39,71000,0
15672330,Male,47,34000,1
15668521,Female,48,35000,1
15807837,Male,48,33000,1
15592570,Male,47,23000,1
15748589,Female,45,45000,1
15635893,Male,60,42000,1
15757632,Female,39,59000,0
15691863,Female,46,41000,1
15706071,Male,51,23000,1
15654296,Female,50,20000,1
15755018,Male,36,33000,0
15594041,Female,49,36000,1
1 User ID Gender Age EstimatedSalary Purchased
2 15624510 Male 19 19000 0
3 15810944 Male 35 20000 0
4 15668575 Female 26 43000 0
5 15603246 Female 27 57000 0
6 15804002 Male 19 76000 0
7 15728773 Male 27 58000 0
8 15598044 Female 27 84000 0
9 15694829 Female 32 150000 1
10 15600575 Male 25 33000 0
11 15727311 Female 35 65000 0
12 15570769 Female 26 80000 0
13 15606274 Female 26 52000 0
14 15746139 Male 20 86000 0
15 15704987 Male 32 18000 0
16 15628972 Male 18 82000 0
17 15697686 Male 29 80000 0
18 15733883 Male 47 25000 1
19 15617482 Male 45 26000 1
20 15704583 Male 46 28000 1
21 15621083 Female 48 29000 1
22 15649487 Male 45 22000 1
23 15736760 Female 47 49000 1
24 15714658 Male 48 41000 1
25 15599081 Female 45 22000 1
26 15705113 Male 46 23000 1
27 15631159 Male 47 20000 1
28 15792818 Male 49 28000 1
29 15633531 Female 47 30000 1
30 15744529 Male 29 43000 0
31 15669656 Male 31 18000 0
32 15581198 Male 31 74000 0
33 15729054 Female 27 137000 1
34 15573452 Female 21 16000 0
35 15776733 Female 28 44000 0
36 15724858 Male 27 90000 0
37 15713144 Male 35 27000 0
38 15690188 Female 33 28000 0
39 15689425 Male 30 49000 0
40 15671766 Female 26 72000 0
41 15782806 Female 27 31000 0
42 15764419 Female 27 17000 0
43 15591915 Female 33 51000 0
44 15772798 Male 35 108000 0
45 15792008 Male 30 15000 0
46 15715541 Female 28 84000 0
47 15639277 Male 23 20000 0
48 15798850 Male 25 79000 0
49 15776348 Female 27 54000 0
50 15727696 Male 30 135000 1
51 15793813 Female 31 89000 0
52 15694395 Female 24 32000 0
53 15764195 Female 18 44000 0
54 15744919 Female 29 83000 0
55 15671655 Female 35 23000 0
56 15654901 Female 27 58000 0
57 15649136 Female 24 55000 0
58 15775562 Female 23 48000 0
59 15807481 Male 28 79000 0
60 15642885 Male 22 18000 0
61 15789109 Female 32 117000 0
62 15814004 Male 27 20000 0
63 15673619 Male 25 87000 0
64 15595135 Female 23 66000 0
65 15583681 Male 32 120000 1
66 15605000 Female 59 83000 0
67 15718071 Male 24 58000 0
68 15679760 Male 24 19000 0
69 15654574 Female 23 82000 0
70 15577178 Female 22 63000 0
71 15595324 Female 31 68000 0
72 15756932 Male 25 80000 0
73 15726358 Female 24 27000 0
74 15595228 Female 20 23000 0
75 15782530 Female 33 113000 0
76 15592877 Male 32 18000 0
77 15651983 Male 34 112000 1
78 15746737 Male 18 52000 0
79 15774179 Female 22 27000 0
80 15667265 Female 28 87000 0
81 15655123 Female 26 17000 0
82 15595917 Male 30 80000 0
83 15668385 Male 39 42000 0
84 15709476 Male 20 49000 0
85 15711218 Male 35 88000 0
86 15798659 Female 30 62000 0
87 15663939 Female 31 118000 1
88 15694946 Male 24 55000 0
89 15631912 Female 28 85000 0
90 15768816 Male 26 81000 0
91 15682268 Male 35 50000 0
92 15684801 Male 22 81000 0
93 15636428 Female 30 116000 0
94 15809823 Male 26 15000 0
95 15699284 Female 29 28000 0
96 15786993 Female 29 83000 0
97 15709441 Female 35 44000 0
98 15710257 Female 35 25000 0
99 15582492 Male 28 123000 1
100 15575694 Male 35 73000 0
101 15756820 Female 28 37000 0
102 15766289 Male 27 88000 0
103 15593014 Male 28 59000 0
104 15584545 Female 32 86000 0
105 15675949 Female 33 149000 1
106 15672091 Female 19 21000 0
107 15801658 Male 21 72000 0
108 15706185 Female 26 35000 0
109 15789863 Male 27 89000 0
110 15720943 Male 26 86000 0
111 15697997 Female 38 80000 0
112 15665416 Female 39 71000 0
113 15660200 Female 37 71000 0
114 15619653 Male 38 61000 0
115 15773447 Male 37 55000 0
116 15739160 Male 42 80000 0
117 15689237 Male 40 57000 0
118 15679297 Male 35 75000 0
119 15591433 Male 36 52000 0
120 15642725 Male 40 59000 0
121 15701962 Male 41 59000 0
122 15811613 Female 36 75000 0
123 15741049 Male 37 72000 0
124 15724423 Female 40 75000 0
125 15574305 Male 35 53000 0
126 15678168 Female 41 51000 0
127 15697020 Female 39 61000 0
128 15610801 Male 42 65000 0
129 15745232 Male 26 32000 0
130 15722758 Male 30 17000 0
131 15792102 Female 26 84000 0
132 15675185 Male 31 58000 0
133 15801247 Male 33 31000 0
134 15725660 Male 30 87000 0
135 15638963 Female 21 68000 0
136 15800061 Female 28 55000 0
137 15578006 Male 23 63000 0
138 15668504 Female 20 82000 0
139 15687491 Male 30 107000 1
140 15610403 Female 28 59000 0
141 15741094 Male 19 25000 0
142 15807909 Male 19 85000 0
143 15666141 Female 18 68000 0
144 15617134 Male 35 59000 0
145 15783029 Male 30 89000 0
146 15622833 Female 34 25000 0
147 15746422 Female 24 89000 0
148 15750839 Female 27 96000 1
149 15749130 Female 41 30000 0
150 15779862 Male 29 61000 0
151 15767871 Male 20 74000 0
152 15679651 Female 26 15000 0
153 15576219 Male 41 45000 0
154 15699247 Male 31 76000 0
155 15619087 Female 36 50000 0
156 15605327 Male 40 47000 0
157 15610140 Female 31 15000 0
158 15791174 Male 46 59000 0
159 15602373 Male 29 75000 0
160 15762605 Male 26 30000 0
161 15598840 Female 32 135000 1
162 15744279 Male 32 100000 1
163 15670619 Male 25 90000 0
164 15599533 Female 37 33000 0
165 15757837 Male 35 38000 0
166 15697574 Female 33 69000 0
167 15578738 Female 18 86000 0
168 15762228 Female 22 55000 0
169 15614827 Female 35 71000 0
170 15789815 Male 29 148000 1
171 15579781 Female 29 47000 0
172 15587013 Male 21 88000 0
173 15570932 Male 34 115000 0
174 15794661 Female 26 118000 0
175 15581654 Female 34 43000 0
176 15644296 Female 34 72000 0
177 15614420 Female 23 28000 0
178 15609653 Female 35 47000 0
179 15594577 Male 25 22000 0
180 15584114 Male 24 23000 0
181 15673367 Female 31 34000 0
182 15685576 Male 26 16000 0
183 15774727 Female 31 71000 0
184 15694288 Female 32 117000 1
185 15603319 Male 33 43000 0
186 15759066 Female 33 60000 0
187 15814816 Male 31 66000 0
188 15724402 Female 20 82000 0
189 15571059 Female 33 41000 0
190 15674206 Male 35 72000 0
191 15715160 Male 28 32000 0
192 15730448 Male 24 84000 0
193 15662067 Female 19 26000 0
194 15779581 Male 29 43000 0
195 15662901 Male 19 70000 0
196 15689751 Male 28 89000 0
197 15667742 Male 34 43000 0
198 15738448 Female 30 79000 0
199 15680243 Female 20 36000 0
200 15745083 Male 26 80000 0
201 15708228 Male 35 22000 0
202 15628523 Male 35 39000 0
203 15708196 Male 49 74000 0
204 15735549 Female 39 134000 1
205 15809347 Female 41 71000 0
206 15660866 Female 58 101000 1
207 15766609 Female 47 47000 0
208 15654230 Female 55 130000 1
209 15794566 Female 52 114000 0
210 15800890 Female 40 142000 1
211 15697424 Female 46 22000 0
212 15724536 Female 48 96000 1
213 15735878 Male 52 150000 1
214 15707596 Female 59 42000 0
215 15657163 Male 35 58000 0
216 15622478 Male 47 43000 0
217 15779529 Female 60 108000 1
218 15636023 Male 49 65000 0
219 15582066 Male 40 78000 0
220 15666675 Female 46 96000 0
221 15732987 Male 59 143000 1
222 15789432 Female 41 80000 0
223 15663161 Male 35 91000 1
224 15694879 Male 37 144000 1
225 15593715 Male 60 102000 1
226 15575002 Female 35 60000 0
227 15622171 Male 37 53000 0
228 15795224 Female 36 126000 1
229 15685346 Male 56 133000 1
230 15691808 Female 40 72000 0
231 15721007 Female 42 80000 1
232 15794253 Female 35 147000 1
233 15694453 Male 39 42000 0
234 15813113 Male 40 107000 1
235 15614187 Male 49 86000 1
236 15619407 Female 38 112000 0
237 15646227 Male 46 79000 1
238 15660541 Male 40 57000 0
239 15753874 Female 37 80000 0
240 15617877 Female 46 82000 0
241 15772073 Female 53 143000 1
242 15701537 Male 42 149000 1
243 15736228 Male 38 59000 0
244 15780572 Female 50 88000 1
245 15769596 Female 56 104000 1
246 15586996 Female 41 72000 0
247 15722061 Female 51 146000 1
248 15638003 Female 35 50000 0
249 15775590 Female 57 122000 1
250 15730688 Male 41 52000 0
251 15753102 Female 35 97000 1
252 15810075 Female 44 39000 0
253 15723373 Male 37 52000 0
254 15795298 Female 48 134000 1
255 15584320 Female 37 146000 1
256 15724161 Female 50 44000 0
257 15750056 Female 52 90000 1
258 15609637 Female 41 72000 0
259 15794493 Male 40 57000 0
260 15569641 Female 58 95000 1
261 15815236 Female 45 131000 1
262 15811177 Female 35 77000 0
263 15680587 Male 36 144000 1
264 15672821 Female 55 125000 1
265 15767681 Female 35 72000 0
266 15600379 Male 48 90000 1
267 15801336 Female 42 108000 1
268 15721592 Male 40 75000 0
269 15581282 Male 37 74000 0
270 15746203 Female 47 144000 1
271 15583137 Male 40 61000 0
272 15680752 Female 43 133000 0
273 15688172 Female 59 76000 1
274 15791373 Male 60 42000 1
275 15589449 Male 39 106000 1
276 15692819 Female 57 26000 1
277 15727467 Male 57 74000 1
278 15734312 Male 38 71000 0
279 15764604 Male 49 88000 1
280 15613014 Female 52 38000 1
281 15759684 Female 50 36000 1
282 15609669 Female 59 88000 1
283 15685536 Male 35 61000 0
284 15750447 Male 37 70000 1
285 15663249 Female 52 21000 1
286 15638646 Male 48 141000 0
287 15734161 Female 37 93000 1
288 15631070 Female 37 62000 0
289 15761950 Female 48 138000 1
290 15649668 Male 41 79000 0
291 15713912 Female 37 78000 1
292 15586757 Male 39 134000 1
293 15596522 Male 49 89000 1
294 15625395 Male 55 39000 1
295 15760570 Male 37 77000 0
296 15566689 Female 35 57000 0
297 15725794 Female 36 63000 0
298 15673539 Male 42 73000 1
299 15705298 Female 43 112000 1
300 15675791 Male 45 79000 0
301 15747043 Male 46 117000 1
302 15736397 Female 58 38000 1
303 15678201 Male 48 74000 1
304 15720745 Female 37 137000 1
305 15637593 Male 37 79000 1
306 15598070 Female 40 60000 0
307 15787550 Male 42 54000 0
308 15603942 Female 51 134000 0
309 15733973 Female 47 113000 1
310 15596761 Male 36 125000 1
311 15652400 Female 38 50000 0
312 15717893 Female 42 70000 0
313 15622585 Male 39 96000 1
314 15733964 Female 38 50000 0
315 15753861 Female 49 141000 1
316 15747097 Female 39 79000 0
317 15594762 Female 39 75000 1
318 15667417 Female 54 104000 1
319 15684861 Male 35 55000 0
320 15742204 Male 45 32000 1
321 15623502 Male 36 60000 0
322 15774872 Female 52 138000 1
323 15611191 Female 53 82000 1
324 15674331 Male 41 52000 0
325 15619465 Female 48 30000 1
326 15575247 Female 48 131000 1
327 15695679 Female 41 60000 0
328 15713463 Male 41 72000 0
329 15785170 Female 42 75000 0
330 15796351 Male 36 118000 1
331 15639576 Female 47 107000 1
332 15693264 Male 38 51000 0
333 15589715 Female 48 119000 1
334 15769902 Male 42 65000 0
335 15587177 Male 40 65000 0
336 15814553 Male 57 60000 1
337 15601550 Female 36 54000 0
338 15664907 Male 58 144000 1
339 15612465 Male 35 79000 0
340 15810800 Female 38 55000 0
341 15665760 Male 39 122000 1
342 15588080 Female 53 104000 1
343 15776844 Male 35 75000 0
344 15717560 Female 38 65000 0
345 15629739 Female 47 51000 1
346 15729908 Male 47 105000 1
347 15716781 Female 41 63000 0
348 15646936 Male 53 72000 1
349 15768151 Female 54 108000 1
350 15579212 Male 39 77000 0
351 15721835 Male 38 61000 0
352 15800515 Female 38 113000 1
353 15591279 Male 37 75000 0
354 15587419 Female 42 90000 1
355 15750335 Female 37 57000 0
356 15699619 Male 36 99000 1
357 15606472 Male 60 34000 1
358 15778368 Male 54 70000 1
359 15671387 Female 41 72000 0
360 15573926 Male 40 71000 1
361 15709183 Male 42 54000 0
362 15577514 Male 43 129000 1
363 15778830 Female 53 34000 1
364 15768072 Female 47 50000 1
365 15768293 Female 42 79000 0
366 15654456 Male 42 104000 1
367 15807525 Female 59 29000 1
368 15574372 Female 58 47000 1
369 15671249 Male 46 88000 1
370 15779744 Male 38 71000 0
371 15624755 Female 54 26000 1
372 15611430 Female 60 46000 1
373 15774744 Male 60 83000 1
374 15629885 Female 39 73000 0
375 15708791 Male 59 130000 1
376 15793890 Female 37 80000 0
377 15646091 Female 46 32000 1
378 15596984 Female 46 74000 0
379 15800215 Female 42 53000 0
380 15577806 Male 41 87000 1
381 15749381 Female 58 23000 1
382 15683758 Male 42 64000 0
383 15670615 Male 48 33000 1
384 15715622 Female 44 139000 1
385 15707634 Male 49 28000 1
386 15806901 Female 57 33000 1
387 15775335 Male 56 60000 1
388 15724150 Female 49 39000 1
389 15627220 Male 39 71000 0
390 15672330 Male 47 34000 1
391 15668521 Female 48 35000 1
392 15807837 Male 48 33000 1
393 15592570 Male 47 23000 1
394 15748589 Female 45 45000 1
395 15635893 Male 60 42000 1
396 15757632 Female 39 59000 0
397 15691863 Female 46 41000 1
398 15706071 Male 51 23000 1
399 15654296 Female 50 20000 1
400 15755018 Male 36 33000 0
401 15594041 Female 49 36000 1

View File

@ -1,103 +0,0 @@
# Random Forest Classification
#
# Reads Social_Network_Ads.csv from the directory of this script, trains a
# random forest classifier on two of its columns and plots the decision
# regions for the training set and for the test set.

# Importing the libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Colours for the first and second class, used for both the decision regions
# and the data points.
_CLASS_COLORS = ListedColormap(("red", "green"))


def _plot_decision_regions(model, X_set, y_set, title):
    """Show the decision regions of model together with the points of X_set.

    model must offer ``predict``; X_set holds two feature columns and y_set
    the matching class labels. The figure is displayed with ``plt.show()``.
    """
    # Dense grid covering the data range plus a margin of 1 on every side.
    X1, X2 = np.meshgrid(
        np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
        np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01),
    )
    # Predict the class of every grid node and draw the result as filled
    # contours.
    grid_points = np.array([X1.ravel(), X2.ravel()]).T
    plt.contourf(
        X1,
        X2,
        model.predict(grid_points).reshape(X1.shape),
        alpha=0.75,
        cmap=_CLASS_COLORS,
    )
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())
    for i, j in enumerate(np.unique(y_set)):
        plt.scatter(
            X_set[y_set == j, 0],
            X_set[y_set == j, 1],
            # color= (not c=): a single RGBA tuple passed as c can be
            # mistaken for a sequence of values to colour-map.
            color=_CLASS_COLORS(i),
            label=j,
        )
    plt.title(title)
    plt.xlabel("Age")
    plt.ylabel("Estimated Salary")
    plt.legend()
    plt.show()


# Importing the dataset
script_dir = os.path.dirname(os.path.realpath(__file__))
dataset = pd.read_csv(os.path.join(script_dir, "Social_Network_Ads.csv"))
# Columns 2 and 3 are the features (presumably age and estimated salary,
# going by the axis labels); column 4 is the class label.
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Feature Scaling
# The scaler is fitted on the training set only and then reused for the test
# set. The plots below therefore show standardised values.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Random Forest Classification to the Training set
classifier = RandomForestClassifier(
    n_estimators=10, criterion="entropy", random_state=0
)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
_plot_decision_regions(
    classifier, X_train, y_train, "Random Forest Classification (Training set)"
)

# Visualising the Test set results
_plot_decision_regions(
    classifier, X_test, y_test, "Random Forest Classification (Test set)"
)

File diff suppressed because one or more lines are too long

View File

@ -1,11 +0,0 @@
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
1 Position Level Salary
2 Business Analyst 1 45000
3 Junior Consultant 2 50000
4 Senior Consultant 3 60000
5 Manager 4 80000
6 Country Manager 5 110000
7 Region Manager 6 150000
8 Partner 7 200000
9 Senior Partner 8 300000
10 C-level 9 500000
11 CEO 10 1000000

File diff suppressed because one or more lines are too long

View File

@ -1,44 +0,0 @@
# Random Forest Regression
#
# Reads Position_Salaries.csv from the directory of this script, fits a
# random forest regressor on the whole dataset and plots its predictions
# over a fine grid of input values.

# Importing the libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Importing the dataset
script_dir = os.path.dirname(os.path.realpath(__file__))
dataset = pd.read_csv(os.path.join(script_dir, "Position_Salaries.csv"))
# The 1:2 slice (rather than a plain index) keeps X two-dimensional, which
# is what fit() and predict() are given below.
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# No train/test split and no feature scaling: the model is fitted on the
# complete dataset. (Earlier revisions carried both steps as disabled code.)

# Fitting Random Forest Regression to the dataset
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X, y)

# Predicting a new result
y_pred = regressor.predict([[6.5]])

# Visualising the Random Forest Regression results (higher resolution)
# X.min()/X.max() give plain scalars. The builtin min()/max() on a 2-D array
# return one-element arrays, and passing those where a scalar is expected is
# deprecated in recent NumPy releases.
X_grid = np.arange(X.min(), X.max(), 0.01)
X_grid = X_grid.reshape(-1, 1)
plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Truth or Bluff (Random Forest Regression)")
plt.xlabel("Position level")
plt.ylabel("Salary")
plt.show()

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long