mirror of
https://github.com/TheAlgorithms/Python.git
synced 2024-11-24 13:31:07 +00:00
Fix SettingWithCopy warning by pandas (#2346)
* Fix SettingWithCopy warning in pandas https://github.com/TheAlgorithms/Python/issues/2282 * Update k_means_clust.py * Update k_means_clust.py * Update k_means_clust.py * Update k_means_clust.py * Update k_means_clust.py * Update k_means_clust.py
This commit is contained in:
parent
ee28deea4a
commit
d402cd0b6e
|
@ -1,13 +1,10 @@
|
||||||
"""README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
|
"""README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
- sklearn
|
- sklearn
|
||||||
- numpy
|
- numpy
|
||||||
- matplotlib
|
- matplotlib
|
||||||
|
|
||||||
Python:
|
Python:
|
||||||
- 3.5
|
- 3.5
|
||||||
|
|
||||||
Inputs:
|
Inputs:
|
||||||
- X , a 2D numpy array of features.
|
- X , a 2D numpy array of features.
|
||||||
- k , number of clusters to create.
|
- k , number of clusters to create.
|
||||||
|
@ -16,10 +13,8 @@ Inputs:
|
||||||
- maxiter , maximum number of iterations to process.
|
- maxiter , maximum number of iterations to process.
|
||||||
- heterogeneity , empty list that will be filled with heterogeneity values if passed
|
- heterogeneity , empty list that will be filled with heterogeneity values if passed
|
||||||
to kmeans func.
|
to kmeans func.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
1. define 'k' value, 'X' features array and 'heterogeneity' empty list
|
1. define 'k' value, 'X' features array and 'heterogeneity' empty list
|
||||||
|
|
||||||
2. create initial_centroids,
|
2. create initial_centroids,
|
||||||
initial_centroids = get_initial_centroids(
|
initial_centroids = get_initial_centroids(
|
||||||
X,
|
X,
|
||||||
|
@ -27,9 +22,7 @@ Usage:
|
||||||
seed=0 # seed value for initial centroid generation,
|
seed=0 # seed value for initial centroid generation,
|
||||||
# None for randomness(default=None)
|
# None for randomness(default=None)
|
||||||
)
|
)
|
||||||
|
|
||||||
3. find centroids and clusters using kmeans function.
|
3. find centroids and clusters using kmeans function.
|
||||||
|
|
||||||
centroids, cluster_assignment = kmeans(
|
centroids, cluster_assignment = kmeans(
|
||||||
X,
|
X,
|
||||||
k,
|
k,
|
||||||
|
@ -38,19 +31,14 @@ Usage:
|
||||||
record_heterogeneity=heterogeneity,
|
record_heterogeneity=heterogeneity,
|
||||||
verbose=True # whether to print logs in console or not.(default=False)
|
verbose=True # whether to print logs in console or not.(default=False)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
4. Plot the loss function, heterogeneity values for every iteration saved in
|
4. Plot the loss function, heterogeneity values for every iteration saved in
|
||||||
heterogeneity list.
|
heterogeneity list.
|
||||||
plot_heterogeneity(
|
plot_heterogeneity(
|
||||||
heterogeneity,
|
heterogeneity,
|
||||||
k
|
k
|
||||||
)
|
)
|
||||||
|
|
||||||
5. Transfer the DataFrame into Excel format; it must have a feature called
|
5. Transfer the DataFrame into Excel format; it must have a feature called
|
||||||
'Clust' with k-means clustering numbers in it.
|
'Clust' with k-means clustering numbers in it.
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
@ -222,7 +210,6 @@ def ReportGenerator(
|
||||||
In order to run the function, the following libraries must be imported:
|
In order to run the function, the following libraries must be imported:
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
>>> data = pd.DataFrame()
|
>>> data = pd.DataFrame()
|
||||||
>>> data['numbers'] = [1, 2, 3]
|
>>> data['numbers'] = [1, 2, 3]
|
||||||
>>> data['col1'] = [0.5, 2.5, 4.5]
|
>>> data['col1'] = [0.5, 2.5, 4.5]
|
||||||
|
@ -287,10 +274,10 @@ def ReportGenerator(
|
||||||
.T.reset_index()
|
.T.reset_index()
|
||||||
.rename(index=str, columns={"level_0": "Features", "level_1": "Type"})
|
.rename(index=str, columns={"level_0": "Features", "level_1": "Type"})
|
||||||
) # rename columns
|
) # rename columns
|
||||||
|
# calculate the size of cluster(count of clientID's)
|
||||||
clustersize = report[
|
clustersize = report[
|
||||||
(report["Features"] == "dummy") & (report["Type"] == "count")
|
(report["Features"] == "dummy") & (report["Type"] == "count")
|
||||||
] # calculate the size of cluster(count of clientID's)
|
].copy() # avoid SettingWithCopyWarning
|
||||||
clustersize.Type = (
|
clustersize.Type = (
|
||||||
"ClusterSize" # rename created cluster df to match report column names
|
"ClusterSize" # rename created cluster df to match report column names
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user