Fix SettingWithCopyWarning raised by pandas (#2346)

* Fix SettingWithCopyWarning in pandas

https://github.com/TheAlgorithms/Python/issues/2282

* Update k_means_clust.py

* Update k_means_clust.py

* Update k_means_clust.py

* Update k_means_clust.py

* Update k_means_clust.py

* Update k_means_clust.py
This commit is contained in:
BAKEZQ 2020-08-23 19:40:57 +08:00 committed by GitHub
parent ee28deea4a
commit d402cd0b6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,13 +1,10 @@
"""README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com) """README, Author - Anurag Kumar(mailto:anuragkumarak95@gmail.com)
Requirements: Requirements:
- sklearn - sklearn
- numpy - numpy
- matplotlib - matplotlib
Python: Python:
- 3.5 - 3.5
Inputs: Inputs:
- X , a 2D numpy array of features. - X , a 2D numpy array of features.
- k , number of clusters to create. - k , number of clusters to create.
@ -16,10 +13,8 @@ Inputs:
- maxiter , maximum number of iterations to process. - maxiter , maximum number of iterations to process.
- heterogeneity , empty list that will be filled with heterogeneity values if passed - heterogeneity , empty list that will be filled with heterogeneity values if passed
to kmeans func. to kmeans func.
Usage: Usage:
1. define 'k' value, 'X' features array and 'heterogeneity' empty list 1. define 'k' value, 'X' features array and 'heterogeneity' empty list
2. create initial_centroids, 2. create initial_centroids,
initial_centroids = get_initial_centroids( initial_centroids = get_initial_centroids(
X, X,
@ -27,9 +22,7 @@ Usage:
seed=0 # seed value for initial centroid generation, seed=0 # seed value for initial centroid generation,
# None for randomness(default=None) # None for randomness(default=None)
) )
3. find centroids and clusters using kmeans function. 3. find centroids and clusters using kmeans function.
centroids, cluster_assignment = kmeans( centroids, cluster_assignment = kmeans(
X, X,
k, k,
@ -38,19 +31,14 @@ Usage:
record_heterogeneity=heterogeneity, record_heterogeneity=heterogeneity,
verbose=True # whether to print logs in console or not.(default=False) verbose=True # whether to print logs in console or not.(default=False)
) )
4. Plot the loss function, heterogeneity values for every iteration saved in 4. Plot the loss function, heterogeneity values for every iteration saved in
heterogeneity list. heterogeneity list.
plot_heterogeneity( plot_heterogeneity(
heterogeneity, heterogeneity,
k k
) )
5. Transfer the DataFrame into Excel format; it must have a feature called 5. Transfer the DataFrame into Excel format; it must have a feature called
'Clust' with k means clustering numbers in it. 'Clust' with k means clustering numbers in it.
""" """
import warnings import warnings
@ -222,7 +210,6 @@ def ReportGenerator(
in order to run the function following libraries must be imported: in order to run the function following libraries must be imported:
import pandas as pd import pandas as pd
import numpy as np import numpy as np
>>> data = pd.DataFrame() >>> data = pd.DataFrame()
>>> data['numbers'] = [1, 2, 3] >>> data['numbers'] = [1, 2, 3]
>>> data['col1'] = [0.5, 2.5, 4.5] >>> data['col1'] = [0.5, 2.5, 4.5]
@ -287,10 +274,10 @@ def ReportGenerator(
.T.reset_index() .T.reset_index()
.rename(index=str, columns={"level_0": "Features", "level_1": "Type"}) .rename(index=str, columns={"level_0": "Features", "level_1": "Type"})
) # rename columns ) # rename columns
# calculate the size of cluster(count of clientID's)
clustersize = report[ clustersize = report[
(report["Features"] == "dummy") & (report["Type"] == "count") (report["Features"] == "dummy") & (report["Type"] == "count")
] # calculate the size of cluster(count of clientID's) ].copy() # avoid SettingWithCopyWarning
clustersize.Type = ( clustersize.Type = (
"ClusterSize" # rename created cluster df to match report column names "ClusterSize" # rename created cluster df to match report column names
) )