Fix grammar and spelling mistakes in sequential_minimum_optimization.py (#11427)

Tianyi Zheng 2024-06-13 14:47:29 -07:00 committed by GitHub
parent 41a1cdf38d
commit 446742387e


@@ -1,11 +1,9 @@
 """
-Implementation of sequential minimal optimization (SMO) for support vector machines
-(SVM).
+Sequential minimal optimization (SMO) for support vector machines (SVM)
 Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
-programming (QP) problem that arises during the training of support vector
-machines.
-It was invented by John Platt in 1998.
+programming (QP) problem that arises during the training of SVMs. It was invented by
+John Platt in 1998.
 Input:
     0: type: numpy.ndarray.
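
For context on the hunk above: SMO repeatedly picks a pair of Lagrange multipliers and solves the two-variable subproblem in closed form. A minimal sketch of that closed-form step, assuming the errors e1, e2 and the kernel values are already computed (hypothetical helper, not part of this file; clipping to the feasible box is sketched further below):

    def smo_pair_step(a1, a2, y1, y2, e1, e2, k11, k22, k12):
        # Curvature of the objective along the constraint line
        eta = k11 + k22 - 2.0 * k12
        if eta <= 0.0:
            return a1, a2  # degenerate case; the file compares objectives instead
        a2_new = a2 + y2 * (e1 - e2) / eta  # unclipped Newton step
        a1_new = a1 + y1 * y2 * (a2 - a2_new)  # keeps sum(alpha_i * y_i) constant
        return a1_new, a2_new
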
@@ -124,8 +122,7 @@ class SmoSVM:
 b_old = self._b
 self._b = b
-# 4: update error value,here we only calculate those non-bound samples'
-# error
+# 4: update error, here we only calculate the error for non-bound samples
 self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
 for s in self.unbound:
 if s in (i1, i2):
@@ -136,7 +133,7 @@ class SmoSVM:
 + (self._b - b_old)
 )
-# if i1 or i2 is non-bound,update there error value to zero
+# if i1 or i2 is non-bound, update their error value to zero
 if self._is_unbound(i1):
 self._error[i1] = 0
 if self._is_unbound(i2):
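
The update in this hunk follows from how the decision function shifts when a1, a2, and b move. A standalone sketch of the same loop (hypothetical signature, mirroring the lines above):

    def update_error_cache(error, k, y1, y2, i1, i2, da1, da2, db, unbound):
        # da1 = a1_new - a1_old, da2 = a2_new - a2_old, db = b_new - b_old
        for s in unbound:
            if s in (i1, i2):
                continue  # errors for the optimized pair are reset to zero instead
            error[s] += y1 * da1 * k(i1, s) + y2 * da2 * k(i2, s) + db
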
@@ -161,7 +158,7 @@ class SmoSVM:
 results.append(result)
 return np.array(results)
-# Check if alpha violate KKT condition
+# Check if alpha violates the KKT condition
 def _check_obey_kkt(self, index):
 alphas = self.alphas
 tol = self._tol
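
For reference, the KKT conditions of the SVM dual that this method tests are: alpha = 0 implies y*g(x) >= 1, 0 < alpha < c implies y*g(x) = 1, and alpha = c implies y*g(x) <= 1, all up to the tolerance tol. A sketch of Platt's standard violation test (assumed equivalent to the file's check):

    def violates_kkt(alpha, y, gx, c, tol):
        r = y * gx - 1.0  # signed slack of the margin condition
        return (alpha < c and r < -tol) or (alpha > 0.0 and r > tol)
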
@@ -172,20 +169,19 @@ class SmoSVM:
 # Get value calculated from kernel function
 def _k(self, i1, i2):
-# for test samples,use Kernel function
+# for test samples, use kernel function
 if isinstance(i2, np.ndarray):
 return self.Kernel(self.samples[i1], i2)
-# for train samples,Kernel values have been saved in matrix
+# for training samples, kernel values have been saved in matrix
 else:
 return self._K_matrix[i1, i2]
-# Get sample's error
+# Get error for sample
 def _e(self, index):
 """
 Two cases:
-1:Sample[index] is non-bound,Fetch error from list: _error
-2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
+1: Sample[index] is non-bound, fetch error from list: _error
+2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
 """
 # get from error data
 if self._is_unbound(index):
@@ -196,7 +192,7 @@ class SmoSVM:
 yi = self.tags[index]
 return gx - yi
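
Here g(xi) is the SVM decision function; in the notation of the docstring above, E(x) = g(x) - y with g(x) = sum_i alpha_i * y_i * K(x_i, x) + b. A vectorized sketch (assuming alphas, tags, and a precomputed kernel row are available):

    import numpy as np

    def decision_value(alphas, tags, kernel_row, b):
        # kernel_row[i] holds K(x_i, x) for the sample being evaluated
        return float(np.dot(alphas * tags, kernel_row) + b)
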
-# Calculate Kernel matrix of all possible i1,i2 ,saving time
+# Calculate kernel matrix of all possible i1, i2, saving time
 def _calculate_k_matrix(self):
 k_matrix = np.zeros([self.length, self.length])
 for i in self._all_samples:
@@ -206,7 +202,7 @@ class SmoSVM:
 )
 return k_matrix
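
Precomputing the Gram matrix trades memory for speed during training. The file fills it entry by entry; for an RBF kernel the same matrix can also be built in one vectorized pass (sketch, assuming samples is an (n, d) array):

    import numpy as np

    def rbf_gram_matrix(samples, gamma):
        sq = np.sum(samples**2, axis=1)
        # ||xi - xj||^2 = ||xi||^2 + ||xj||^2 - 2 * xi . xj
        d2 = sq[:, None] + sq[None, :] - 2.0 * samples @ samples.T
        return np.exp(-gamma * np.maximum(d2, 0.0))
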
-# Predict test sample's tag
+# Predict tag for test sample
 def _predict(self, sample):
 k = self._k
 predicted_value = (
@@ -222,30 +218,31 @@ class SmoSVM:
 # Choose alpha1 and alpha2
 def _choose_alphas(self):
-locis = yield from self._choose_a1()
-if not locis:
+loci = yield from self._choose_a1()
+if not loci:
 return None
-return locis
+return loci
 def _choose_a1(self):
 """
-Choose first alpha ;steps:
-1:First loop over all sample
-2:Second loop over all non-bound samples till all non-bound samples does not
-voilate kkt condition.
-3:Repeat this two process endlessly,till all samples does not voilate kkt
-condition samples after first loop.
+Choose first alpha
+Steps:
+1: First loop over all samples
+2: Second loop over all non-bound samples until no non-bound samples violate
+the KKT condition.
+3: Repeat these two processes until no samples violate the KKT condition
+after the first loop.
 """
 while True:
 all_not_obey = True
 # all sample
-print("scanning all sample!")
+print("Scanning all samples!")
 for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
 all_not_obey = False
 yield from self._choose_a2(i1)
 # non-bound sample
-print("scanning non-bound sample!")
+print("Scanning non-bound samples!")
 while True:
 not_obey = True
 for i1 in [
@@ -256,17 +253,18 @@ class SmoSVM:
 not_obey = False
 yield from self._choose_a2(i1)
 if not_obey:
-print("all non-bound samples fit the KKT condition!")
+print("All non-bound samples satisfy the KKT condition!")
 break
 if all_not_obey:
-print("all samples fit the KKT condition! Optimization done!")
+print("All samples satisfy the KKT condition!")
 break
 return False
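
The docstring above describes Platt's outer loop: one full sweep, then repeated non-bound sweeps, until a full sweep finds no violators. A compact sketch of that alternation as a generator (hypothetical helpers violates and non_bound stand in for the file's checks):

    def choose_a1(all_samples, non_bound, violates):
        while True:
            violators = [i for i in all_samples if violates(i)]
            for i1 in violators:  # pass over all samples
                yield i1
            while True:  # passes over non-bound samples only
                nb = [i for i in non_bound() if violates(i)]
                if not nb:
                    break
                for i1 in nb:
                    yield i1
            if not violators:  # clean full sweep: optimization is done
                return
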
 def _choose_a2(self, i1):
 """
-Choose the second alpha by using heuristic algorithm ;steps:
-1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
+Choose the second alpha using a heuristic algorithm
+Steps:
+1: Choose alpha2 that maximizes the step size (|E1 - E2|).
 2: Start in a random point, loop over all non-bound samples till alpha1 and
 alpha2 are optimized.
 3: Start in a random point, loop over all samples till alpha1 and alpha2 are
@@ -320,7 +318,7 @@ class SmoSVM:
 k22 = k(i2, i2)
 k12 = k(i1, i2)
-# select the new alpha2 which could get the minimal objectives
+# select the new alpha2 which could achieve the minimal objectives
 if (eta := k11 + k22 - 2.0 * k12) > 0.0:
 a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
 # a2_new has a boundary
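
The boundary comes from the box constraint 0 <= alpha <= c together with the equality constraint a1*y1 + a2*y2 = constant; the standard derivation gives (sketch, with c playing the role of the file's cost parameter):

    import numpy as np

    def clip_a2(a1, a2, y1, y2, a2_new_unc, c):
        if y1 != y2:  # a1 - a2 is constant along the constraint line
            l, h = max(0.0, a2 - a1), min(c, c + a2 - a1)
        else:  # a1 + a2 is constant
            l, h = max(0.0, a1 + a2 - c), min(c, a1 + a2)
        return float(np.clip(a2_new_unc, l, h))
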
@@ -335,7 +333,7 @@ class SmoSVM:
 l1 = a1 + s * (a2 - l)
 h1 = a1 + s * (a2 - h)
-# way 1
+# Method 1
 f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
 f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
 ol = (
@@ -353,9 +351,8 @@ class SmoSVM:
 + s * h * h1 * k(i1, i2)
 )
 """
-# way 2
-Use objective function check which alpha2 new could get the minimal
-objectives
+Method 2: Use objective function to check which alpha2_new could achieve the
+minimal objectives
 """
 if ol < (oh - self._eps):
 a2_new = l
@@ -375,7 +372,7 @@ class SmoSVM:
 return a1_new, a2_new
-# Normalise data using min_max way
+# Normalize data using min-max method
 def _norm(self, data):
 if self._init:
 self._min = np.min(data, axis=0)
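
Min-max normalization rescales each feature to [0, 1] using the training minima and ranges, and later data must reuse the cached statistics. A standalone sketch of the same idea:

    import numpy as np

    def min_max_norm(data, data_min=None, data_range=None):
        if data_min is None:  # first call: learn statistics from this data
            data_min = np.min(data, axis=0)
            data_range = np.max(data, axis=0) - data_min
        return (data - data_min) / data_range, data_min, data_range
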
@@ -424,7 +421,7 @@ class Kernel:
 def _check(self):
 if self._kernel == self._rbf and self.gamma < 0:
-raise ValueError("gamma value must greater than 0")
+raise ValueError("gamma value must be non-negative")
 def _get_kernel(self, kernel_name):
 maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
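
The three kernels registered in maps conventionally take the following forms; the exact coefficient placement in this file may differ, so treat this as a reference sketch using the class's degree, coef0, and gamma parameters:

    import numpy as np

    def linear(v1, v2):
        return float(np.inner(v1, v2))

    def polynomial(v1, v2, degree, coef0):
        return float((np.inner(v1, v2) + coef0) ** degree)

    def rbf(v1, v2, gamma):
        return float(np.exp(-gamma * np.sum((v1 - v2) ** 2)))
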
@@ -444,27 +441,27 @@ def count_time(func):
 start_time = time.time()
 func(*args, **kwargs)
 end_time = time.time()
-print(f"smo algorithm cost {end_time - start_time} seconds")
+print(f"SMO algorithm cost {end_time - start_time} seconds")
 return call_func
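
count_time is a plain timing decorator; the hunk shows only its inner lines, and its full shape is roughly the sketch below. Decorating a test function such as the one that follows means simply calling it also prints the elapsed time:

    import time

    def count_time(func):
        def call_func(*args, **kwargs):
            start_time = time.time()
            func(*args, **kwargs)  # run the wrapped test
            end_time = time.time()
            print(f"SMO algorithm cost {end_time - start_time} seconds")
        return call_func
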
 @count_time
-def test_cancel_data():
-print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+print("Hello!\nStart test SVM using the SMO algorithm!")
 # 0: download dataset and load into pandas' dataframe
-if not os.path.exists(r"cancel_data.csv"):
+if not os.path.exists(r"cancer_data.csv"):
 request = urllib.request.Request( # noqa: S310
 CANCER_DATASET_URL,
 headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
 )
 response = urllib.request.urlopen(request) # noqa: S310
 content = response.read().decode("utf-8")
-with open(r"cancel_data.csv", "w") as f:
+with open(r"cancer_data.csv", "w") as f:
 f.write(content)
 data = pd.read_csv(
-"cancel_data.csv",
+"cancer_data.csv",
 header=None,
 dtype={0: str}, # Assuming the first column contains string data
 )
@@ -480,13 +477,13 @@ def test_cancel_data():
 test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
 # 3: choose kernel function, and set initial alphas to zero (optional)
-mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
 al = np.zeros(train_data.shape[0])
 # 4: calculating best alphas using SMO algorithm and predict test_data samples
 mysvm = SmoSVM(
 train=train_data,
-kernel_func=mykernel,
+kernel_func=my_kernel,
 alpha_list=al,
 cost=0.4,
 b=0.0,
@@ -501,30 +498,30 @@ def test_cancel_data():
 for i in range(test_tags.shape[0]):
 if test_tags[i] == predict[i]:
 score += 1
-print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
 print(f"Rough Accuracy: {score / test_tags.shape[0]}")
 def test_demonstration():
 # change stdout
-print("\nStart plot,please wait!!!")
+print("\nStarting plot, please wait!")
 sys.stdout = open(os.devnull, "w")
 ax1 = plt.subplot2grid((2, 2), (0, 0))
 ax2 = plt.subplot2grid((2, 2), (0, 1))
 ax3 = plt.subplot2grid((2, 2), (1, 0))
 ax4 = plt.subplot2grid((2, 2), (1, 1))
-ax1.set_title("linear svm,cost:0.1")
+ax1.set_title("Linear SVM, cost = 0.1")
 test_linear_kernel(ax1, cost=0.1)
-ax2.set_title("linear svm,cost:500")
+ax2.set_title("Linear SVM, cost = 500")
 test_linear_kernel(ax2, cost=500)
-ax3.set_title("rbf kernel svm,cost:0.1")
+ax3.set_title("RBF kernel SVM, cost = 0.1")
 test_rbf_kernel(ax3, cost=0.1)
-ax4.set_title("rbf kernel svm,cost:500")
+ax4.set_title("RBF kernel SVM, cost = 500")
 test_rbf_kernel(ax4, cost=500)
 sys.stdout = sys.__stdout__
-print("Plot done!!!")
+print("Plot done!")
 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
 scaler = StandardScaler()
 train_x_scaled = scaler.fit_transform(train_x, train_y)
 train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
 mysvm = SmoSVM(
 train=train_data,
-kernel_func=mykernel,
+kernel_func=my_kernel,
 cost=cost,
 tolerance=0.001,
 auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
 scaler = StandardScaler()
 train_x_scaled = scaler.fit_transform(train_x, train_y)
 train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
 mysvm = SmoSVM(
 train=train_data,
-kernel_func=mykernel,
+kernel_func=my_kernel,
 cost=cost,
 tolerance=0.001,
 auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
 model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
 """
-We can not get the optimum w of our kernel svm model which is different from linear
-svm. For this reason, we generate randomly distributed points with high desity and
-prediced values of these points are calculated by using our trained model. Then we
-could use this prediced values to draw contour map.
-And this contour map can represent svm's partition boundary.
+We cannot get the optimal w of our kernel SVM model, which is different from a
+linear SVM. For this reason, we generate randomly distributed points with high
+density, and predicted values of these points are calculated using our trained
+model. Then we can use these predicted values to draw a contour map, and this
+contour map represents the SVM's partition boundary.
 """
 train_data_x = train_data[:, 1]
 train_data_y = train_data[:, 2]
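
The docstring describes the usual trick for non-linear SVMs: evaluate the trained model on a dense grid and contour the predictions. A generic sketch (assuming model.predict accepts an (n, 2) array and returns one label per row):

    import numpy as np

    def draw_boundary(model, ax, x_range, y_range, resolution=100):
        xs = np.linspace(x_range[0], x_range[1], resolution)
        ys = np.linspace(y_range[0], y_range[1], resolution)
        xx, yy = np.meshgrid(xs, ys)
        grid = np.c_[xx.ravel(), yy.ravel()]  # resolution**2 query points
        zz = model.predict(grid).reshape(xx.shape)  # predicted tag per point
        ax.contourf(xx, yy, zz, alpha=0.3)  # filled regions show the partition
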
@@ -620,6 +617,6 @@ def plot_partition_boundary(
 if __name__ == "__main__":
-test_cancel_data()
+test_cancer_data()
 test_demonstration()
 plt.show()