Fix grammar and spelling mistakes in sequential_minimum_optimization.py (#11427)

This commit is contained in:
Tianyi Zheng 2024-06-13 14:47:29 -07:00 committed by GitHub
parent 41a1cdf38d
commit 446742387e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,11 +1,9 @@
""" """
Implementation of sequential minimal optimization (SMO) for support vector machines Sequential minimal optimization (SMO) for support vector machines (SVM)
(SVM).
Sequential minimal optimization (SMO) is an algorithm for solving the quadratic Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
programming (QP) problem that arises during the training of support vector programming (QP) problem that arises during the training of SVMs. It was invented by
machines. John Platt in 1998.
It was invented by John Platt in 1998.
Input: Input:
0: type: numpy.ndarray. 0: type: numpy.ndarray.
@ -124,8 +122,7 @@ class SmoSVM:
b_old = self._b b_old = self._b
self._b = b self._b = b
# 4: update error value,here we only calculate those non-bound samples' # 4: update error, here we only calculate the error for non-bound samples
# error
self._unbound = [i for i in self._all_samples if self._is_unbound(i)] self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
for s in self.unbound: for s in self.unbound:
if s in (i1, i2): if s in (i1, i2):
@ -136,7 +133,7 @@ class SmoSVM:
+ (self._b - b_old) + (self._b - b_old)
) )
# if i1 or i2 is non-bound,update there error value to zero # if i1 or i2 is non-bound, update their error value to zero
if self._is_unbound(i1): if self._is_unbound(i1):
self._error[i1] = 0 self._error[i1] = 0
if self._is_unbound(i2): if self._is_unbound(i2):
@ -161,7 +158,7 @@ class SmoSVM:
results.append(result) results.append(result)
return np.array(results) return np.array(results)
# Check if alpha violate KKT condition # Check if alpha violates the KKT condition
def _check_obey_kkt(self, index): def _check_obey_kkt(self, index):
alphas = self.alphas alphas = self.alphas
tol = self._tol tol = self._tol
@ -172,20 +169,19 @@ class SmoSVM:
# Get value calculated from kernel function # Get value calculated from kernel function
def _k(self, i1, i2): def _k(self, i1, i2):
# for test samples,use Kernel function # for test samples, use kernel function
if isinstance(i2, np.ndarray): if isinstance(i2, np.ndarray):
return self.Kernel(self.samples[i1], i2) return self.Kernel(self.samples[i1], i2)
# for train samples,Kernel values have been saved in matrix # for training samples, kernel values have been saved in matrix
else: else:
return self._K_matrix[i1, i2] return self._K_matrix[i1, i2]
# Get sample's error # Get error for sample
def _e(self, index): def _e(self, index):
""" """
Two cases: Two cases:
1:Sample[index] is non-bound,Fetch error from list: _error 1: Sample[index] is non-bound, fetch error from list: _error
2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi 2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
""" """
# get from error data # get from error data
if self._is_unbound(index): if self._is_unbound(index):
@ -196,7 +192,7 @@ class SmoSVM:
yi = self.tags[index] yi = self.tags[index]
return gx - yi return gx - yi
# Calculate Kernel matrix of all possible i1,i2 ,saving time # Calculate kernel matrix of all possible i1, i2, saving time
def _calculate_k_matrix(self): def _calculate_k_matrix(self):
k_matrix = np.zeros([self.length, self.length]) k_matrix = np.zeros([self.length, self.length])
for i in self._all_samples: for i in self._all_samples:
@ -206,7 +202,7 @@ class SmoSVM:
) )
return k_matrix return k_matrix
# Predict test sample's tag # Predict tag for test sample
def _predict(self, sample): def _predict(self, sample):
k = self._k k = self._k
predicted_value = ( predicted_value = (
@ -222,30 +218,31 @@ class SmoSVM:
# Choose alpha1 and alpha2 # Choose alpha1 and alpha2
def _choose_alphas(self): def _choose_alphas(self):
locis = yield from self._choose_a1() loci = yield from self._choose_a1()
if not locis: if not loci:
return None return None
return locis return loci
def _choose_a1(self): def _choose_a1(self):
""" """
Choose first alpha ;steps: Choose first alpha
1:First loop over all sample Steps:
2:Second loop over all non-bound samples till all non-bound samples does not 1: First loop over all samples
voilate kkt condition. 2: Second loop over all non-bound samples until no non-bound samples violate
3:Repeat this two process endlessly,till all samples does not voilate kkt the KKT condition.
condition samples after first loop. 3: Repeat these two processes until no samples violate the KKT condition
after the first loop.
""" """
while True: while True:
all_not_obey = True all_not_obey = True
# all samples # all samples
print("scanning all sample!") print("Scanning all samples!")
for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]: for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
all_not_obey = False all_not_obey = False
yield from self._choose_a2(i1) yield from self._choose_a2(i1)
# non-bound samples # non-bound samples
print("scanning non-bound sample!") print("Scanning non-bound samples!")
while True: while True:
not_obey = True not_obey = True
for i1 in [ for i1 in [
@ -256,20 +253,21 @@ class SmoSVM:
not_obey = False not_obey = False
yield from self._choose_a2(i1) yield from self._choose_a2(i1)
if not_obey: if not_obey:
print("all non-bound samples fit the KKT condition!") print("All non-bound samples satisfy the KKT condition!")
break break
if all_not_obey: if all_not_obey:
print("all samples fit the KKT condition! Optimization done!") print("All samples satisfy the KKT condition!")
break break
return False return False
def _choose_a2(self, i1): def _choose_a2(self, i1):
""" """
Choose the second alpha by using heuristic algorithm ;steps: Choose the second alpha using a heuristic algorithm
1: Choose alpha2 which gets the maximum step size (|E1 - E2|). Steps:
2: Start in a random point,loop over all non-bound samples till alpha1 and 1: Choose alpha2 that maximizes the step size (|E1 - E2|).
2: Start in a random point, loop over all non-bound samples till alpha1 and
alpha2 are optimized. alpha2 are optimized.
3: Start in a random point,loop over all samples till alpha1 and alpha2 are 3: Start in a random point, loop over all samples till alpha1 and alpha2 are
optimized. optimized.
""" """
self._unbound = [i for i in self._all_samples if self._is_unbound(i)] self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
@ -306,7 +304,7 @@ class SmoSVM:
if i1 == i2: if i1 == i2:
return None, None return None, None
# calculate L and H which bound the new alpha2 # calculate L and H which bound the new alpha2
s = y1 * y2 s = y1 * y2
if s == -1: if s == -1:
l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1) # noqa: E741 l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1) # noqa: E741
@ -320,7 +318,7 @@ class SmoSVM:
k22 = k(i2, i2) k22 = k(i2, i2)
k12 = k(i1, i2) k12 = k(i1, i2)
# select the new alpha2 which could get the minimal objectives # select the new alpha2 which could achieve the minimal objectives
if (eta := k11 + k22 - 2.0 * k12) > 0.0: if (eta := k11 + k22 - 2.0 * k12) > 0.0:
a2_new_unc = a2 + (y2 * (e1 - e2)) / eta a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
# a2_new has a boundary # a2_new has a boundary
@ -335,7 +333,7 @@ class SmoSVM:
l1 = a1 + s * (a2 - l) l1 = a1 + s * (a2 - l)
h1 = a1 + s * (a2 - h) h1 = a1 + s * (a2 - h)
# way 1 # Method 1
f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2) f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2) f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
ol = ( ol = (
@ -353,9 +351,8 @@ class SmoSVM:
+ s * h * h1 * k(i1, i2) + s * h * h1 * k(i1, i2)
) )
""" """
# way 2 Method 2: Use objective function to check which alpha2_new could achieve the
Use objective function check which alpha2 new could get the minimal minimal objectives
objectives
""" """
if ol < (oh - self._eps): if ol < (oh - self._eps):
a2_new = l a2_new = l
@ -375,7 +372,7 @@ class SmoSVM:
return a1_new, a2_new return a1_new, a2_new
# Normalise data using min_max way # Normalize data using min-max method
def _norm(self, data): def _norm(self, data):
if self._init: if self._init:
self._min = np.min(data, axis=0) self._min = np.min(data, axis=0)
@ -424,7 +421,7 @@ class Kernel:
def _check(self): def _check(self):
if self._kernel == self._rbf and self.gamma < 0: if self._kernel == self._rbf and self.gamma < 0:
raise ValueError("gamma value must greater than 0") raise ValueError("gamma value must be non-negative")
def _get_kernel(self, kernel_name): def _get_kernel(self, kernel_name):
maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf} maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
@ -444,27 +441,27 @@ def count_time(func):
start_time = time.time() start_time = time.time()
func(*args, **kwargs) func(*args, **kwargs)
end_time = time.time() end_time = time.time()
print(f"smo algorithm cost {end_time - start_time} seconds") print(f"SMO algorithm cost {end_time - start_time} seconds")
return call_func return call_func
@count_time @count_time
def test_cancel_data(): def test_cancer_data():
print("Hello!\nStart test svm by smo algorithm!") print("Hello!\nStart test SVM using the SMO algorithm!")
# 0: download the dataset and load it into a pandas DataFrame # 0: download the dataset and load it into a pandas DataFrame
if not os.path.exists(r"cancel_data.csv"): if not os.path.exists(r"cancer_data.csv"):
request = urllib.request.Request( # noqa: S310 request = urllib.request.Request( # noqa: S310
CANCER_DATASET_URL, CANCER_DATASET_URL,
headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"}, headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
) )
response = urllib.request.urlopen(request) # noqa: S310 response = urllib.request.urlopen(request) # noqa: S310
content = response.read().decode("utf-8") content = response.read().decode("utf-8")
with open(r"cancel_data.csv", "w") as f: with open(r"cancer_data.csv", "w") as f:
f.write(content) f.write(content)
data = pd.read_csv( data = pd.read_csv(
"cancel_data.csv", "cancer_data.csv",
header=None, header=None,
dtype={0: str}, # Assuming the first column contains string data dtype={0: str}, # Assuming the first column contains string data
) )
@ -479,14 +476,14 @@ def test_cancel_data():
train_data, test_data = samples[:328, :], samples[328:, :] train_data, test_data = samples[:328, :], samples[328:, :]
test_tags, test_samples = test_data[:, 0], test_data[:, 1:] test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
# 3: choose kernel function,and set initial alphas to zero(optional) # 3: choose kernel function, and set initial alphas to zero (optional)
mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
al = np.zeros(train_data.shape[0]) al = np.zeros(train_data.shape[0])
# 4: calculate the best alphas using the SMO algorithm and predict test_data samples # 4: calculate the best alphas using the SMO algorithm and predict test_data samples
mysvm = SmoSVM( mysvm = SmoSVM(
train=train_data, train=train_data,
kernel_func=mykernel, kernel_func=my_kernel,
alpha_list=al, alpha_list=al,
cost=0.4, cost=0.4,
b=0.0, b=0.0,
@ -501,30 +498,30 @@ def test_cancel_data():
for i in range(test_tags.shape[0]): for i in range(test_tags.shape[0]):
if test_tags[i] == predict[i]: if test_tags[i] == predict[i]:
score += 1 score += 1
print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}") print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
print(f"Rough Accuracy: {score / test_tags.shape[0]}") print(f"Rough Accuracy: {score / test_tags.shape[0]}")
def test_demonstration(): def test_demonstration():
# change stdout # change stdout
print("\nStart plot,please wait!!!") print("\nStarting plot, please wait!")
sys.stdout = open(os.devnull, "w") sys.stdout = open(os.devnull, "w")
ax1 = plt.subplot2grid((2, 2), (0, 0)) ax1 = plt.subplot2grid((2, 2), (0, 0))
ax2 = plt.subplot2grid((2, 2), (0, 1)) ax2 = plt.subplot2grid((2, 2), (0, 1))
ax3 = plt.subplot2grid((2, 2), (1, 0)) ax3 = plt.subplot2grid((2, 2), (1, 0))
ax4 = plt.subplot2grid((2, 2), (1, 1)) ax4 = plt.subplot2grid((2, 2), (1, 1))
ax1.set_title("linear svm,cost:0.1") ax1.set_title("Linear SVM, cost = 0.1")
test_linear_kernel(ax1, cost=0.1) test_linear_kernel(ax1, cost=0.1)
ax2.set_title("linear svm,cost:500") ax2.set_title("Linear SVM, cost = 500")
test_linear_kernel(ax2, cost=500) test_linear_kernel(ax2, cost=500)
ax3.set_title("rbf kernel svm,cost:0.1") ax3.set_title("RBF kernel SVM, cost = 0.1")
test_rbf_kernel(ax3, cost=0.1) test_rbf_kernel(ax3, cost=0.1)
ax4.set_title("rbf kernel svm,cost:500") ax4.set_title("RBF kernel SVM, cost = 500")
test_rbf_kernel(ax4, cost=500) test_rbf_kernel(ax4, cost=500)
sys.stdout = sys.__stdout__ sys.stdout = sys.__stdout__
print("Plot done!!!") print("Plot done!")
def test_linear_kernel(ax, cost): def test_linear_kernel(ax, cost):
@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
scaler = StandardScaler() scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x, train_y) train_x_scaled = scaler.fit_transform(train_x, train_y)
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled)) train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5) my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM( mysvm = SmoSVM(
train=train_data, train=train_data,
kernel_func=mykernel, kernel_func=my_kernel,
cost=cost, cost=cost,
tolerance=0.001, tolerance=0.001,
auto_norm=False, auto_norm=False,
@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
scaler = StandardScaler() scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x, train_y) train_x_scaled = scaler.fit_transform(train_x, train_y)
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled)) train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5) my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM( mysvm = SmoSVM(
train=train_data, train=train_data,
kernel_func=mykernel, kernel_func=my_kernel,
cost=cost, cost=cost,
tolerance=0.001, tolerance=0.001,
auto_norm=False, auto_norm=False,
@ -571,11 +568,11 @@ def plot_partition_boundary(
model, train_data, ax, resolution=100, colors=("b", "k", "r") model, train_data, ax, resolution=100, colors=("b", "k", "r")
): ):
""" """
We can not get the optimum w of our kernel svm model which is different from linear We cannot get the optimal w of our kernel SVM model, which is different from a
svm. For this reason, we generate randomly distributed points with high desity and linear SVM. For this reason, we generate randomly distributed points with high
prediced values of these points are calculated by using our trained model. Then we density, and predicted values of these points are calculated using our trained
could use this prediced values to draw contour map. model. Then we could use these predicted values to draw a contour map, and this contour
And this contour map can represent svm's partition boundary. map represents the SVM's partition boundary.
""" """
train_data_x = train_data[:, 1] train_data_x = train_data[:, 1]
train_data_y = train_data[:, 2] train_data_y = train_data[:, 2]
@ -620,6 +617,6 @@ def plot_partition_boundary(
if __name__ == "__main__": if __name__ == "__main__":
test_cancel_data() test_cancer_data()
test_demonstration() test_demonstration()
plt.show() plt.show()