# 《Machine Learning in Action》—— 剖析支持向量机，优化SMO

• 第一个$\alpha_i$的选取

$\left\{ \begin{array}{c} \alpha_i=0 \quad <=> \quad y_i(w^T+b)\geq1\\ \alpha_i=C \quad <=> \quad y_i(w^T+b)\leq1\\ 0\leq\alpha_i\leq C \quad <=> \quad y_i(w^T+b)=1\\ \end{array} \right.$

"""
Author：Taoye
微信公众号：玩世不恭的Coder
Explain：外层循环，选取第一个合适alpha
Parameters：
x_data: 样本属性特征矩阵
y_label: 属性特征对应的标签
C：惩罚参数
toler：容错率
max_iter：迭代次数
Return:
b: 决策面的参数b
alphas：获取决策面参数w所需要的alphas
"""
def outer_smo(self, data_struct, x_data, y_label, C, toler, max_iter):
iter_num, ergodic_flag, alpha_optimization_num = 0, True, 0
while (iter_num < max_iter) and ((alpha_optimization_num > 0) or (ergodic_flag)):
alpha_optimization_num = 0
if ergodic_flag:
for i in range(data_struct.m):
alpha_optimization_num += self.inner_smo(i, data_struct)
print("遍历所有样本数据：第%d次迭代，样本为：%d，alpha优化的次数：%d" % (iter_num, i, alpha_optimization_num))
iter_num += 1
else:
no_zero_index = np.nonzero((data_struct.alphas.A > 0) * (data_struct.alphas.A < C))[0]
for i in no_zero_index:
alpha_optimization_num += self.inner_smo(i, data_struct)
print("非边界遍历样本数据：第%d次迭代，样本为：%d，alpha优化的次数：%d" % (iter_num, i, alpha_optimization_num))
iter_num += 1
if ergodic_flag: ergodic_flag = False
elif alpha_optimization_num == 0: ergodic_flag = True
print("迭代次数：%d" % iter_num)
return data_struct.b, data_struct.alphas

• 第二个$\alpha_j$的选取

$\alpha_2^{new}=\frac{y_2(E_1-E_2)}{\eta}+\alpha_2^{old}$

def select_appropriate_j(self, i, data_struct, E_i):
max_k, max_delta_E, E_j = -1, 0, 0
data_struct.E_cache[i] = [1, E_i]
valid_E_cache_list = np.nonzero(data_struct.E_cache[:, 0].A)[0]
if (len(valid_E_cache_list) > 1):
for k in valid_E_cache_list:
if k == i: continue
E_k = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, k)
delta_E = abs(E_i - E_k)
if (delta_E > max_delta_E): max_k, max_delta_E, E_j = k, delta_E, E_k
return max_k, E_j
else:
j = self.random_select_alpha_j(i, data_struct.m)
E_j = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, j)
return j, E_j


1. x_data：etablish_data随机生成数据集中的属性矩阵
2. y_label：etablish_data随机生成数据集中的标签
3. C：惩罚参数
4. toler：容错率
5. m：数据样本数
6. alphas：SMO算法所需要训练的$\alpha$向量
7. b：SMO算法所需要训练的$b$参数
8. E_cache：用于保存误差，第一列为有效标志位，第二列为样本索引对应的误差E

def update_E_k(self, data_struct, k):
E_k = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, k)                                        #计算Ek
data_struct.E_cache[k] = [1,E_k]


import numpy as np
import pylab as pl
from matplotlib import pyplot as plt

class DataStruct:
def __init__(self, x_data, y_label, C, toler):
self.x_data = x_data
self.y_label = y_label
self.C = C
self.toler = toler
self.m = x_data.shape[0]
self.alphas = np.mat(np.zeros((self.m, 1)))
self.b = 0
self.E_cache = np.mat(np.zeros((self.m, 2)))

class OptimizeLinearSVM:
def __init__(self):
pass

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 用于生成训练数据集
Parameters:
data_number: 样本数据数目
Return:
x_data: 数据样本的属性矩阵
y_label: 样本属性所对应的标签
"""
def etablish_data(self, data_number):
np.random.seed(38)
x_data = np.concatenate((np.add(np.random.randn(data_number, 2), [3, 3]),
np.subtract(np.random.randn(data_number, 2), [3, 3])),
axis = 0)      # random随机生成数据，+ -3达到不同类别数据分隔的目的
temp_data = np.zeros([data_number])
temp_data.fill(-1)
y_label = np.concatenate((temp_data, np.ones([data_number])), axis = 0)
return x_data, y_label

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 随机选取alpha_j
Parameters:
alpha_i_index: 第一个alpha的索引
alpha_number: alpha总数目
Return:
alpha_j_index: 第二个alpha的索引
"""
def random_select_alpha_j(self, alpha_i_index, alpha_number):
alpha_j_index = alpha_i_index
while alpha_j_index == alpha_i_index:
alpha_j_index = np.random.randint(0, alpha_number)
return alpha_j_index

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 使得alpha_j在[L, R]区间之内
Parameters:
alpha_j: 原始alpha_j
L: 左边界值
R: 右边界值
Return:
L,R,alpha_j: 修改之后的alpha_j
"""
def modify_alpha(self, alpha_j, L, R):
if alpha_j < L: return L
if alpha_j > R: return R
return alpha_j

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 计算误差并返回
"""
def calc_E(self, alphas, y_label, x_data, b, i):
f_x_i = float(np.dot(np.multiply(alphas, y_label).T, x_data * x_data[i, :].T)) + b
return f_x_i - float(y_label[i])

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 计算eta并返回
"""
def calc_eta(self, x_data, i, j):
eta = 2.0 * x_data[i, :] * x_data[j, :].T \
- x_data[i, :] * x_data[i, :].T \
- x_data[j, :] * x_data[j,:].T
return eta

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 计算b1, b2并返回
"""
def calc_b(self, b, x_data, y_label, alphas, alpha_i_old, alpha_j_old, E_i, E_j, i, j):
b1 = b - E_i \
- y_label[i] * (alphas[i] - alpha_i_old) * x_data[i, :] * x_data[i, :].T \
- y_label[j] * (alphas[j] - alpha_j_old) * x_data[i, :] * x_data[j, :].T
b2 = b - E_j \
- y_label[i] * (alphas[i] - alpha_i_old) * x_data[i, :] * x_data[j, :].T \
- y_label[j] * (alphas[j] - alpha_j_old) * x_data[j, :] * x_data[j, :].T
return b1, b2

def select_appropriate_j(self, i, data_struct, E_i):
max_k, max_delta_E, E_j = -1, 0, 0
data_struct.E_cache[i] = [1, E_i]
valid_E_cache_list = np.nonzero(data_struct.E_cache[:, 0].A)[0]
if (len(valid_E_cache_list) > 1):
for k in valid_E_cache_list:
if k == i: continue
E_k = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, k)
delta_E = abs(E_i - E_k)
if (delta_E > max_delta_E): max_k, max_delta_E, E_j = k, delta_E, E_k
return max_k, E_j
else:
j = self.random_select_alpha_j(i, data_struct.m)
E_j = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, j)
return j, E_j

def update_E_k(self, data_struct, k):
E_k = self.calc_E(data_struct.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, k)                                        #计算Ek
data_struct.E_cache[k] = [1,E_k]

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: smo内层
"""
def inner_smo(self, i, data_strcut):
E_i = self.calc_E(data_strcut.alphas, data_struct.y_label, data_struct.x_data, data_struct.b, i)      # 调用calc_E方法计算样本i的误差
if ((data_struct.y_label[i] * E_i < -data_struct.toler) and (data_struct.alphas[i] < data_struct.C)) or ((data_struct.y_label[i] * E_i > data_struct.toler) and (data_struct.alphas[i] > 0)):
j, E_j = self.select_appropriate_j(i, data_strcut, E_i)              # 选取一个恰当的j
alpha_i_old, alpha_j_old = data_struct.alphas[i].copy(), data_struct.alphas[j].copy()
if (data_struct.y_label[i] != data_struct.y_label[j]):               # 确保alphas在[L, R]区间内
L, R = max(0, data_struct.alphas[j] - data_struct.alphas[i]), min(data_struct.C, data_struct.C + data_struct.alphas[j] - data_struct.alphas[i])
else:
L, R = max(0, data_struct.alphas[j] + data_struct.alphas[i] - data_struct.C), min(data_struct.C, data_struct.alphas[j] + data_struct.alphas[i])
if L == R: print("L==R"); return 0          # L==R时选取下一个样本
eta = self.calc_eta(data_struct.x_data, i, j)                  # 计算eta值
if eta >= 0: print("eta>=0"); return 0
data_struct.alphas[j] -= data_struct.y_label[j] * (E_i - E_j) / eta
data_struct.alphas[j] = self.modify_alpha(data_struct.alphas[j], L, R)     # 修改alpha[j]
self.update_E_k(data_strcut, j)
if (abs(data_strcut.alphas[j] - alpha_j_old) < 0.000001): print("alpha_j修改太小了"); return 0
data_struct.alphas[i] += data_strcut.y_label[j] * data_strcut.y_label[i] * (alpha_j_old - data_strcut.alphas[j])
self.update_E_k(data_strcut, i)
b1, b2= self.calc_b(data_struct.b, data_struct.x_data, data_struct.y_label, data_struct.alphas, alpha_i_old, alpha_j_old, E_i, E_j, i, j)    # 计算b值
if (0 < data_struct.alphas[i]) and (data_struct.C > data_struct.alphas[i]): data_struct.b = b1
elif (0 < data_struct.alphas[j]) and (data_struct.C > data_struct.alphas[j]): data_struct.b = b2
else: data_struct.b = (b1 + b2)/2.0
return 1
else: return 0

"""
Author：Taoye
微信公众号：玩世不恭的Coder
Explain：外层循环，选取第一个合适alpha
Parameters：
x_data: 样本属性特征矩阵
y_label: 属性特征对应的标签
C：惩罚参数
toler：容错率
max_iter：迭代次数
Return:
b: 决策面的参数b
alphas：获取决策面参数w所需要的alphas
"""
def outer_smo(self, data_struct, x_data, y_label, C, toler, max_iter):
iter_num, ergodic_flag, alpha_optimization_num = 0, True, 0
while (iter_num < max_iter) and ((alpha_optimization_num > 0) or (ergodic_flag)):
alpha_optimization_num = 0
if ergodic_flag:
for i in range(data_struct.m):
alpha_optimization_num += self.inner_smo(i, data_struct)
print("遍历所有样本数据：第%d次迭代，样本为：%d，alpha优化的次数：%d" % (iter_num, i, alpha_optimization_num))
iter_num += 1
else:
no_zero_index = np.nonzero((data_struct.alphas.A > 0) * (data_struct.alphas.A < C))[0]
for i in no_zero_index:
alpha_optimization_num += self.inner_smo(i, data_struct)
print("非边界遍历样本数据：第%d次迭代，样本为：%d，alpha优化的次数：%d" % (iter_num, i, alpha_optimization_num))
iter_num += 1
if ergodic_flag: ergodic_flag = False
elif alpha_optimization_num == 0: ergodic_flag = True
print("迭代次数：%d" % iter_num)
return data_struct.b, data_struct.alphas

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 根据公式计算出w权值向量
Parameters:
x_data: 样本属性特征矩阵
y_label: 属性特征对应的标签
alphas：linear_smo方法所返回的alphas向量
Return:
w: 决策面的参数w
"""
def calc_w(self, x_data, y_label, alphas):
x_data, y_label, alphas = np.array(x_data), np.array(y_label), np.array(alphas)
return np.dot((np.tile(y_label.reshape(1, -1).T, (1, 2)) * x_data).T, alphas).tolist()

"""
Author: Taoye
微信公众号: 玩世不恭的Coder
Explain: 绘制出分类结果
Parameters:
x_data: 样本属性特征矩阵
y_label: 属性特征对应的标签
w：决策面的w参数
b：决策面的参数b
"""
def plot_result(self, x_data, y_label, w, b):
data_number, _ = x_data.shape; middle = int(data_number / 2)
plt.scatter(x_data[:, 0], x_data[:, 1], c = y_label, cmap = pl.cm.Paired)
x1, x2 = np.max(x_data), np.min(x_data)
w1, w2 = w[0][0], w[1][0]
y1, y2 = (-b - w1 * x1) / w2, (-b - w1 * x2) / w2
plt.plot([float(x1), float(x2)], [float(y1), float(y2)])    # 绘制决策面
for index, alpha in enumerate(alphas):
if alpha > 0:
b_temp = - w1 * x_data[index][0] - w2 * x_data[index][1]
y1_temp, y2_temp = (-b_temp - w1 * x1) / w2, (-b_temp - w1 * x2) / w2
plt.plot([float(x1), float(x2)], [float(y1_temp), float(y2_temp)], "k--")    # 绘制支持向量
plt.scatter(x_data[index][0], x_data[index][1], s=150, c='none', alpha=0.7, linewidth=2, edgecolor='red')   # 圈出支持向量
plt.show()

if __name__ == '__main__':
optimize_linear_svm = OptimizeLinearSVM()
x_data, y_label = optimize_linear_svm.etablish_data(50)
data_struct = DataStruct(np.mat(x_data), np.mat(y_label).T, 0.8, 0.00001)
b, alphas = optimize_linear_svm.outer_smo(data_struct, x_data, y_label, data_struct.C, data_struct.toler, 10)
w = optimize_linear_svm.calc_w(x_data, y_label, alphas)
optimize_linear_svm.plot_result(x_data, y_label, w, b)


[1] 《机器学习实战》：Peter Harrington 人民邮电出版社
[2] 《统计学习方法》：李航 第二版 清华大学出版社

posted @ 2020-11-16 17:03  玩世不恭的Coder  阅读(227)  评论(1编辑  收藏  举报