Python SVM: I designed three different loss functions myself and compared how they perform
# Solve a classification problem.
import math
import numpy as np
import matplotlib.pyplot as plt
from LinearRegression import *  # expected to provide getData2(); np and plt originally came in through this star import
def calDistance(x, y, w, b):
    r"""
    Compute the distance from the point (x, y) to the line y = w*x + b.
    :param x: point x
    :param y: point y
    :param w: slope of the line
    :param b: intercept of the line
    :return: the perpendicular distance
    """
    x0 = x
    y0 = y
    x1 = (y0 - b) / w  # x where the line reaches height y0 (assumes w != 0)
    y1 = w * x0 + b    # y of the line at x0
    # (x1, y0) and (x0, y1) both lie on the line; the segment between them is
    # the hypotenuse of a right triangle with legs |x0 - x1| and |y0 - y1|,
    # and the perpendicular distance is the altitude onto that hypotenuse.
    d0 = math.sqrt((x0 - x1)**2 + (y0 - y1)**2)
    if d0 == 0:
        return 0  # the point lies on the line
    else:
        dis = abs(x0 - x1) * abs(y0 - y1) / d0
        return dis
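# Sanity check added for illustration (not in the original script): the
# altitude trick above should agree with the closed-form point-to-line
# distance |w*x - y + b| / sqrt(w**2 + 1).
def _checkDistance():
    w, b = 1.0, 0.0
    x, y = 1.0, 0.0
    closed_form = abs(w * x - y + b) / math.sqrt(w**2 + 1)
    assert abs(calDistance(x, y, w, b) - closed_form) < 1e-9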
def getSVMLoss(G1, G2, w, b):
    r"""
    Compute the loss of the whole dataset for a given (w, b);
    the loss function is a hinge-style loss of my own design.
    :param G1: class-1 samples as a pandas DataFrame; first column X, second column Y
    :param G2: class-2 samples as a pandas DataFrame; first column X, second column Y
    :param w: slope
    :param b: intercept
    :return: the loss under the current slope and intercept
    """
    total_loss = 0
    # loss over G1
    class1Num = G1.shape[0]
    d1min = 99999
    x_f_1, y_f_1 = 0, 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if (w*x + b) > y:  # penalize misclassification
            total_loss += d
        if d < d1min:  # track the class-1 point closest to the line
            x_f_1, y_f_1 = x, y
            d1min = d
    # loss over G2
    class2Num = G2.shape[0]
    d2min = 99999
    x_f_2, y_f_2 = 0, 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        d = calDistance(x, y, w, b)
        if w*x + b < y:  # penalize misclassification
            total_loss += d
        if d < d2min:  # track the class-2 point closest to the line
            x_f_2, y_f_2 = x, y
            d2min = d
    total_loss = total_loss + abs(d2min - d1min)  # penalize when the two minimum distances differ too much
    return total_loss
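# Illustrative usage (a minimal sketch on made-up toy points, not part of the
# original experiment): evaluate getSVMLoss at the separating line y = -x + 100.
def _demoGetSVMLoss():
    import pandas as pd
    G1 = pd.DataFrame({"X": [10, 20, 30], "Y": [95, 90, 85]})  # above the line
    G2 = pd.DataFrame({"X": [60, 70, 80], "Y": [10, 20, 15]})  # below the line
    # nothing is misclassified, and the two minimum distances happen to
    # coincide here, so this prints 0.0
    print(getSVMLoss(G1, G2, -1, 100))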
def getSVMLoss2(G1, G2, w, b):
    r"""
    Loss function rewritten after working through the concepts on the wiki page.
    The line is now y = w*x + b.
    Converted to the linear-algebra form of a line (w^T*x - b = 0), it becomes
    w*x - y - (-b) = 0, i.e. (w^T) = [w, -1], (b) = -b  [<- this understanding is wrong]
    With (w^T) = [w, -1], (b) = -b, the model did not converge to the correct result;
    with (w^T) = [w, 1], (b) = b, the model did converge to the correct result.
    Our goal is to minimize ||w|| subject to the constraints y_i*(w^T*x_i - b) >= 1.
    How? Give a large penalty to samples that violate the constraint, and make
    ||w|| itself part of the loss.
    :param G1: class-1 samples as a pandas DataFrame; first column X, second column Y
    :param G2: class-2 samples as a pandas DataFrame; first column X, second column Y
    :param w: slope
    :param b: intercept
    :return: the loss under the current slope and intercept
    """
    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w^T||, the 2-norm of the w vector
    total_loss = total_loss + w_v_nor
    # Then also penalize the samples that violate the constraints.
    # loss over G1
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        x_v = np.array([x, y])
        class1Loss += max(0, 1 - w_v@x_v - b)  # no penalty above 1, penalized below 1
        # earlier hard-penalty version:
        # if co < 1:  # (w^T*x_i - b) should be > 1; penalize anything below 1
        #     class1Loss += abs(co - 1)
    class1Loss = class1Loss / class1Num
    total_loss += class1Loss
    # loss over G2
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        x_v = np.array([x, y])
        class2Loss += max(0, 1 + w_v@x_v - b)  # no penalty below -1, penalized above -1
        # earlier hard-penalty version:
        # co = w_v@x_v
        # if co > -1:  # (w^T*x_i - b) should be < -1; penalize anything above -1
        #     total_loss += abs(co - 1)
    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss
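# For comparison, a sketch of the textbook soft-margin objective in standard
# notation (not the author's code): ||w||^2/2 + C * mean(max(0, 1 - y_i*(w.x_i - b))),
# with labels y_i in {+1, -1}, vectorized over an (n, 2) sample array X.
def hingeLossReference(X, labels, w_v, b, C=1.0):
    margins = labels * (X @ w_v - b)        # y_i * (w^T x_i - b) per sample
    hinge = np.maximum(0.0, 1.0 - margins)  # per-sample constraint violation
    return 0.5 * w_v @ w_v + C * hinge.mean()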
def getSVMLoss3(G1, G2, w, b):
    r"""
    Same idea as getSVMLoss2, but with the linear-algebra notation replaced by
    a plain geometric reading: class-1 points should lie above the band line
    y = w*x + b + 1 and class-2 points below y = w*x + b - 1; points inside
    the band are penalized by their vertical distance to it.
    :param G1: class-1 samples as a pandas DataFrame; first column X, second column Y
    :param G2: class-2 samples as a pandas DataFrame; first column X, second column Y
    :param w: slope
    :param b: intercept
    :return: the loss under the current slope and intercept
    """
    total_loss = 0
    w_v = np.array([w, 1])  # w^T
    w_v_nor = np.linalg.norm(w_v, ord=2)  # ||w^T||, the 2-norm of the w vector
    total_loss = total_loss + w_v_nor
    # Then also penalize the samples that violate the constraints.
    # loss over G1
    class1Num = G1.shape[0]
    class1Loss = 0
    for i in range(class1Num):
        x = G1.iloc[i, 0]
        y = G1.iloc[i, 1]
        if y < w*x + b + 1:
            class1Loss += w*x + b + 1 - y
    class1Loss = class1Loss / class1Num
    total_loss += class1Loss
    # loss over G2
    class2Num = G2.shape[0]
    class2Loss = 0
    for i in range(class2Num):
        x = G2.iloc[i, 0]
        y = G2.iloc[i, 1]
        if y > w*x + b - 1:
            class2Loss += y - (w*x + b - 1)  # this should be the perpendicular distance,
            # not the vertical difference; otherwise you get the result shown below
    class2Loss = class2Loss / class2Num
    total_loss += class2Loss
    return total_loss
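# The comment above suggests using the perpendicular distance instead of the
# vertical difference. A hedged sketch of that fix (hypothetical, untested
# against the author's data): dividing the vertical gap by sqrt(w**2 + 1)
# converts it into the perpendicular distance to the band line.
def getSVMLoss3Perp(G1, G2, w, b):
    total_loss = np.linalg.norm(np.array([w, 1]), ord=2)
    scale = math.sqrt(w**2 + 1)  # vertical gap / scale = perpendicular distance
    loss1 = sum(max(0, w*G1.iloc[i, 0] + b + 1 - G1.iloc[i, 1]) for i in range(G1.shape[0]))
    loss2 = sum(max(0, G2.iloc[i, 1] - (w*G2.iloc[i, 0] + b - 1)) for i in range(G2.shape[0]))
    total_loss += loss1 / (scale * G1.shape[0]) + loss2 / (scale * G2.shape[0])
    return total_loss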
def SVMFit(G1, G2):
    w_last, b_last = -5, 100
    w, b = -6, 99
    loss_last = 1
    loss = 0
    stop = 100000
    i = 0
    eta = 1e-3
    count = 0
    while i < stop:
        # loss = getSVMLoss(G1, G2, w, b)   # scheme 1 (a loss designed from my own intuition)
        # loss = getSVMLoss2(G1, G2, w, b)  # scheme 2 (using the linear-algebra formulation)
        loss = getSVMLoss3(G1, G2, w, b)    # scheme 3 (scheme 2 rewritten in plain geometric terms)
        print("{:05d}: w is {:.2f}, b is {:.2f}, loss is {:.2f}".format(i, w, b, loss))
        if loss == 0:
            break
        if loss - loss_last < 0.1:  # count the steps where the loss did not grow by more than 0.1
            count += 1
            if count > 1000:
                break
        # secant-style update: approximate the derivative of the loss w.r.t.
        # each parameter by a finite difference against the previous iterate
        wn = w - eta * (loss - loss_last) / (w - w_last)
        bn = b - eta * (loss - loss_last) / (b - b_last)
        w_last = w
        w = wn
        b_last = b
        b = bn
        loss_last = loss
        i += 1
    return w, b, loss
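# SVMFit reuses one scalar loss difference for both parameters, so the two
# "derivatives" are not independent. A hedged alternative sketch (hypothetical,
# not the author's method): estimate each partial derivative separately with a
# central finite difference and take a plain gradient step.
def SVMFitFD(G1, G2, lossFn, w=-6.0, b=99.0, eta=1e-3, steps=10000, h=1e-4):
    for _ in range(steps):
        dw = (lossFn(G1, G2, w + h, b) - lossFn(G1, G2, w - h, b)) / (2 * h)
        db = (lossFn(G1, G2, w, b + h) - lossFn(G1, G2, w, b - h)) / (2 * h)
        w, b = w - eta * dw, b - eta * db
    return w, b, lossFn(G1, G2, w, b)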
if __name__ == "__main__":
    print("to solve classification problem")
    np.random.seed(5)
    G1, G2 = getData2()
    fig, ax = plt.subplots()
    ax.scatter(G1['X'], G1['Y'], color="C0")
    ax.scatter(G2['X'], G2['Y'], color="C1")
    ax.plot(np.array([50, 50]), np.array([0, 100]))  # vertical reference line x = 50
    ax.plot(np.array([0, 100]), np.array([50, 50]))  # horizontal reference line y = 50
    w, b = -6, 99
    x = np.arange(0, 100, 1)
    y = w * x + b
    ax.plot(x, y, color="C2", label="original")
    w_f, b_f, loss_f = SVMFit(G1, G2)
    y_f = w_f * x + b_f
    ax.plot(x, y_f, color="C3", label="final")
    ax.legend()
    ax.set_xlim(xmin=0, xmax=100)
    ax.set_ylim(ymin=0, ymax=100)
    ax.set_title("LOSS-{}, eta = {}, loss is {:.2f}".format(3, 1e-3, loss_f))
    fig.show()
    # quick check of calDistance(x, y, w, b):
    # print("the distance is: {:.2f}".format(calDistance(1, 0, 1, 0)))
Conclusions:
1. The design of the loss function matters a great deal for the result.
2. Different losses call for different learning rates; if the learning rate is left unchanged, the model may well fail to converge.
3. Compare the three results below: loss function 1 and loss function 2 perform equally well, while loss function 3 performs worst.
