def getData2(n=100, low=1, high=100, threshold=50):
    """
    Generate two linearly separable groups of random integer points.

    ``n`` candidate points are drawn with ``np.random.randint`` (all X values
    first, then all Y values, so seeding ``np.random`` reproduces the data).
    Only points lying strictly on the same side of ``threshold`` in both
    coordinates are kept; points in the mixed quadrants, or with a
    coordinate equal to ``threshold``, are discarded.

    :param n: number of candidate points to draw
    :param low: smallest coordinate value (inclusive)
    :param high: upper bound of the coordinate values (exclusive)
    :param threshold: value that separates the two groups on both axes
    :return: tuple ``(G1, G2)`` of DataFrames with columns ``X`` and ``Y``
        and a fresh 0-based index; G1 holds the points with both
        coordinates above ``threshold``, G2 those with both below it
    """
    df = pd.DataFrame()
    df['X'] = np.random.randint(low, high, size=n)
    df['Y'] = np.random.randint(low, high, size=n)

    in_upper = (df['X'] > threshold) & (df['Y'] > threshold)
    in_lower = (df['X'] < threshold) & (df['Y'] < threshold)

    G1 = df[in_upper].reset_index(drop=True)
    G2 = df[in_lower].reset_index(drop=True)
    return (G1, G2)
# Solving the classification problem
import math
from LinearRegression import *
def calDistance(x, y, w, b):
    """
    Return the perpendicular distance from the point (x, y) to the line
    y = w*x + b.

    Uses the closed form ``|w*x - y + b| / sqrt(w**2 + 1)``. This is defined
    for every slope, including a horizontal line (``w == 0``), where
    projecting the point onto the line along the x axis would divide by zero.

    :param x: x coordinate of the point
    :param y: y coordinate of the point
    :param w: slope of the line
    :param b: intercept of the line
    :return: the non-negative distance; 0 when the point lies on the line
    """
    # Signed vertical offset between the line and the point at abscissa x.
    offset = w * x - y + b
    return abs(offset) / math.hypot(w, 1.0)
def getSVMLoss(G1, G2, w, b):
    """
    Compute the loss of the separating line y = w*x + b over both classes.

    The loss is the sum of two parts:

    * the point-to-line distance of every misclassified sample, where a G1
      sample counts as misclassified when the line passes above it
      (``w*x + b > y``) and a G2 sample when the line passes below it
      (``w*x + b < y``);
    * ``abs(d2min - d1min)``, where ``d1min`` / ``d2min`` are the smallest
      distances from any sample of G1 / G2 to the line. This penalises a
      line that sits closer to one class than to the other.

    The second part is skipped when either class has no samples, because the
    nearest distance is then undefined.

    :param G1: first-class samples (DataFrame); column 0 is X, column 1 is Y
    :param G2: second-class samples (DataFrame); column 0 is X, column 1 is Y
    :param w: slope of the line
    :param b: intercept of the line
    :return: the loss for the given slope and intercept
    """
    total_loss = 0
    nearest = []  # closest-sample distance per class, None for an empty class

    for samples, line_above_is_error in ((G1, True), (G2, False)):
        if samples.shape[0] == 0:
            nearest.append(None)
            continue

        closest = float("inf")
        xs = samples.iloc[:, 0].to_numpy()
        ys = samples.iloc[:, 1].to_numpy()
        for x, y in zip(xs, ys):
            d = calDistance(x, y, w, b)
            line_y = w * x + b
            misclassified = line_y > y if line_above_is_error else line_y < y
            if misclassified:
                total_loss += d
            if d < closest:
                closest = d
        nearest.append(closest)

    d1min, d2min = nearest
    if d1min is not None and d2min is not None:
        # Penalise a line that is not equally far from both classes.
        total_loss = total_loss + abs(d2min - d1min)
    return total_loss
def SVMFit(G1, G2):
    """
    Fit the separating line y = w*x + b by descending on ``getSVMLoss``.

    Starting from w = -6, b = 99, the slope of the loss with respect to each
    parameter is approximated by the difference quotient between the two most
    recent iterates, ``(loss - loss_last) / (param - param_last)``, and each
    parameter is moved against that slope with step size ``eta``. One line
    of progress is printed per iteration.

    The search stops when any of the following holds:

    * the loss is exactly 0;
    * ``loss - loss_last < 0.1`` has been observed more than 1000 times;
    * a parameter did not move in the previous step, so the difference
      quotient would be a division by zero;
    * 10000 iterations have been performed.

    :param G1: first-class samples, passed through to ``getSVMLoss``
    :param G2: second-class samples, passed through to ``getSVMLoss``
    :return: tuple ``(w, b)`` with the final slope and intercept
    """
    # Fictitious "previous" iterate that seeds the first difference quotient.
    w_last, b_last = -5, 100
    loss_last = 1
    w, b = -6, 99

    stop = 10000
    eta = 1e-4
    count = 0

    for i in range(stop):
        loss = getSVMLoss(G1, G2, w, b)
        # Report after evaluating, so the printed loss belongs to this (w, b).
        print("{:05d}: w is {:.2f}, b is {:.2f}, loss is {:.2f}".format(i,w,b,loss))
        if loss == 0:
            break

        # NOTE(review): the difference is not taken in absolute value, so
        # every decreasing step is counted too, and the counter is never
        # reset -- confirm whether abs(loss - loss_last) was intended.
        if loss - loss_last < 0.1:
            count += 1
            if count > 1000:
                break

        dw = w - w_last
        db = b - b_last
        if dw == 0 or db == 0:
            # A parameter stalled: the quotient below would be x/0 and would
            # turn w or b into NaN/inf (or raise), so stop here instead.
            break

        d_loss = loss - loss_last
        w_last, w = w, w - eta * d_loss / dw
        b_last, b = b, b - eta * d_loss / db
        loss_last = loss

    return w, b
if __name__ == "__main__":
    # Demo: draw the two sample groups, the initial line and the fitted line.
    print("to solve classification problem")
    np.random.seed(5)  # fixed seed so the generated samples are reproducible
    G1, G2 = getData2()

    fig, ax = plt.subplots()
    ax.scatter(G1['X'], G1['Y'], color="C0")
    ax.scatter(G2['X'], G2['Y'], color="C1")
    # Guide lines at x = 50 and y = 50.
    ax.plot(np.array([50, 50]), np.array([0, 100]))
    ax.plot(np.array([0, 100]), np.array([50, 50]))

    # Line the fit starts from (same values as SVMFit's initial w, b).
    w, b = -6, 99
    x = np.arange(0, 100, 1)
    y = w * x + b
    ax.plot(x, y, color="C2", label="original")

    w_f, b_f = SVMFit(G1, G2)
    y_f = w_f * x + b_f
    ax.plot(x, y_f, color="C3", label="final")

    ax.legend()
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    # fig.show() does not run a GUI event loop, so in a plain script the
    # window may vanish immediately; plt.show() blocks until it is closed.
    plt.show()
# x,y,w,b
# print("距离是:{:.2f}".format(calDistance(1,0,1,0)))
# ![]()  -- stray empty Markdown image tag; not Python code