SVM

支持向量机

本节将依SMO算法训练线性SVM, 核方法的使用可以很方便的进行扩展.

import numpy as np
import matplotlib.pyplot as plt

下面的Point的类中的:

\[g(x) = \sum_{i=1}^N \alpha_i y_i k(x_i, x) + b, \]

\[e = g - y, yg = y * g \]

class Point:
    """
    因为在SM0算法中,我们需要不断更新以及判断数据点的
    各种属性,所以创建了一个新的数据类型Point.
    """
    def __init__(self, x, y, k, order,
                 g, alpha, c):
        assert y == 1 or y == -1, "Invalid y"
        self.x = x  #数据向量
        self.y = y  # 类
        self.order = order  #序
        self.k = k  #距离矩阵
        self.c = c  
        self.g = g
        self.yg = g * y #衡量违背条件的指标
        self.alpha = alpha
        self.e = g - y

    def kkt(self, epsilon=0.001):
        """
        检查是否符合KKT条件
        :epsilon: 条件判断时允许的误差
        :return: 0 表示符合, 1 表示 不符合 其原因是便于统计不满足kkt的数据点的总量
        """
        alpha = self.alpha
        cond = self.y * self.g
        if alpha == 0. and cond < 1 - epsilon:
            self.satisfy_kkt = False
        elif 0 < alpha < self.c and (cond <1 - epsilon or cond > 1 + epsilon):
            self.satisfy_kkt = False
        elif alpha == self.c and cond > 1 + epsilon:
            self.satisfy_kkt = False
        else:
            self.satisfy_kkt = True
        if self.satisfy_kkt:
            return 0
        else:
            return 1
    def update_g_e(self, dog1, dog2, oldb, newb, newalpha1, newalpha2):
        """
        更新g, e参数
        """
        k1 = self.k[self.order, dog1.order]
        k2 = self.k[self.order, dog2.order]
        self.g = self.g + (newalpha1- dog1.alpha) * dog1.y * k1 \
                        + (newalpha2- dog2.alpha) * dog2.y * k2 \
                        + newb - oldb
        self.e = self.g - self.y
        self.yg = self.g * self.y

下面是Points类的定义, 用训练和预测,值得一提的是在initial函数中对于alpha的初始化,从\([0, self.c / self.size]\)中依照均匀分布选取,并且对最后一个样本的进行特别处理,以保证\(\sum \alpha_i y_i=0\), 这意味着\(alpha\)将是一个可行解, 一些文章中似乎是将alpha初始化为0的,不知道按照这种方式怎么进行更新迭代,因为:

\[\alpha_2^{new, unc} = \alpha_2^{old} + \frac{y_2(E_1-E_2)}{\eta}, \]

\(\alpha^{old} = 0, b = 0, \Rightarrow E=0 \Rightarrow \alpha^{new}=0\). 意味着样本的参数\(\alpha\)是不会变化的.

另外calc_func_value,计算的是:

\[\frac{1}{2} \sum_{i=1}^N\sum_{j=1}^N \alpha_i \alpha_j y_i y_j k(x_i, x_j) - \sum_{i=1}^N \alpha_i \]

这个值越小收敛的越好

class Points:
    """
    数据点的集合
    """
    def __init__(self, points, labels, c=1.):
        self.points = []  #用于保存Point对象
        self.size = len(points)
        self.initial(points, labels, c=c)


    def distance(self, point1, point2):
        """
        计算内积矩阵, 可以在这个函数中加入核方法而达到
        核SVM的目的.
        """
        return point1 @ point2

    def initial(self, points, labels, b=0., c=1.):
        """
        初始化 w, b
        w: 权重
        b: 截距
        c: 正则化项的系数
        :return:
        """
        self.b =  b
        self.c = c
        
        # alpha 进行初始化
        alpha = np.random.uniform(0, self.c / self.size, size=self.size)
        y = labels
        the_sum = np.sum(y * alpha)
        alpha[-1] = alpha[-1] - the_sum * y[-1]
        while not (0 < alpha[-1] < self.c):
            alpha = np.random.uniform(0, self.c / self.size, size=self.size)
            the_sum = np.sum(y * alpha)
            alpha[-1] = alpha[-1] - the_sum * y[-1]
            
        alphay = alpha * y
        k = np.zeros((self.size, self.size))
        for i in range(self.size):
            for j in range(i, self.size):
                k[i, j] = self.distance(points[i], points[j])
                k[j, i] = self.distance(points[j], points[i])
        self.k = k #内积矩阵
        g = k @ alphay + self.b
        for i in range(self.size):
            point = Point(points[i], labels[i], k, i,
                          g[i], alpha[i], c)
            self.points.append(point)


    def find_dog1(self, points):
        """
        寻找第一个点
        """
        lucky_dog1 = None
        lucky_value1 = 99999999999.
        dogs = points.copy()
        #首先我们在不满足的kkt条件的支持向量中寻找
        for dog in dogs:
            if not dog.satisfy_kkt and dog.alpha > 0:
                cond =np.abs(dog.yg - 1)
                if cond < lucky_value1:
                    lucky_dog1 = dog
                    lucky_value1 = cond
                    
        #如果没找到,再在整个训练集中找违背kkt最严重的点.
        if lucky_dog1 is None:
            for dog in dogs:
                if not dog.satisfy_kkt:
                    if dog.yg < lucky_value1:
                        lucky_dog1 = dog
                        lucky_value1 = dog.yg
        return lucky_dog1

    def find_dog2(self, points, dog1):
        """
        寻找第二个点
        """
        dogs = points.copy()
        e1 = dog1.e
        lucky_dog2 = None
        lucky_value2 = -1.
        #寻找使得|e2-e1|最大的点
        for dog in dogs:
            e2 = dog.e
            value = np.abs(e2 - e1)
            if value > lucky_value2:
                lucky_dog2 = dog
                lucky_value2 = value
        return lucky_dog2


    def calc_l_h(self, dogs):
        """
        alpha需要裁剪,所以要计算L, H
        :param dogs:
        :return:
        """
        dog1 = dogs[0]
        dog2 = dogs[1]
        if dog1.y != dog2.y:
            l = max(0, dog2.alpha-dog1.alpha)
            h = min(self.c, self.c + dog2.alpha-dog1.alpha)
        else:
            l = max(0, dog2.alpha+dog1.alpha-self.c)
            h = min(self.c, dog2.alpha+dog1.alpha)
        return (l, h)

    def update(self, dogs, l, h):
        """
        更新alpha, b, g, e
        """
        dog1 = dogs[0]
        dog2 = dogs[1]
        e1 = dog1.e
        e2 = dog2.e
        k11 = self.k[dog1.order, dog1.order]
        k12 = self.k[dog1.order, dog2.order]
        k21 = self.k[dog2.order, dog1.order]
        k22 = self.k[dog2.order, dog2.order]
        eta =  k11 + k22 - 2 * k12
        alpha2_unc = dog2.alpha + dog2.y * (e1 - e2) / eta  #未裁剪的alpha2
        if alpha2_unc < l:
            alpha2 = l
        elif l < alpha2_unc < h:
            alpha2 = alpha2_unc
        else:
            alpha2 = h
        alpha1 = dog1.alpha + dog1.y * dog2.y * (dog2.alpha - alpha2)
        b1 = -e1 - dog1.y * k11 * (alpha1 - dog1.alpha) - \
             dog2.y * k21 * (alpha2 - dog2.alpha) + self.b
        b2 = -e2 - dog1.y * k12 * (alpha1 - dog1.alpha) - \
             dog2.y * k22 * (alpha2 - dog2.alpha) + self.b
        b = (b1 + b2) / 2
        #更新g, e
        for point in self.points:
            point.update_g_e(dog1, dog2, self.b, b, alpha1, alpha2)

        #更新alpha, b
        dog1.alpha = alpha1
        dog2.alpha = alpha2
        self.b = b
        return (alpha1, alpha2, b)

    def update_kkt(self, epsilon):
        """
        检查每个Point是否符合kkt条件,并返回
        不合格的Point的数量
        """
        count = 0
        for point in self.points:
            count += point.kkt(epsilon)
        return count

    def calc_func_value(self):
        """
        计算函数的值
        """
        alpha = np.array([point.alpha for point in self.points])
        y = np.array([point.y for point in self.points])
        alphay = alpha * y
        value = alphay @ self.k @ alphay - np.sum(alpha)
        return value

    def calc_w(self):
        """
        计算w
        """
        w = 0.0
        for point in self.points:
            w += point.alpha * point.y * point.x
        self.w = w
        return w


    def smo(self, max_iter, x, y1, y2, epsilon=1e-3, enough_num=10):
        """
        事实上,并不需要传入x, y1, y2, 这里只是做可视化用,
        w 也仅在线性分类器时是有用的.
        enough_num: 当不满足kkt条件的数目小于enough_num是退出
        """
        iteration = 0
        value_pre = None
        points1 = self.points.copy()
        points2 = self.points.copy()
        while True:
            iteration += 1
            count = self.update_kkt(epsilon)
            value = self.calc_func_value()
            #print(iteration, value, count)
            if count < enough_num or iteration > max_iter:
                break
            
            #下面部分的含义是如果我们所选的Point不能函数值下降,那么遍历其它的点
            #直到目标函数有足够的下降,如果还不行,那么抛弃第一个Point,选择其它的Point
            if value_pre == value:
                points2.remove(dog2)
                if len(points2) > 1:
                    dog2 = self.find_dog2(points2, dog1)
                else:
                    points1.remove(dog1)
                    points2 = points1.copy()
                    dog1 = self.find_dog1(points1)
                    points2.remove(dog1)
                    dog2 = self.find_dog2(points2, dog1)
            else:
                points1 = self.points.copy()
                points2 = self.points.copy()
                dog1 = self.find_dog1(points1)
                points2.remove(dog1)
                dog2 = self.find_dog2(points2, dog1)

            value_pre = value
            l, h = self.calc_l_h((dog1, dog2))
            self.update((dog1, dog2), l, h)
            
            """
            可视化部分
            if iteration % 10 == 9:
                self.calc_w()
                plt.cla()
                plt.scatter(x, y1)
                plt.scatter(x, y2)
                y = -self.w[0] / self.w[1] * x - self.b
                plt.title("SVM")
                plt.plot(x, y, color="red")
                plt.pause(0.01)
            """
        self.calc_w()

        return self.w, self.b
    
    def pre(self, x):
        """
        给定x 预测其分类, %%sh上,如果是线性分类器,
        只需计算self.w,再计算self.w @ x + self.b即可,
        为了方便扩展,这里做了一般化.
        """
        value = self.b
        for point in self.points:
            value += point.alpha * point.y * \
                    self.distance(x, point.x)
        sign = 1 if value >= 0 else -1
        return sign

准备数据

蓝色部分:

\[y1 = 2x + 5 + \epsilon_1, \epsilon1 \sim \mathcal{N}(0, \sigma^2). \]

红色部分

\[y2 = 2x - 5 + \epsilon_2, \epsilon2 \sim \mathcal{N}(0, \sigma^2) \]

np.random.seed(10086)
x = np.linspace(0, 10, 100)
e1 = np.random.randn(100) * 2
e2 = np.random.randn(100) * 2
y1 = 2 * x + 5 + e1
y2 = 2 * x - 5 + e2
data1 = np.hstack((x.reshape(-1, 1), y1.reshape(-1, 1)))
data2 = np.hstack((x.reshape(-1, 1), y2.reshape(-1, 1)))
data = np.vstack((data1, data2))
labels = np.hstack((np.ones(100), -np.ones(100)))
plt.scatter(x, y1)
plt.scatter(x, y2)
plt.show()

在这里插入图片描述

测试

test = Points(data, labels, c=10)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-1.25243632  0.5887332 ] -1.1498779874793863

在这里插入图片描述

有些时候结果是不会那么好的, 在收敛过程中还是会有波动的, 而且数据越是混越是波动大

test.pre(np.array([10., 0.]))
-1
test.pre(np.array([0., 10.]))
1
test = Points(data, labels, c=1)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-1.33721113  0.83955356] -1.6680372246340474

在这里插入图片描述

test = Points(data, labels, c=0.1)
w, b = test.smo(30000, x, y1, y2, epsilon=1e-7)
print(w, b)
plt.scatter(x, y1)
plt.scatter(x, y2)
y = -w[0] / w[1] * x - b
plt.plot(x, y, color="red")
plt.show()
[-0.78780019  0.44522279] -0.6061702858422205

在这里插入图片描述

实验中发现, c越大波动越大,或许在训练过程可以动态调整c

posted @ 2019-08-20 17:00  馒头and花卷  阅读(235)  评论(0编辑  收藏  举报