Machine Learning in Action, Chapter 5: Logistic Regression

from numpy import *   # the book's listings assume a star import; sigmoid is defined in the full program below

def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)             #convert to NumPy matrix
    labelMat = mat(classLabels).transpose() #convert to NumPy matrix
    m,n = shape(dataMatrix)
    alpha = 0.001
    maxCycles = 500
    weights = ones((n,1))
    for k in range(maxCycles):              #heavy on matrix operations
        h = sigmoid(dataMatrix*weights)     #matrix mult
        error = (labelMat - h)              #vector subtraction
        weights = weights + alpha * dataMatrix.transpose()* error #matrix mult
    return weights

This is the book's gradient ascent code, but the last two lines of the loop threw me: the book says it omits "a simple mathematical derivation" here, and I couldn't work it out on my own. So I went searching for an explanation, found a good one, and then realized it is the same derivation Andrew Ng goes through in his machine learning videos, which I had forgotten.

Sigmoid function: $g(z)=\frac{1}{1+e^{-z}}$

      $h_{\theta}(x)=g(\theta^{T}x)$

Here $\theta$ is the same thing as $w$ in the book: the vector of regression coefficients.
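
A quick numerical illustration (my own, with made-up values): for $\theta=(0,1,-1)^{T}$ and $x=(1,2,0.5)^{T}$ we get $\theta^{T}x=0\cdot 1+1\cdot 2-1\cdot 0.5=1.5$, and $g(1.5)\approx 0.82$, so the model predicts class 1 with probability about 0.82.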

The cost function is given below; it measures the error between the predicted value (class) and the actual value (class):

      $cost(h_{\theta}(x),y)=\begin{cases}
      -\log(h_{\theta}(x)), & y=1\\
      -\log(1-h_{\theta}(x)), & y=0\end{cases}$

Written as a single expression (substituting $y=1$ or $y=0$ recovers the two cases):

      $cost(h_{\theta}(x),y)=-y\log(h_{\theta}(x))-(1-y)\log(1-h_{\theta}(x))$

The overall cost function is:

      $J(\theta)=\frac{1}{m}\sum_{i=1}^{m}cost(h_{\theta}(x^{(i)}),y^{(i)})=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log(h_{\theta}(x^{(i)}))-(1-y^{(i)})\log(1-h_{\theta}(x^{(i)}))\right]$
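
As a sanity check, here is a minimal NumPy sketch of $J(\theta)$ (my own illustration, not code from the book):

import numpy as np

def cost(theta, X, y):
    """Cross-entropy cost J(theta): X is the (m, n) design matrix, y holds the 0/1 labels."""
    h = 1.0 / (1 + np.exp(-X.dot(theta)))   # h_theta(x) for every sample at once
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))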

To minimize the error we want to minimize $J(\theta)$, which is the same as maximizing $-J(\theta)$, and a maximum is what gradient ascent finds:

      $\theta_{j} := \theta_{j}+ \alpha \frac{\partial (-J(\theta))}{\partial \theta_{j}}$

Here is the derivation (the steps the book skips):

      $\frac{\partial (-J(\theta))}{\partial \theta_{j}}=\frac{1}{m}\sum_{i=1}^{m}\left(\frac{y^{(i)}}{g(\theta^{T}x^{(i)})}-\frac{1-y^{(i)}}{1-g(\theta^{T}x^{(i)})}\right)\frac{\partial g(\theta^{T}x^{(i)})}{\partial \theta_{j}}$

      $=\frac{1}{m}\sum_{i=1}^{m}\frac{y^{(i)}-g(\theta^{T}x^{(i)})}{g(\theta^{T}x^{(i)})\left(1-g(\theta^{T}x^{(i)})\right)}\frac{\partial g(\theta^{T}x^{(i)})}{\partial \theta_{j}}$

      $=\frac{1}{m}\sum_{i=1}^{m}\frac{y^{(i)}-g(\theta^{T}x^{(i)})}{g(\theta^{T}x^{(i)})\left(1-g(\theta^{T}x^{(i)})\right)}\,g'(\theta^{T}x^{(i)})\,x_{j}^{(i)}$

      $=\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}-h_{\theta}(x^{(i)})\right)x_{j}^{(i)}$

From step 3 to step 4 we use the derivative of the sigmoid,

      $g'(z)=\frac{e^{-z}}{(1+e^{-z})^{2}}=g(z)(1-g(z))$

which cancels the denominator $g(\theta^{T}x^{(i)})(1-g(\theta^{T}x^{(i)}))$.

So the iterative update rule for the weights is:

  $\theta_{j} := \theta_{j}+ \alpha \sum_{i=1}^{m}\left(y^{(i)}-h_{\theta}(x^{(i)})\right)x_{j}^{(i)}$

(the constant $\frac{1}{m}$ has been absorbed into the learning rate $\alpha$). In matrix form this is exactly the puzzling line `weights = weights + alpha * dataMatrix.transpose() * error` from the code above.
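
To convince myself the derivation is correct, here is a small numerical check (my own sketch, not from the book): it compares the analytic gradient $\frac{1}{m}\sum_{i=1}^{m}(y^{(i)}-h_{\theta}(x^{(i)}))x_{j}^{(i)}$ with a centered finite-difference approximation of $\partial(-J)/\partial\theta_{j}$:

import numpy as np

np.random.seed(0)
X = np.hstack([np.ones((5, 1)), np.random.randn(5, 2)])   # 5 samples: bias + 2 features
y = np.array([1, 0, 1, 1, 0], dtype=float)
theta = np.random.randn(3)

def neg_J(t):                                   # -J(theta), the quantity gradient ascent maximizes
    h = 1.0 / (1 + np.exp(-X.dot(t)))
    return np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

h = 1.0 / (1 + np.exp(-X.dot(theta)))
analytic = X.T.dot(y - h) / len(y)              # (1/m) * sum (y - h) * x

eps = 1e-6
numeric = np.array([(neg_J(theta + eps * e) - neg_J(theta - eps * e)) / (2 * eps)
                    for e in np.eye(3)])        # finite differences along each coordinate
print(np.allclose(analytic, numeric))           # prints True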

 

gradAscent is the basic batch gradient ascent: every update of the coefficients uses the entire data set, which is computationally expensive.
stocGradAscent0 is stochastic gradient ascent: each update uses only a single sample.
stocGradAscent1 is the improved stochastic gradient ascent: (1) it makes multiple passes over the data set (numIter); (2) it picks samples at random, which removes the periodic fluctuations in the coefficients; (3) alpha is reduced on every iteration, which damps the fluctuations and high-frequency oscillations. The book doesn't explain point (3) well; the idea is that as alpha shrinks, each new (possibly noisy) sample moves the weights less and less, so the coefficients settle down instead of bouncing around (see the sketch below).
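
To get a feel for the schedule, here is a tiny sketch (my own, assuming $m=100$ samples as in testSet.txt) that prints how alpha shrinks across passes:

m = 100
for j in (0, 1, 10, 149):   # a few selected passes out of numIter = 150
    alphas = [4 / (1.0 + j + i) + 0.01 for i in range(m)]
    print("pass %3d: alpha goes from %.4f down to %.4f" % (j, alphas[0], alphas[-1]))

Because of the constant 0.01 term, alpha never decays all the way to 0, so later samples always retain some influence on the weights.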

The complete program:

import random

import numpy as np
import matplotlib.pyplot as plt


def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])]) # x0 (bias), x1, x2
        labelMat.append(int(lineArr[2]))    # class label
    return dataMat, labelMat

def sigmoid(inX):
    return 1.0 / (1 + np.exp(-inX))
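# Note (my addition, not from the book): for large negative inX, np.exp(-inX)
# can overflow float64 and NumPy then prints a RuntimeWarning (easy to trigger
# on the horse colic data below). Clipping the argument first, e.g.
#     np.exp(-np.clip(inX, -500, 500))
# avoids the warning; the result is effectively unchanged because the sigmoid
# saturates at 0 and 1 long before |inX| reaches 500.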

def gradAscent(dataMatIn, classLabels):
    dataMatrix = np.mat(dataMatIn)  # convert the 100 x 3 array to a matrix
    labelMat = np.mat(classLabels).transpose()  # 100 x 1 column vector
    m, n = np.shape(dataMatrix) # m = 100, n = 3
    alpha = 0.001
    maxCycles = 500
    weights = np.ones((n,1))    #3 * 1
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)   #100 * 1
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error #3 * 1
    return weights


"""画出决策边界"""
def plotBestFit(weights):
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat) #numpy的matrix类型转化成array
    n = np.shape(dataArr)[0]    #样本数100
    xcord1 = [];ycord1 = [] #类别为1的样本的x,y坐标
    xcord2 = [];ycord2 = [] #类别为0的样本的x,y坐标
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]);
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]);
            ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')   #s参数是数据点的粗细,marker是标记('s'是正方形,默认圆形)
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)   #直线x坐标的取值范围
    y = (-weights[0] - weights[1] * x) / weights[2] #直线方程
    ax.plot(x, y)
    plt.xlabel('X1');
    plt.ylabel('X2');
    plt.show()

"""画出系数变化图"""
def plotWeight(weight0, weight1, weight2):
    x = np.arange(len(weight0))
    fig = plt.figure()
    ax = fig.add_subplot(311)  # 等价于ax = plt.subplot(311)
    print(weight0)
    ax.plot(x, weight0)
    ax = fig.add_subplot(312)
    ax.plot(x, weight1)
    ax = fig.add_subplot(313)
    ax.plot(x, weight2)
    plt.show()


"""随机梯度上升算法"""
def stocGradAscent0(dataArr, classbels, numIter):    #书上的命名容易误导人,改一下
    m, n = np.shape(dataArr)
    alpha = 0.01
    weights = np.ones(n)
    w0 = []; w1 = []; w2 = []
    for j in range(numIter):
        for i in range(m):
            h = sigmoid(sum(dataArr[i] * weights))  #数值
            error = classbels[i] - h    #数值
            weights = weights + alpha * error * dataArr[i]
        w0.append(weights[0])
        w1.append(weights[1])
        w2.append(weights[2])
    return weights, w0, w1, w2


"""改进的随机梯度上升法"""
def stocGradAscent1(dataArr, classbels, numIter=150):
    m, n = np.shape(dataArr)
    weights = np.ones(n)
    w0 = []; w1 = []; w2 = []
    for j in range(numIter):
        dataIndex = range(m)
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))  #通过随机选取样本,来减少周期性的波动
            h = sigmoid(sum(dataArr[randIndex] * weights))
            error = classbels[randIndex] - h    #数值
            weights = weights + alpha * error * dataArr[randIndex]
        # w0.append(weights[0])
        # w1.append(weights[1])
        # w2.append(weights[2])
    # return weights, w0, w1, w2
    return weights

dataMat, labelMat = loadDataSet()
weights = gradAscent(dataMat, labelMat)
plotBestFit(weights.getA())  # getA() converts a numpy.matrix to an ndarray

dataList, labelList = loadDataSet()
weights,w0,w1,w2 = stocGradAscent0(np.array(dataList), labelList, 200)
plotBestFit(weights)
plotWeight(w0, w1,w2)

dataList, labelList = loadDataSet()
weights,w0,w1,w2 = stocGradAscent1(np.array(dataList), labelList)
plotBestFit(weights)
plotWeight(w0, w1,w2)


"""分类函数"""
def classifyVector(inX, weights):
    prob = sigmoid(sum(inX * weights))
    if prob > 0.5: return 1.0
    else: return 0.0

def colicTest():
    frTrain = open("horseColicTraining.txt")
    frTest = open("horseColicTest.txt")
    trainingSet = []; trainingLabels = []
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(len(currLine)-1):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabels.append(float(currLine[len(currLine) - 1]))
    trainWeights, _, _, _ = stocGradAscent1(np.array(trainingSet), trainingLabels, 500)
    errorCount = 0; numTestVec = 0  # must start at 0; starting at 1 inflates the denominator
    for line in frTest.readlines():
        numTestVec += 1
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(len(currLine)-1):
            lineArr.append(float(currLine[i]))
        if int(classifyVector(np.array(lineArr), trainWeights)) != int(currLine[len(currLine)-1]):
            errorCount += 1
    errorRate = float(errorCount) / float(numTestVec)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate

"""计算10次的平均错误率"""
def multiTest():
    numTests = 10; errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print("after %d iterations the average error rate is: %f" % (numTests, errorSum / float(numTests)))

multiTest()

[Figure: gradient ascent, decision boundary after 500 iterations]

[Figure: stochastic gradient ascent (stocGradAscent0), decision boundary after 200 iterations]

[Figure: stochastic gradient ascent (stocGradAscent0), coefficient trajectories over 200 iterations]

[Figure: the book's version of the coefficient plot; I'm not sure why mine looks different]

[Figure: coefficient trajectories for the improved stochastic gradient ascent (stocGradAscent1); they converge faster]

[Figure: decision boundary for the improved stochastic gradient ascent]
