机器学习 周志华 3.3 答案

数据集见文章末尾。 

from numpy import *
from numpy.linalg import *
import matplotlib.pyplot as plt


def loadDataSet(fileName):
    """Load a tab-separated data set.

    Each line of the file is one sample; every field is parsed as float.
    The last column is the label (1 = good melon, 0 = bad melon).

    Returns a 2-D numpy array with one row per sample.
    """
    dataMat = []
    # Context manager guarantees the file is closed even on a parse error
    # (the original version leaked the handle).
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')  # fields separated by tabs
            dataMat.append([float(v) for v in curLine])
    return array(dataMat)


def gradienet_down(dataArr, betaArrT, n, a, error):
    """Fit the logistic-regression parameters by fixed-step gradient descent.

    dataArr  - samples, one per row; first two columns are features,
               third column is the 0/1 label
    betaArrT - initial parameter vector (w1, w2, b)
    n        - iteration cap
    a        - step size (learning rate)
    error    - stop when the log-likelihood changes by less than this

    Returns (fitted parameters, index of the last iteration executed).
    """
    yArr = dataArr[:, 2]
    # Append a column of ones so that b is folded into the parameter vector.
    xArr = column_stack((dataArr[:, 0:2], ones(shape(dataArr)[0])))
    for i in range(n):
        before = logistic(xArr, yArr, betaArrT)
        betaArrT = betaArrT - a * dfunc(xArr, yArr, betaArrT)
        after = logistic(xArr, yArr, betaArrT)
        if abs(before - after) < error:
            break
    return betaArrT, i


def newtwon(dataArr, betaArrT, n, error):
    """Fit the logistic-regression parameters by Newton's method.

    y (last column of dataArr) is the label: 1 = good melon, 0 = bad melon.
    Stops when the squared gradient norm drops below `error`, or after n
    iterations.

    Returns (fitted parameters, index of the last iteration executed).
    """
    yArr = dataArr[:, 2]
    # Fold the bias b into the parameters by appending a ones column.
    xArr = column_stack((dataArr[:, 0:2], ones(shape(dataArr)[0])))
    for i in range(n):
        grad = dfunc(xArr, yArr, betaArrT)          # first derivative
        if dot(grad, grad.transpose()) < error:      # ||grad||^2 small enough
            break
        hess = d2func(xArr, yArr, betaArrT)          # second derivative
        # Newton step: beta <- beta - H^{-1} * grad, via a linear solve.
        betaArrT = betaArrT - solve(hess, grad)
    return betaArrT, i


def LDA(dataArr, betaArrT):
    """Linear discriminant analysis: w = Sw^{-1} (mu0 - mu1), eq. (3.39).

    dataArr  - samples, one per row; first two columns are the features,
               last column is the 0/1 label
    betaArrT - unused; kept so the call signature matches the other trainers

    Returns the projection direction w as a (2, 1) array.
    """
    xArr = dataArr[:, 0:2]
    labels = dataArr[:, -1]
    m, n = shape(xArr)
    # Per-class means (1-D vectors of length 2).
    u0 = xArr[labels == 0].mean(axis=0)   # negative-class mean
    u1 = xArr[labels == 1].mean(axis=0)   # positive-class mean
    # Within-class scatter matrix Sw = sum over samples of d d^T.
    # The original relied on an accidental (1,2)*(2,1) broadcasting quirk to
    # get the outer product; make it explicit.
    sw = zeros((2, 2))
    for i in range(m):
        d = xArr[i, :] - (u0 if labels[i] == 0 else u1)
        sw += outer(d, d)
    # Solve Sw w = (u0 - u1) instead of forming the inverse explicitly.
    return solve(sw, (u0 - u1).reshape(2, 1))


def logistic(xArr, yArr, betaArrT):
    """Negative log-likelihood l(beta) of eq. (3.27).

    l(beta) = sum_i [ -y_i * beta^T x_i + ln(1 + e^{beta^T x_i}) ]

    Vectorised over samples; logaddexp(0, z) computes ln(1 + e^z) without
    overflowing for large z (the original exp() returned inf there).
    """
    z = dot(xArr, betaArrT)                      # (m,) linear responses
    return sum(logaddexp(0.0, z) - yArr * z)


def p1(x, betaArrT):
    """Posterior probability P(y=1 | x), eq. (3.23): sigmoid(beta^T x).

    Uses the piecewise-stable form so exp() is never evaluated at a large
    positive argument (the original returned nan for large beta^T x
    because inf / (1 + inf) is nan).
    """
    z = dot(x, betaArrT)
    if z >= 0:
        return 1.0 / (1.0 + exp(-z))
    ez = exp(z)
    return ez / (1.0 + ez)


def dfunc(xArr, yArr, betaArrT):
    """First derivative of l(beta), eq. (3.30).

    grad = -sum_i x_i (y_i - p1(x_i)) = -X^T (y - p)

    Vectorised instead of the original per-sample Python loop, and the
    sigmoid is written as 1/(1+e^{-z}) so large positive responses do not
    overflow to nan. Returns a 1-D array of length n (same as before:
    transpose of a 1-D array is a no-op).
    """
    z = dot(xArr, betaArrT)          # (m,) linear responses
    p = 1.0 / (1.0 + exp(-z))        # P(y=1 | x_i) per sample
    return -dot(xArr.T, yArr - p)


def d2func(xArr, yArr, betaArrT):
    """Second derivative (Hessian) of l(beta), eq. (3.31).

    H = X^T diag(p_i (1 - p_i)) X

    yArr is unused; kept for signature parity with dfunc. Scales the rows
    of X directly instead of materialising the m-by-m diagonal matrix
    (which was O(m^2) memory) and avoids the deprecated numpy.matrix type.
    Returns an (n, n) ndarray.
    """
    z = dot(xArr, betaArrT)
    p = 1.0 / (1.0 + exp(-z))        # per-sample sigmoid
    w = p * (1.0 - p)                # per-sample weight p(1-p)
    # (X^T * w) scales column i of X^T by w[i], i.e. X^T diag(w).
    return dot(xArr.T * w, xArr)


def plotDataSet(dataArr, betaArrT, fig, index):
    """Scatter-plot the data set on subplot `index` of `fig` and overlay
    the decision line defined by betaArrT.

    Column 0 is density, column 1 is sugar content; column 2 is the label
    (1 = good melon, plotted red; 0 = bad melon, plotted green).
    """
    m, n = shape(dataArr)            # m samples, n-1 feature values
    good_x, good_y = [], []          # good melons: density / sugar content
    bad_x, bad_y = [], []            # bad melons: density / sugar content
    for i in range(m):
        xs, ys = (good_x, good_y) if dataArr[i, 2] == 1 else (bad_x, bad_y)
        for j in range(n - 1):
            (xs if j == 0 else ys).append(dataArr[i, j])
    ax = fig.add_subplot(index)
    ax.scatter(good_x, good_y, s=30, c='red')    # red = good melons
    ax.scatter(bad_x, bad_y, s=30, c='green')    # green = bad melons
    plotResult(betaArrT)


def plotResult(betaArrT):
    """Draw the decision line w1*x + w2*y + b = 0 for beta = (w1, w2, b),
    i.e. y = -(b + w1*x) / w2, over x in [0, 0.8)."""
    xs = arange(0, 0.8, 0.01)
    ys = [-(betaArrT[2] + betaArrT[0] * x) / betaArrT[1] for x in xs]
    plt.plot(xs, ys)


if __name__ == '__main__':
    # Watermelon data set 3.0: tab-separated, label in the last column.
    fileName = '/home/jq/桌面/西瓜数据集3.0'
    dataArr = loadDataSet(fileName)

    # Shared initial guess for beta = (w1, w2, b).
    betaArrT = array([1, 1, 1]).transpose()
    n = 1000        # iteration cap for both iterative solvers
    a = 0.1         # gradient-descent step size
    error = 0.0001  # convergence tolerance

    result, step = gradienet_down(dataArr, betaArrT, n, a, error)
    result1, step1 = newtwon(dataArr, betaArrT, n, error)
    result2 = LDA(dataArr, betaArrT)

    print('梯度下降法共迭代了' + str(step) + '次,结果为:beta=' + str(result))
    print('牛顿法共迭代了' + str(step1) + '次,结果为:beta=' + str(result1))
    print('LDA结果为:w=' + str(result2))

    # One subplot per trainer, each with its own decision line.
    fig = plt.figure()
    plotDataSet(dataArr, result, fig, 221)
    plotDataSet(dataArr, result1, fig, 222)
    # LDA only yields the projection direction w; append b = 0 for plotting.
    result2 = [result2[0, 0], result2[1, 0], 0]
    plotDataSet(dataArr, result2, fig, 223)
    plt.show()

 

 数据集:

参考:https://www.jianshu.com/p/3a9023d141bc

   https://blog.csdn.net/bebusy/article/details/82753619

 

posted @ 2019-07-18 21:52  JQS  阅读(1158)  评论(0编辑  收藏  举报