逻辑回归
数据
吴恩达老师的数据
数据来源
Logistic Regression(逻辑回归)介绍
线性二分类模型:有一个线性决策面,用 sigmoid 函数来计算后验概率,是判别模型。
假设函数
$$h_\theta(x)=g(\theta^{T}x)=\frac{1}{1+e^{-\theta^{T}x}}$$
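代价函数
$$J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)})+\left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right]$$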
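梯度计算
$$\frac{\partial J(\theta)}{\partial\theta_j}=\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}$$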
代码实现
import numpy as np
import math
import matplotlib.pyplot as plt
import random
def _read_numeric_rows(path):
    """Parse a whitespace-separated numeric text file into rows of floats.

    Blank lines are skipped; any amount of spaces/tabs between values is
    accepted.
    """
    rows = []
    with open(path) as data_file:
        for line in data_file:
            fields = line.split()
            if fields:
                rows.append([float(field) for field in fields])
    return rows


def getData(x_path="./ex4Data/ex4x.dat", y_path="./ex4Data/ex4y.dat"):
    """Load the feature and label files and return them as matrices.

    Args:
        x_path: Text file with two numeric feature values per line.
        y_path: Text file with one numeric label per line.

    Returns:
        A tuple ``(xMat, yMat, x0Mat)`` of ``numpy.matrix`` objects, where
        ``m`` is the number of samples read:

        * ``xMat``  -- ``m x 3`` design matrix: a bias column of ones
          followed by the two feature columns.
        * ``yMat``  -- ``m x 1`` column of labels.
        * ``x0Mat`` -- ``1 x m`` row of ones (the bias column, untransposed).

    Raises:
        ValueError: If a feature line has fewer than two values, or the two
            files do not contain the same number of samples.
    """
    feature_rows = _read_numeric_rows(x_path)
    label_rows = _read_numeric_rows(y_path)

    if len(feature_rows) != len(label_rows):
        raise ValueError(
            "sample count mismatch: %d feature rows vs %d label rows"
            % (len(feature_rows), len(label_rows))
        )
    for row in feature_rows:
        if len(row) < 2:
            raise ValueError("each feature line must contain two values")

    X0 = [1.0] * len(feature_rows)  # bias term
    X1 = [row[0] for row in feature_rows]
    X2 = [row[1] for row in feature_rows]
    Y = [row[0] for row in label_rows]

    # np.asmatrix keeps the matrix semantics the training code relies on
    # (``*`` as matrix product, ``.getA()``); the ``np.mat`` alias no longer
    # exists in NumPy 2.x.
    xMat = np.asmatrix([X0, X1, X2]).T
    yMat = np.asmatrix(Y).T
    x0Mat = np.asmatrix(X0)
    return xMat, yMat, x0Mat
# Load the dataset once when the module is executed; the three results are
# kept as module-level names.
xMat,yMat,x0Mat=getData()
def cost(xMat, yMat, weights):
    """Return the mean cross-entropy loss of the logistic model.

    Computes ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` with
    ``h = sigmoid(xMat . weights)``.

    Args:
        xMat: ``m x n`` design matrix (``numpy.matrix`` or 2-D array).
        yMat: ``m x 1`` column of 0/1 labels.
        weights: ``n x 1`` column of model parameters.

    Returns:
        The loss as a scalar float.
    """
    m = xMat.shape[0]
    hypothesis = 1 / (np.exp(-np.dot(xMat, weights)) + 1)
    # Keep probabilities strictly inside (0, 1) so log() never sees 0 when
    # the sigmoid saturates.
    tiny = 1e-15
    hypothesis = np.clip(hypothesis, tiny, 1 - tiny)
    # Element-wise products of m x 1 columns; uses only the arguments, no
    # module-level state.
    log_likelihood = np.sum(
        np.multiply(yMat, np.log(hypothesis))
        + np.multiply(1 - yMat, np.log(1 - hypothesis))
    )
    return -log_likelihood / m
def Training(xMat, yMat, alpha=0.01, maxepochs=200, epslion=0.001):
    """Fit logistic-regression weights with batch gradient steps.

    Every epoch uses all samples to compute the gradient. Training stops
    after ``maxepochs`` epochs, or as soon as the loss changes by less than
    ``epslion`` between two consecutive epochs. The recorded losses are
    plotted with matplotlib before returning.

    Args:
        xMat: ``m x n`` design matrix.
        yMat: ``m x 1`` column of 0/1 labels.
        alpha: Learning rate.
        maxepochs: Maximum number of epochs.
        epslion: Loss-change threshold for early stopping.

    Returns:
        The ``n x 1`` weights after the last update performed.
    """
    m, n = xMat.shape
    weights = np.zeros((n, 1))  # n x 1
    # Per the original author's note: starting from all zeros needs roughly
    # 200,000 epochs, so the intercept is initialised close to the known
    # answer to shorten training.
    weights[0] = [-16]
    epochs_count = 0
    loss_list = []
    loss = cost(xMat, yMat, weights)
    while epochs_count < maxepochs:
        hypothesis = 1 / (np.exp(-np.dot(xMat, weights)) + 1)
        error = yMat - hypothesis  # m x 1
        grad = (1 / m) * np.dot(xMat.T, error)
        # error is (y - h), so adding alpha*grad moves downhill on the loss.
        weights = weights + alpha * grad
        loss_new = cost(xMat, yMat, weights)  # loss after the update
        if abs(loss_new - loss) < epslion:
            break
        loss_list.append(loss_new)
        loss = loss_new  # reuse instead of recomputing next epoch
        epochs_count += 1
        print("第"+str(epochs_count)+"轮,weight0="+str(weights[0])+",weight1="+str(weights[1])+",weights2="+str(weights[2]))
    plt.plot(loss_list)
    plt.show()
    return weights
def SGD_Training(xMat, yMat, alpha=0.01, maxepochs=200, epslion=0.000001):
    """Fit logistic-regression weights with stochastic gradient steps.

    Each step draws one sample at random to compute the gradient, while the
    loss used for the stopping test and the plot is always evaluated on the
    full data set. Training stops after ``maxepochs`` steps, or as soon as
    the full-data loss changes by less than ``epslion`` between two
    consecutive steps. The recorded losses are plotted before returning.

    Args:
        xMat: ``m x n`` design matrix; row indexing must yield a ``1 x n``
            row (as ``numpy.matrix`` does).
        yMat: ``m x 1`` column of 0/1 labels.
        alpha: Learning rate.
        maxepochs: Maximum number of steps.
        epslion: Loss-change threshold for early stopping.

    Returns:
        The ``n x 1`` weights after the last update performed.
    """
    m, n = xMat.shape
    weights = np.zeros((n, 1))  # n x 1
    # Per the original author's note: starting from all zeros needs roughly
    # 200,000 epochs, so the intercept is initialised close to the known
    # answer to shorten training.
    weights[0] = [-16]
    epochs_count = 0
    loss_list = []
    loss = cost(xMat, yMat, weights)
    while epochs_count < maxepochs:
        i = random.randint(0, m - 1)  # both bounds inclusive
        data_x = xMat[i]
        data_y = yMat[i]
        hypothesis = 1 / (np.exp(-np.dot(data_x, weights)) + 1)
        error = data_y - hypothesis  # 1 x 1: a single sample
        # NOTE(review): the single-sample gradient is still scaled by 1/m,
        # so the effective step size is alpha/m.
        grad = (1 / m) * np.dot(data_x.T, error)
        weights = weights + alpha * grad
        loss_new = cost(xMat, yMat, weights)  # full-data loss after update
        if abs(loss - loss_new) < epslion:
            break
        loss_list.append(loss_new)
        loss = loss_new  # reuse instead of recomputing next step
        epochs_count += 1
        print("第"+str(epochs_count)+"轮,weight0="+str(weights[0])+",weight1="+str(weights[1])+",weights2="+str(weights[2]))
    plt.plot(loss_list)
    plt.show()
    return weights
def Newton_Training(xMat, yMat, maxepochs=200, epslion=0.001):
    """Fit logistic-regression weights with Newton's method.

    Starting from all-zero weights, each iteration subtracts the
    pseudo-inverse of the Hessian times the gradient. Iteration stops after
    ``maxepochs`` rounds, or once the loss changes by less than ``epslion``
    between two consecutive rounds. The recorded losses are plotted before
    returning.

    Args:
        xMat: ``m x n`` design matrix. Assumed to be a ``numpy.matrix``:
            the code uses ``*`` as a matrix product and calls ``.getA()``.
        yMat: ``m x 1`` column of 0/1 labels.
        maxepochs: Maximum number of Newton iterations.
        epslion: Loss-change threshold for early stopping.

    Returns:
        The ``n x 1`` weights after the last update performed.
    """
    sample_count, feature_count = xMat.shape
    weights = np.zeros((feature_count, 1))  # n x 1
    history = []
    iteration = 0
    while iteration < maxepochs:
        loss_before = cost(xMat, yMat, weights)
        sigmoid = 1 / (np.exp(-np.dot(xMat, weights)) + 1)
        residual = sigmoid - yMat  # m x 1
        gradient = (1 / sample_count) * np.dot(xMat.T, residual)  # n x 1
        # Diagonal entries h * (1 - h) of the Hessian's weighting matrix.
        curvature = np.multiply(sigmoid, (1 - sigmoid)).T.getA()[0]
        hessian = (1 / sample_count) * xMat.T * np.diag(curvature) * xMat
        # Newton step; the pseudo-inverse is used in place of a plain inverse.
        weights = weights - np.linalg.pinv(hessian) * gradient
        loss_after = cost(xMat, yMat, weights)
        if abs(loss_after - loss_before) < epslion:
            break
        history.append(loss_after)
        iteration += 1
        print("".join([
            "第", str(iteration),
            "轮,weight0=", str(weights[0]),
            ",weight1=", str(weights[1]),
            ",weights2=", str(weights[2]),
        ]))
    plt.plot(history)
    plt.show()
    return weights
#批梯度下降算法
#Weights=Training(xMat,yMat,alpha=0.001,maxepochs=10000,epslion=0.00001)
#随机梯度下降算法
#Weights=SGD_Training(xMat,yMat,alpha=0.001,maxepochs=1000)
#牛顿法
#Weights=Newton_Training(xMat,yMat,maxepochs=10,epslion=0.001)

浙公网安备 33010602011771号