Training a Multilayer Perceptron on MNIST with the Backpropagation Algorithm
I. The Backpropagation Algorithm and Its Derivation
Backpropagation is the most commonly used and most effective algorithm for training artificial neural networks. The idea runs as follows:
1. Forward pass: the inputs together with a bias term are weighted, summed, and "packaged" by the activation function on the way to the hidden layer; the hidden-layer outputs together with their bias are again weighted, summed, and passed through the activation function to the output layer, where the total error is computed.
2. Backward pass: (a) from the output layer back to the hidden layer, adjust the weight matrix between the hidden and output layers via the chain rule; (b) from each later hidden layer back to the previous one, adjust the weight matrix between hidden layers via the chain rule.
3. Repeat these two passes, capping the number of iterations, until the test results are satisfactory.
The derivation of backpropagation is as follows:
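For the concrete network used in Section II (one hidden layer, ReLU activation $\varphi$, squared-error loss, one-hot target $t$), the forward pass is

$$\mathrm{net}^{(1)} = W^{(1)}x,\qquad o^{(1)} = \varphi\big(\mathrm{net}^{(1)}\big),\qquad \mathrm{net}^{(2)} = W^{(2)}o^{(1)},\qquad o^{(2)} = \varphi\big(\mathrm{net}^{(2)}\big),$$
$$E = \tfrac{1}{2}\sum_k \big(o^{(2)}_k - t_k\big)^2 .$$

By the chain rule, the gradient for an output-layer weight $w^{(2)}_{kj}$ splits into three factors:

$$\frac{\partial E}{\partial w^{(2)}_{kj}}
= \frac{\partial E}{\partial o^{(2)}_k}\cdot\frac{\partial o^{(2)}_k}{\partial \mathrm{net}^{(2)}_k}\cdot\frac{\partial \mathrm{net}^{(2)}_k}{\partial w^{(2)}_{kj}}
= \big(o^{(2)}_k - t_k\big)\,\varphi'\big(\mathrm{net}^{(2)}_k\big)\,o^{(1)}_j
\;\equiv\; \delta^{(2)}_k\, o^{(1)}_j,$$

where $\varphi'(z)=1$ if $z>0$ and $0$ otherwise. For a hidden-layer weight, the error flows back through every output node:

$$\frac{\partial E}{\partial w^{(1)}_{ji}}
= \Big(\sum_k \delta^{(2)}_k\, w^{(2)}_{kj}\Big)\,\varphi'\big(\mathrm{net}^{(1)}_j\big)\, x_i
\;\equiv\; \delta^{(1)}_j\, x_i .$$

Each weight is then updated as $w \leftarrow w - \eta\,\partial E/\partial w$ with learning rate $\eta$. In the code below, $\delta^{(2)}$ appears as E_net2, and the two gradient steps are implemented in bp2 and bp1.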


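Putting the derivation into code, here is a minimal one-pass sketch on a toy 2-2-1 network (biases omitted; every number is made up purely for illustration, and the full MNIST version follows in Section II):

import numpy as np

relu  = lambda z: np.maximum(0, z)          # activation
drelu = lambda z: (z > 0).astype(float)     # its derivative

x  = np.array([1.0, 0.5])                   # input
t  = np.array([1.0])                        # target
W1 = np.array([[0.2, -0.1], [0.4, 0.3]])    # input -> hidden weights
W2 = np.array([[0.5, 0.2]])                 # hidden -> output weights
eta = 0.1                                   # learning rate

# 1. forward pass
net1 = W1 @ x;    out1 = relu(net1)
net2 = W2 @ out1; out2 = relu(net2)
E = 0.5 * np.sum((out2 - t) ** 2)           # total error

# 2. backward pass (chain rule)
d2 = (out2 - t) * drelu(net2)               # output-layer delta
d1 = (W2.T @ d2) * drelu(net1)              # hidden-layer delta
W2 -= eta * np.outer(d2, out1)              # adjust hidden -> output weights
W1 -= eta * np.outer(d1, x)                 # adjust input -> hidden weights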
II. Training an MLP on MNIST
To keep the example manageable, we train on only the first 5000 MNIST samples. We assume a single hidden layer with 500 nodes and a learning rate of 0.001. (The number of hidden layers, the hidden node count, and the learning rate could all be set more rigorously later, e.g. by repeated trials; here they are fixed up front for convenience.)
The Python code follows. (The class includes an sgd function; with epoch=1 and batchsize equal to the whole dataset, it performs exactly one full training run.)
# -*- coding:utf-8 -*-
# mlp.py
import numpy as np
import struct
# read the data
trainimage_path="E:\\caffe\\study\\work\\train\\train-images-idx3-ubyte\\train-images.idx3-ubyte"
trainlabel_path="E:\\caffe\\study\\work\\train\\train-labels-idx1-ubyte\\train-labels.idx1-ubyte"
def getimage(filepath):  # convert the binary IDX file into pixel-feature data
    readfile = open(filepath, 'rb')  # open the file in binary mode
    file = readfile.read()
    readfile.close()
    index = 0
    # IDX header: magic number, image count, rows, cols (big-endian ints)
    nummagic, numimgs, numrows, numcols = struct.unpack_from(">iiii", file, index)
    index += struct.calcsize(">iiii")
    images = []
    for i in range(numimgs):
        imgval = struct.unpack_from(">784B", file, index)  # one 28x28 image
        index += struct.calcsize(">784B")
        imgval = list(imgval)
        for j in range(len(imgval)):  # binarize the pixels
            if imgval[j] > 1:
                imgval[j] = 1
        images.append(imgval)
    return np.array(images)
def getlabel(filepath):  # convert the binary IDX file into a label array
    readfile = open(filepath, 'rb')
    file = readfile.read()
    readfile.close()
    index = 0
    magic, numitems = struct.unpack_from(">ii", file, index)
    index += struct.calcsize(">ii")
    labels = []
    for x in range(numitems):
        im = struct.unpack_from(">1B", file, index)  # one label byte
        index += struct.calcsize(">1B")
        labels.append(im[0])
    return np.array(labels)
trainimage = getimage(trainimage_path)
trainimage = [list(i) for i in trainimage]
trainimage = [i + [1] for i in trainimage]  # append a constant 1 as the bias input
trainimage = trainimage[:5000]  # keep only the first 5000 samples
trainlabel = getlabel(trainlabel_path)
trainlabel = list(trainlabel)
trainlabel = trainlabel[:5000]
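As a quick sanity check (assuming the paths above point at the standard MNIST training files), the loaded data should look like this:

print(len(trainimage), len(trainimage[0]))                # 5000 785 (784 binarized pixels + 1 bias)
print(len(trainlabel), min(trainlabel), max(trainlabel))  # 5000 0 9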
class Mlp(object):
    def __init__(self, xdata=trainimage, ydata=trainlabel, w1=0, w2=0, eta=0.001, hnum=500, eps=0.5, times=15):
        self.xdata = xdata
        self.ydata = ydata
        self.w1 = w1
        self.w2 = w2
        self.eta = eta      # learning rate
        self.eps = eps      # error threshold
        self.times = times  # cap on the number of iterations
        self.hnum = hnum    # number of hidden-layer nodes
    # BP training of the MLP on MNIST: we assume a single hidden layer whose node count
    # is set by trial and error, initially 500, to be revised based on results
    def relu(self, x):  # ReLU activation function
        return np.max([0, x])
    def fp(self, itrainimage, itrainlabel):  # forward pass
        net1 = np.dot(self.w1, itrainimage)  # weighted sum: input -> hidden
        out1 = np.mat([self.relu(i) for i in list(np.array(net1)[0])] + [1])  # apply the hidden-layer activation, append the bias 1
        net2 = np.dot(self.w2, out1.T)  # weighted sum: hidden -> output
        out2 = [self.relu(i) for i in list(np.array(net2.T)[0])]  # apply the ReLU activation
        Ei = [0.5 * (out2[i] - 0) ** 2 if i != itrainlabel else 0.5 * (out2[i] - 1) ** 2 for i in range(10)]  # per-node error against the one-hot target
        E = sum(Ei)  # total error
        # print(out2)  # uncomment to watch the raw outputs during training
        return out2, out1, E
    def bp2(self, out2, out1, itrainlabel):  # backward pass: output -> hidden
        E_out2 = [out2[i] - 0 if i != itrainlabel else out2[i] - 1 for i in range(10)]  # dE/d(out2)
        out_net2 = [0 if i == 0 else 1 for i in out2]  # ReLU derivative at the output nodes
        net_w2 = [list(np.array(out1)[0]) for i in range(10)]  # d(net2)/d(w2): the hidden outputs
        E_net2 = [E_out2[i] * out_net2[i] for i in range(10)]  # first two chain-rule factors (output-layer delta)
        E_w2 = np.mat([list(np.array(net_w2[i]) * E_net2[i]) for i in range(10)])  # full gradient dE/d(w2)
        w2new = self.w2 - self.eta * E_w2
        return E_net2, w2new
    def summult(self, x, y):  # multiply two lists elementwise and sum the result
        return sum([x[i] * y[i] for i in range(len(x))])
    def bp1(self, out1, itrainimage, E_net2):  # backward pass: hidden -> input
        E_out1 = [self.summult(E_net2, list(np.array(self.w2[:, i].T)[0])) for i in range(self.hnum)]  # dE/d(out1): error gathered from all output nodes
        out_net1 = [0 if i == 0 else 1 for i in list(np.array(out1)[0][:-1])]  # ReLU derivative at the hidden nodes (bias excluded)
        net_w1 = [itrainimage for i in range(self.hnum)]  # d(net1)/d(w1): the input
        E_net1 = [E_out1[i] * out_net1[i] for i in range(self.hnum)]  # hidden-layer delta
        E_w1 = np.mat([list(np.array(net_w1[i]) * E_net1[i]) for i in range(self.hnum)])  # full gradient dE/d(w1)
        w1new = self.w1 - self.eta * E_w1
        return w1new
    def sgd(self, epoch, batchsize):  # batched training: batchsize samples per batch, epoch full passes; this also serves as the training driver
        self.w1 = np.mat(np.random.uniform(-0.5, 0.5, size=(self.hnum, 785)))  # initialize weights: input -> hidden
        self.w2 = np.mat(np.random.uniform(-0.5, 0.5, size=(10, self.hnum + 1)))  # initialize weights: hidden -> output (the hidden layer needs a bias too)
        for n in range(epoch):
            xbatches = [self.xdata[k:k + batchsize] for k in range(0, len(self.ydata), batchsize)]
            ybatches = [self.ydata[k:k + batchsize] for k in range(0, len(self.ydata), batchsize)]
            for j in range(len(xbatches)):
                L = 0
                exit_flag = False
                # keep refining this batch while any sample's error exceeds the threshold
                while any([self.fp(xbatches[j][i], ybatches[j][i])[2] > self.eps for i in range(len(xbatches[j]))]):
                    for i in range(len(xbatches[j])):
                        while self.fp(xbatches[j][i], ybatches[j][i])[2] > self.eps:
                            out2, out1, E = self.fp(xbatches[j][i], ybatches[j][i])
                            E_net2, w2new = self.bp2(out2, out1, ybatches[j][i])
                            w1new = self.bp1(out1, xbatches[j][i], E_net2)
                            self.w2 = w2new
                            self.w1 = w1new
                            L = L + 1
                            if L > self.times:  # stop once the iteration cap is reached
                                exit_flag = True
                                break
                        if exit_flag:
                            break
                    if exit_flag:
                        break
        print("complete")
    def test(self):  # compute the accuracy
        a = 0
        for i in range(len(self.ydata)):
            out2, out1, E = self.fp(self.xdata[i], self.ydata[i])
            a = a + (np.argmax(out2) == self.ydata[i])  # correct if the true class scores highest
        return a / len(self.ydata)
def main():
    m = Mlp()
    m.sgd(1, len(trainlabel))  # epoch=1, batchsize=the whole dataset
    acc = m.test()
    print(acc)

if __name__ == "__main__":
    main()
Result: after 15 iterations, the accuracy reaches 94%.
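To train with genuine mini-batches instead of one full batch, simply pass a smaller batchsize to sgd; the values below are arbitrary examples. Note that eps and times then bound the refinement of each batch separately.

m = Mlp()
m.sgd(2, 100)  # 2 full passes, 100 samples per batch
print(m.test())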
Note: the training outcome is affected by many factors, e.g. the initial weight matrices, the choice of activation function, and the iteration-count setting.
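To choose such settings less arbitrarily, a simple (if slow) option is a small grid search. A minimal sketch using the Mlp class above, with made-up candidate values (a held-out validation split would be better than scoring on the training data, but this keeps the sketch short):

best = (0, None)
for hnum in (100, 300, 500):            # candidate hidden sizes
    for eta in (0.0005, 0.001, 0.005):  # candidate learning rates
        m = Mlp(eta=eta, hnum=hnum)
        m.sgd(1, len(trainlabel))
        acc = m.test()
        if acc > best[0]:
            best = (acc, (hnum, eta))
print("best accuracy %.3f with hnum=%d, eta=%g" % (best[0], best[1][0], best[1][1]))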
Note: do not set the learning rate too large, or the ReLU activations will quickly drop to 0, and once a unit's output hits 0 it is very hard to reactivate (the "dying ReLU" problem).
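A minimal single-weight illustration of this effect (all numbers made up): with a sensible learning rate the weight converges toward the target, while an overly large step pushes the pre-activation negative, the ReLU gradient becomes 0, and the weight never moves again.

relu  = lambda z: max(0.0, z)
drelu = lambda z: 1.0 if z > 0 else 0.0  # ReLU derivative

x, t = 1.0, 0.5                # single input and target
for eta in (0.1, 2.0):         # sensible vs. overly large learning rate
    w = 2.0
    for step in range(20):
        out = relu(w * x)
        grad = (out - t) * drelu(w * x) * x  # dE/dw for E = 0.5*(out-t)**2
        w -= eta * grad
    print("eta=%g -> w=%.3f, output=%.3f" % (eta, w, relu(w * x)))
# eta=0.1: w converges toward 0.5; eta=2.0: w jumps to -1.0 and the unit stays dead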
