# RNN求解过程推导与实现

BPTT，即 Back Propagation Through Time（随时间反向传播）。

RNN展开网络如下图所示：

RNN展开结构.jpg

RNN节点结构.jpg

RNN节点内部连接.jpg

function error = binaryRNN( )
% BINARYRNN  Train a small RNN with BPTT to add two binary numbers.
%   error = binaryRNN() processes 10000 random training samples. For each
%   sample the T binary digits of two integers are fed to the network one bit
%   position per time step, starting from the least-significant (rightmost)
%   bit; the target at each step is the matching bit of their sum. The
%   returned value is the sum over all samples of norm(y-c,2)/2.
largestNumber=256;
T=8;
dic=dec2bin(0:largestNumber-1)-'0'; % lookup table: row k+1 holds the binary digits of k
%% Initialise parameters
eta=0.1;        % learning rate
inputDim=2;     % input dimension (one bit from each operand)
hiddenDim=16;   % number of hidden units
outputDim=1;    % number of output units

W=rand(hiddenDim,outputDim)*2-1; % hidden-to-output weights, uniform in (-1,1)
U=rand(hiddenDim,hiddenDim)*2-1; % hidden-to-hidden weights, uniform in (-1,1)
V=rand(inputDim,hiddenDim)*2-1;  % input-to-hidden weights, uniform in (-1,1)

delta_W=zeros(hiddenDim,outputDim); % gradient accumulators, summed over time steps
delta_U=zeros(hiddenDim,hiddenDim);
delta_V=zeros(inputDim,hiddenDim);
error=0;
for p=1:10000
    % Draw operands from 0..largestNumber/2-1 so that the sum stays below
    % largestNumber and dic(cInt+1,:) is always a valid row of the table.
    aInt=randi(largestNumber/2)-1;
    bInt=randi(largestNumber/2)-1;
    a=dic(aInt+1,:);
    b=dic(bInt+1,:);
    cInt=aInt+bInt;
    c=dic(cInt+1,:);
    y=zeros(1,T);

    preh=zeros(1,hiddenDim);   % hidden state before the first time step
    hDic=zeros(T,hiddenDim);   % row t stores the hidden state for bit position t
    %% Forward pass
    for t=T:-1:1 % start at the lowest bit, i.e. the right end of the binary array
        x=[a(t),b(t)];
        h=sigmoid(x*V+preh*U);
        y(t)=sigmoid(h*W);
        hDic(t,:)=h;
        preh=h;
    end

    err=y-c;
    error=error+norm(err,2)/2;
    next_delta_h=zeros(1,hiddenDim);
    %% Backward pass
    % The forward pass ran t=T..1, so index 1 is the last time step and the
    % hidden state that preceded step t is stored at hDic(t+1,:).
    for t=1:T
        delta_y = err(t).*sigmoidOutput2d(y(t));
        delta_h=(delta_y*W'+next_delta_h*U').*sigmoidOutput2d(hDic(t,:));

        delta_W=delta_W+hDic(t,:)'*delta_y;
        if t<T % step T had an all-zero previous hidden state, nothing to add
            delta_U=delta_U+hDic(t+1,:)'*delta_h;
        end
        delta_V=delta_V+[a(t),b(t)]'*delta_h;
        next_delta_h=delta_h;
    end
    % Gradient descent step
    W=W-eta*delta_W;
    U=U-eta*delta_U;
    V=V-eta*delta_V;

    % Reset the accumulators for the next sample
    delta_W=zeros(hiddenDim,outputDim);
    delta_U=zeros(hiddenDim,hiddenDim);
    delta_V=zeros(inputDim,hiddenDim);

    if mod(p,1000)==0
        fprintf('Samples:%d\n',p);
        fprintf('True:%d\n',cInt);
        fprintf('Predict:%d\n',bin2dec(int2str(round(y))));
        fprintf('Error:%f\n',norm(err,2)/2);
    end
end
end
71.
function sx=sigmoid(x)
% SIGMOID  Element-wise logistic function 1/(1+exp(-x)).
%   x may be a scalar, vector or matrix; sx has the same size as x.
sx=1./(1+exp(-x));
end
75.
function dx=sigmoidOutput2d(output)
% SIGMOIDOUTPUT2D  Derivative of the sigmoid expressed through its output.
%   output is a value already produced by sigmoid; the result is
%   output.*(1-output), computed element-wise.
dx=output.*(1-output);
end

#include "highgui.h"
#include "cv.h"
#include "math.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
using namespace std;
7.
// random(x): pseudo-random integer in [0, x) built from two rand() draws.
// The product is formed in long long so it cannot overflow int, and the
// argument is parenthesised so expressions such as random(n + 1) expand
// correctly. NOTE: a product of two draws is not uniformly distributed.
#define random(x) (static_cast<int>((static_cast<long long>(rand()) * rand()) % (x)))
9.
10. void Display(CvMat* mat)

11. cout << setiosflags(ios::fixed);
12. for (int i = 0; i < mat->rows; i++)

13. for (int j = 0; j < mat->cols; j++)
14. cout << cvmGet(mat, i, j) << " "
15. cout << endl;

16.

17.
18.
/// Logistic sigmoid: returns 1 / (1 + e^(-x)).
float sigmoid(float x)
{
    return 1.0f / (1.0f + std::exp(-x));
}

22. CvMat* sigmoidM(CvMat* mat)

23. CvMat*mat2 = cvCloneMat(mat);
24.
25. for (int i = 0; i < mat2->rows; i++)

26. for (int j = 0; j < mat2->cols; j++)
27. cvmSet(mat2, i, j, sigmoid(cvmGet(mat, i, j)));

28. return mat2;

/// Derivative of the sigmoid function.
/// Note: `x` must already be the sigmoid's OUTPUT, not its input; the
/// derivative is then x * (1 - x).
float diffSigmoid(float x)
{
    return x * (1 - x);
}

33. CvMat* diffSigmoidM(CvMat* mat)

34. CvMat* mat2 = cvCloneMat(mat);
35.
36. for (int i = 0; i < mat2->rows; i++)

37. for (int j = 0; j < mat2->cols; j++)

38. float t = cvmGet(mat, i, j);
39. cvmSet(mat2, i, j, t*(1 - t));

40.

41. return mat2;
42.

43.
44.
45. /**************随机生成inputdim个整数，并求和******************
46. * inputdim 整数的个数
47. * MAX 整数的最大范围
48. * Sample 存放整数
49. * 返回 整数和
50. **************************************************************/
51. int sample(int inputdim, CvMat* Sample,int MAX)

52. int sum = 0
53. for (int i = 0; i < inputdim; i++)

54. int t = random(MAX);
55. cvmSet(Sample, 0, i, t);
56. sum += cvmGet(Sample,0,i);

57. return sum;

58. /********将整数拆分成10以内的数，作为每个时刻的输入*************
59. * Sample 存放的整数 大小 1*inputdim
60. * 返回 拆分后的输入数据 大小 inputdim*9
61. ****************************************************************/
62. CvMat* splitM( CvMat*Sample)

63. CvMat* mat = cvCreateMat(Sample->cols, 8, CV_32F);
64. cvSetZero(mat);
65. for (int i = 0; i < mat->rows; i++)

66. int x = cvmGet(Sample,0,i);
67. for (int j = 0; j < 8; ++j)

68. cvmSet(mat,i,j, x % 10);
69. x = x / 10

70. return mat;

71.
72. /***************将数字数组整合成一个整数******************************
73. *mat 数字数组，即每个元素是十以内的整数，大小1*9
74. *返回 整合后的整数
75. *********************************************************************/
76. int merge(CvMat* mat)

77. double d = 0
78. for (int i = mat->cols; i >0; i--)

79. d = 10 * d + round(10*(cvmGet(mat,0,i-1)));
80.

81. return int(d);

82. /*****************将输出的数值拆分**************************************
83. * y 输出的数值
84. * 返回 长度为9的数组,这里转换成了0，1之间的数
85. ***********************************************************************/
86. CvMat* split(int y)

87. CvMat* mat = cvCreateMat(1, 8, CV_32F);
88. for (int i = 0; i < 8; i++)

89. cvmSet(mat,0,i, (y % 10) / 10.0);
90. y = y / 10

91. return mat;
92.

93.
94. /**********************产生随机矩阵******************************
95. * rows, cols, 矩阵的规模
96. * a, b, 区间
97. * 返回 返回[a,b]之间的随机矩阵
98. *****************************************************************/
99. CvMat*randM(int rows,int cols, float a,float b)

100. CvMat* mat = cvCreateMat(rows, cols, CV_32FC1);
101. float* ptr;
102. for (int i = 0; i < mat->rows; i++)

103. for (int j = 0; j < mat->cols; j++)

104. cvmSet(mat, i, j, random(1000) / 1000.0*(b - a) + a);

105. return mat;

106.
107. int main()

108. srand(time(NULL));
109. //首先，先定义网络
110. int inputdim = 2;//不超过10
111. int hiddendim = 16
112. int outputdim = 1
113. float eta = 0.1
114. int MAX = 100000000;//令整数最多八位
115. //初始化参数矩阵
116. CvMat* V = randM(inputdim, hiddendim,-1,1);
117. CvMat* U = randM(hiddendim, hiddendim, -1, 1);
118. CvMat* W = randM(hiddendim, outputdim, -1, 1);
119. CvMat* bh = randM(1, hiddendim, -1, 1);
120. CvMat* by = randM(1, outputdim, -1, 1);//偏置
121.
122. CvMat*Sample = cvCreateMat(1, inputdim, CV_32F);
123. cvSetZero(Sample);
124. CvMat* delta_V = cvCloneMat(V);
125. CvMat* delta_U = cvCloneMat(U);
126. CvMat* delta_W = cvCloneMat(W);
127. CvMat* delta_by = cvCloneMat(by);
128. CvMat* delta_bh = cvCloneMat(bh);
129.
130. //开始训练，训练集大小10000
131. for (int p = 0; p < 20000; p++)

132. int sum = sample(inputdim,Sample,MAX);
133. CvMat* sampleM = splitM(Sample);//每一行对应着一个整数的拆分，个位在前
134. CvMat* d = split(sum);//真实结果拆分，每位存放的是除以10后的小数
135. //正向计算
136. CvMat* pre_h = cvCreateMat(1, hiddendim, CV_32F);
137. cvSetZero(pre_h);//初始化最开始的h_{-1}
138. CvMat* y = cvCreateMat(1, 8, CV_32F);
139. cvSetZero(y);//定义输出量
140. CvMat* h = cvCreateMat(8, hiddendim, CV_32F);//每一行存储一个时刻的隐变量输出
141.
142. CvMat* temp1 = cvCreateMat(1, hiddendim, CV_32F);
143. CvMat* temp2 = cvCreateMat(1, outputdim, CV_32F);
144. CvMat* xt = cvCreateMatHeader(inputdim, 1, CV_32S);
145. for (int t = 0; t < 8; t++)

146. cvGetCol(sampleM, xt, t);//获取第t时刻输入值
147. cvGEMM(xt, V, 1,bh, 1, temp1, CV_GEMM_A_T);
148. cvGEMM(pre_h, U, 1, temp1, 1, pre_h);// t时刻隐层输出
149. pre_h = sigmoidM(pre_h);
150.
151. cvGEMM(pre_h, W, 1, by, 1, temp2);
152. float yvalue = sigmoid(cvmGet(temp2, 0, 0));
153. cvmSet(y, 0, t, yvalue);//t时刻的输出
154.
155. //保存隐层输出
156. for (int j = 0; j < hiddendim; j++)

157. cvmSet(h, t, j, cvmGet(pre_h, 0, j));

158. cvReleaseMat(&temp1);
159. cvReleaseMat(&temp2);
160.
161. //观察代码
162. int oy = merge(y);
163. CvMat* temp = cvCreateMat(1, 8, CV_32F);
164. cvSub(y, d, temp);
165. double error = 0.5*cvDotProduct(temp, temp);
166. if ((p+1)%1000==0

167. cout << "************************第" << p + 1 << "个样本***********" << endl;
168. cout << "真实值:" << sum%MAX << endl;
169. cout << "预测值:" << oy << endl;
170. cout << "误差：" << error << endl;

171. //反向传递误差
172. cvSetZero(delta_V);
173. cvSetZero(delta_U);
174. cvSetZero(delta_W);
175. cvSetZero(delta_bh);
176. cvSetZero(delta_by);
177.
178. CvMat* delta_h = cvCreateMat(1, hiddendim, CV_32F);
179. cvSetZero(delta_h);
180. CvMat* delta_y = cvCreateMat(1, outputdim, CV_32F);
181. cvSetZero(delta_y);
182. CvMat* next_delta_h = cvCreateMat(1, hiddendim, CV_32F);
183. cvSetZero(next_delta_h);
184.
185. for (int t = 7; t > 0; t--)

186. cvmSet(delta_y, 0, 0, (cvmGet(y, 0, t) - cvmGet(d, 0, t))*diffSigmoid(cvmGet(y, 0, t)));
187. cvGEMM(delta_y, W, 1, delta_h, 0, delta_h, CV_GEMM_B_T);
188. cvGEMM(next_delta_h, U, 1, delta_h, 1, delta_h, CV_GEMM_B_T);
189. cvMul(delta_h, diffSigmoidM(cvGetRow(h, temp, t)), delta_h);
190. //更新delta_y,delta_h
191. cvGEMM(cvGetRow(h, temp, t), delta_y, 1, delta_W, 1, delta_W, CV_GEMM_A_T);
192. if (t>0
193. cvGEMM(cvGetRow(h, temp, t - 1), delta_h, 1, delta_U, 1, delta_U, CV_GEMM_A_T);
194. cvGetCol(sampleM, xt, t);
195. cvGEMM(xt, delta_h, 1, delta_V, 1, delta_V);
196. cvAddWeighted(delta_by, 1, delta_y, 1, 0, delta_by);
197. cvAddWeighted(delta_bh, 1, delta_h, 1, 0, delta_bh);
198.
199. cvAddWeighted(delta_h, 1, next_delta_h, 0, 0, next_delta_h);
200.

201. cvAddWeighted(W, 1, delta_W, -eta, 0, W);
202. cvAddWeighted(V, 1, delta_V, -eta, 0, V);
203. cvAddWeighted(U, 1, delta_U, -eta, 0, U);
204.
205. cvAddWeighted(by, 1, delta_by, -eta, 0, by);
206. cvAddWeighted(bh, 1, delta_bh, -eta, 0, bh);
207.
208. cvReleaseMat(&sampleM);
209. cvReleaseMat(&d);
210. cvReleaseMat(&pre_h);
211. cvReleaseMat(&y);
212. cvReleaseMat(&h);
213. cvReleaseMat(&delta_h);
214. cvReleaseMat(&delta_y);

215. cvReleaseMat(&U);
216. cvReleaseMat(&V);
217. cvReleaseMat(&W);
218. cvReleaseMat(&by);
219. cvReleaseMat(&bh);
220. cvReleaseMat(&Sample);
221. cvReleaseMat(&delta_V);
222. cvReleaseMat(&delta_U);
223. cvReleaseMat(&delta_W);
224. cvReleaseMat(&delta_by);
225. cvReleaseMat(&delta_bh);
226. system("PAUSE");
227. return 0

1479024804302.jpg

1479024912622.jpg

PS. 作为opencv新手，觉得matlab半小时搞定的东西，opencv要捣鼓两个小时。。。

posted @ 2016-11-13 16:21  一只有恒心的小菜鸟  阅读(16434)  评论(0)  编辑  收藏  举报