# coding:utf8
import numpy as np
import cPickle
import theano
import os
import theano.tensor as T

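# Multinomial logistic (softmax) regression on Theano: a single weight
# matrix theta maps inputs to class scores, normalized by a softmax.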
class SoftMax:
    def __init__(self, MAXT=50, step=0.15, landa=0):
        self.MAXT = MAXT    # maximum number of training epochs
        self.step = step    # learning rate
        self.landa = landa  # weight decay coefficient (the decay term did not improve accuracy here)

    def load_theta(self, datapath):
        self.theta = cPickle.load(open(datapath, 'rb'))

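    # Train softmax regression with minibatch gradient descent and return
    # the learned weight matrix.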
    def process_train(self, data, label, typenum, batch_size=500):
        valuenum = data.shape[1]
        batches = data.shape[0] // batch_size
        data = theano.shared(np.asarray(data, dtype=theano.config.floatX))
        label = T.cast(theano.shared(np.asarray(label, dtype=theano.config.floatX)), 'int32')
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        # zero initialization suffices: the softmax objective is convex
        theta = theano.shared(value=np.zeros((valuenum, typenum),
                                             dtype=theano.config.floatX),
                              name='theta', borrow=True)
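        # model: p(y|x) = softmax(x . theta); cost: mean negative log-likelihood
        # of the true labels plus an L2 (weight decay) penalty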
        hx = T.nnet.softmax(T.dot(x, theta))
        cost = -T.mean(T.log(hx)[T.arange(y.shape[0]), y]) + 0.5 * self.landa * T.sum(theta ** 2)  # weight decay term
        g_theta = T.grad(cost, theta)
        updates = [(theta, theta - self.step * g_theta)]
        train_model = theano.function(
            inputs=[index], outputs=cost, updates=updates, givens={
                x: data[index * batch_size: (index + 1) * batch_size],
                y: label[index * batch_size: (index + 1) * batch_size]
            }, allow_input_downcast=True
        )
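        # the whole dataset lives on the device as shared variables; `givens`
        # slices out one minibatch per call, selected by the scalar `index`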
        lastcostJ = np.inf
        stop = False
        epoch = 0
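        # simple early stopping: quit as soon as the mean epoch cost rises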
        while (epoch < self.MAXT) and (not stop):
            epoch = epoch + 1
            costj = []  # this epoch's minibatch costs
            for minibatch_index in xrange(batches):
                costj.append(train_model(minibatch_index))
            if np.mean(costj) >= lastcostJ:
                print "costJ is increasing !!!"
                stop = True
            else:
                lastcostJ = np.mean(costj)
                print(('epoch %i, minibatch %i/%i, average cost is %f') %
                      (epoch, minibatch_index + 1, batches, lastcostJ))
        self.theta = theta
        if not os.path.exists('data/softmax.pkl'):  # keep an existing snapshot
            f = open('data/softmax.pkl', 'wb')
            cPickle.dump(self.theta.get_value(), f)
            f.close()
        return self.theta.get_value()

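    # Evaluate the trained model: mean misclassification rate over the test set.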
    def process_test(self, data, label, batch_size=500):
        batches = label.shape[0] // batch_size
        data = theano.shared(np.asarray(data, dtype=theano.config.floatX))
        label = T.cast(theano.shared(np.asarray(label, dtype=theano.config.floatX)), 'int32')
        x = T.matrix('x')
        y = T.ivector('y')
        index = T.lscalar()
        hx = T.nnet.softmax(T.dot(x, self.theta))
        predict = T.argmax(hx, axis=1)
        errors = T.mean(T.neq(predict, y))
        test_model = theano.function(
            inputs=[index], outputs=errors, givens={
                x: data[index * batch_size: (index + 1) * batch_size],
                y: label[index * batch_size: (index + 1) * batch_size]
            }, allow_input_downcast=True
        )
        test_losses = []
        for minibatch_index in xrange(batches):
            test_losses.append(test_model(minibatch_index))
        test_score = np.mean(test_losses)
        print(('minibatch %i/%i, test error of model %f %%') %
              (minibatch_index + 1, batches, test_score * 100.))

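    # Numpy-side inference with the learned theta (no Theano graph needed).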
    def h(self, x):
        z = np.dot(x, self.theta)
        m = np.exp(z - np.max(z, axis=1, keepdims=True))  # subtract row max for numerical stability
        return m / np.sum(m, axis=1, keepdims=True)       # keepdims so the division broadcasts row-wise

    def predict(self, x):
        return np.argmax(self.h(x), axis=1)

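# Assumes `mnist.pkl` is the deeplearning.net MNIST pickle: a tuple of
# (inputs, labels) pairs for the training, validation, and test splits,
# with inputs already flattened to 784-dimensional rows.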
if __name__ == '__main__':
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    training_inputs = [np.reshape(x, 784) for x in training_data[0]]
    data = np.array(training_inputs)
    training_inputs = [np.reshape(x, 784) for x in validation_data[0]]
    vdata = np.array(training_inputs)
    f.close()
    softmax = SoftMax()
    softmax.process_train(data, training_data[1], 10)
    softmax.process_test(vdata, validation_data[1])
    # minibatch 20/20, test error of model 7.530000 %