1 [code=python]
2 import os
3 import sys
4 import time
5
6 import numpy
7
8 import shelve
9
10 import theano
11 import theano.tensor as T
12 from theano.tensor.shared_randomstreams import RandomStreams
13
14 class dA(object):
15 """Denoising Auto-Encoder class (dA)
16
17 A denoising autoencoders tries to reconstruct the input from a corrupted
18 version of it by projecting it first in a latent space and reprojecting
19 it afterwards back in the input space. Please refer to Vincent et al.,2008
20 for more details. If x is the input then equation (1) computes a partially
21 destroyed version of x by means of a stochastic mapping q_D. Equation (2)
22 computes the projection of the input into the latent space. Equation (3)
23 computes the reconstruction of the input, while equation (4) computes the
24 reconstruction error.
25
26 .. math::
27
28 \tilde{x} ~ q_D(\tilde{x}|x) (1)
29
30 y = s(W \tilde{x} + b) (2)
31
32 x = s(W' y + b') (3)
33
34 L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)
35
36 """
37
38 def __init__(
39 self,
40 numpy_rng,
41 theano_rng=None,
42 input=None,
43 #n_visible=784,
44 n_hidden=100,
45 W=None,
46 bhid=None,
47 #bvis=None
48 ):
49 """
50 Initialize the dA class by specifying the number of visible units (the
51 dimension d of the input ), the number of hidden units ( the dimension
52 d' of the latent or hidden space ) and the corruption level. The
53 constructor also receives symbolic variables for the input, weights and
54 bias. Such a symbolic variables are useful when, for example the input
55 is the result of some computations, or when weights are shared between
56 the dA and an MLP layer. When dealing with SdAs this always happens,
57 the dA on layer 2 gets as input the output of the dA on layer 1,
58 and the weights of the dA are used in the second stage of training
59 to construct an MLP.
60
61 :type numpy_rng: numpy.random.RandomState
62 :param numpy_rng: number random generator used to generate weights
63
64 :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
65 :param theano_rng: Theano random generator; if None is given one is
66 generated based on a seed drawn from `rng`
67
68 :type input: theano.tensor.TensorType
69 :param input: a symbolic description of the input or None for
70 standalone dA
71
72 :type n_hidden: int
73 :param n_hidden: number of hidden units
74
75 :type W: theano.tensor.TensorType
76 :param W: Theano variable pointing to a set of weights that should be
77 shared belong the dA and another architecture; if dA should
78 be standalone set this to None
79
80 :type bhid: theano.tensor.TensorType
81 :param bhid: Theano variable pointing to a set of biases values (for
82 hidden units) that should be shared belong dA and another
83 architecture; if dA should be standalone set this to None
84
85
86
87 """
88 #self.n_visible = n_visible
89 self.n_hidden = n_hidden
90
91 # create a Theano random generator that gives symbolic random values
92 if not theano_rng:
93 theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
94
95 # note : W' was written as `W_prime` and b' as `b_prime`
96 if not W:
97 # W is initialized with `initial_W` which is uniformely sampled
98 # from -4*sqrt(6./(n_visible+n_hidden)) and
99 # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
100 # converted using asarray to dtype
101 # theano.config.floatX so that the code is runable on GPU
102 initial_W = numpy.asarray(
103 numpy_rng.uniform(
104 low=-4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
105 high=4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
106 size=(n_hidden, n_hidden)
107 ),
108 dtype=theano.config.floatX
109 )
110 W=theano.shared(value=initial_W, name='W', borrow=True)
111
112 '''
113 if not bvis:
114 bvis = theano.shared(
115 value=numpy.zeros(
116 n_visible,
117 dtype=theano.config.floatX
118 ),
119 borrow=True
120 )
121 '''
122 if not bhid:
123 bhid = theano.shared(
124 value=numpy.zeros(
125 n_hidden,
126 dtype=theano.config.floatX
127 ),
128 name='b',
129 borrow=True
130 )
131
132 self.W = W
133 # b corresponds to the bias of the hidden
134 self.b = bhid
135 # b_prime corresponds to the bias of the visible
136 #self.b_prime = bvis
137 # tied weights, therefore W_prime is W transpose
138 #self.W_prime = self.W.T
139 self.theano_rng = theano_rng
140 # if no input is given, generate a variable representing the input
141 if input is None:
142 # we use a matrix because we expect a minibatch of several
143 # examples, each example being a row
144 self.x = T.dmatrix(name='input')
145 else:
146 self.x = input
147
148 self.params = [self.W, self.b]
149 # end-snippet-1
150 def get_hidden_values(self):
151 """ Computes the values of the hidden layer """
152 return T.sum(T.nnet.sigmoid(T.dot(self.x, self.W) + self.b),axis = 0)
153
154 '''
155 def get_corrupted_input(self, input, corruption_level):
156 """This function keeps ``1-corruption_level`` entries of the inputs the
157 same and zero-out randomly selected subset of size ``coruption_level``
158 Note : first argument of theano.rng.binomial is the shape(size) of
159 random numbers that it should produce
160 second argument is the number of trials
161 third argument is the probability of success of any trial
162
163 this will produce an array of 0s and 1s where 1 has a
164 probability of 1 - ``corruption_level`` and 0 with
165 ``corruption_level``
166
167 The binomial function return int64 data type by
168 default. int64 multiplicated by the input
169 type(floatX) always return float64. To keep all data
170 in floatX when floatX is float32, we set the dtype of
171 the binomial to floatX. As in our case the value of
172 the binomial is always 0 or 1, this don't change the
173 result. This is needed to allow the gpu to work
174 correctly as it only support float32 for now.
175
176 """
177 return self.theano_rng.binomial(size=input.shape, n=1,
178 p=1 - corruption_level,
179 dtype=theano.config.floatX) * input
180 '''
181 '''
182
183 def get_reconstructed_input(self, hidden):
184 """Computes the reconstructed input given the values of the
185 hidden layer
186
187 """
188 return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
189
190
191 def get_cost_updates(self, corruption_level, learning_rate):
192 """ This function computes the cost and the updates for one trainng
193 step of the dA """
194
195 #tilde_x = self.get_corrupted_input(self.x, corruption_level)
196 y = self.get_hidden_values(tilde_x)
197 #z = self.get_reconstructed_input(y)
198 # note : we sum over the size of a datapoint; if we are using
199 # minibatches, L will be a vector, with one entry per
200 # example in minibatch
201 L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
202 # note : L is now a vector, where each element is the
203 # cross-entropy cost of the reconstruction of the
204 # corresponding example of the minibatch. We need to
205 # compute the average of all these to get the cost of
206 # the minibatch
207 cost = T.mean(L)
208
209 # compute the gradients of the cost of the `dA` with respect
210 # to its parameters
211 gparams = T.grad(cost, self.params)
212 # generate the list of updates
213 updates = [
214 (param, param - learning_rate * gparam)
215 for param, gparam in zip(self.params, gparams)
216 ]
217
218 return (cost, updates)
219 '''
220
221
222 x = T.fmatrix('x') # question matrix
223 y = T.fmatrix('x') # answer matrix
224 index = T.lscalar()
225 rng = numpy.random.RandomState(23455)
226 theano_rng = RandomStreams(rng.randint(2 ** 30))
227 n_hidden=2
228 learning_rate=0.1
229 da_q=[]
230 da_a=[]
231 for count in range(n_hidden):
232 da_q.append(dA(
233 numpy_rng=rng,
234 theano_rng=theano_rng,
235 input=x,
236 #n_visible=28 * 28,
237 n_hidden=100
238 ))
239
240
241 for count in range(n_hidden):
242 da_a.append(dA(
243 numpy_rng=rng,
244 theano_rng=theano_rng,
245 input=y,
246 #n_visible=28 * 28,
247 n_hidden=100
248 ))
249 cost_matrix=[]
250 for hid_index in range(n_hidden):
251 cost_matrix.append(T.sum(T.sqr(da_q[hid_index].get_hidden_values()-da_a[hid_index].get_hidden_values())/2))
252 cost=T.sum(cost_matrix)
253 params=da_q[0].params+da_a[hid_index].params
254 for hid_index in range(1,n_hidden):
255 params+=da_q[hid_index].params+da_a[hid_index].params
256 gparams=T.grad(cost, params)
257 updates = []
258 for param, gparam in zip(params, gparams):
259 updates.append((param, param - learning_rate * gparam))
260 db = shelve.open(r'data\training_data\training_data_30_50_1_9_games.dat')
261 x1=db['train_set1']
262 q,a=x1[0]
263 q1,a1=x1[1]
264 train_da = theano.function(
265 [index],
266 cost,
267 updates=updates,
268 givens={
269 x: x1[0][0],
270 y: x1[0][1]
271 }
272 )
273 print train_da(0)
274 [/code]