Simple Machine Learning Code (LR, Kmeans, NN, RNN)

Logistic Regression

The part that needs special care is the use of exp and log.

The raw sigmoid expression is 1 / (1 + exp(-z)), but plugging z = -710 into it directly triggers an overflow. So for z < 0 the equivalent form exp(z) / (1 + exp(z)) is used instead, and exp(-710) is no longer a problem. This is exactly what the expit function in the scipy package does.

log_logistic = log(sigmoid); like expit, it is implemented by splitting on the sign of z for numerical stability.
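
A quick sketch of the difference, using the z = -710 example above (the naive form raises an overflow warning and its log collapses to -inf, while the stable functions do not):

import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic

z = -710.0
naive = 1.0 / (1.0 + np.exp(-z))     # np.exp(710) overflows to inf -> RuntimeWarning; result is 0.0
stable = expit(z)                    # computed as exp(z) / (1 + exp(z)); no overflow
print(np.log(naive))                 # -inf, because the naive probability collapsed to exactly 0
print(log_logistic(np.array([z])))   # [-710.], i.e. log(sigmoid(z)) computed directly and safely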

import numpy as np
from scipy.special import expit
from sklearn.utils.extmath import log_logistic

def predict(theta, x):
    # expit is the numerically stable sigmoid
    return expit(x.dot(theta))

def compute_loss(y, yz):
    # negative log-likelihood; log_logistic(yz) = log(sigmoid(yz)) for labels in {-1, +1}
    return -np.sum(log_logistic(yz))

def gradientdescent(x, y, theta, iterations=2000, lr=0.01):
    m = x.shape[0]                       # number of samples
    loss_list = []
    for i in range(iterations):
        yhat = predict(theta, x)
        delta = x.T.dot(yhat - y) / m    # gradient for labels in {0, 1}
        loss = compute_loss(y, (2 * y - 1) * x.dot(theta))  # map {0, 1} labels to {-1, +1} for the loss
        loss_list.append(loss)
        theta = theta - lr * delta
    return theta, loss_list

# X: (m, n) feature matrix, y: (m, 1) labels in {0, 1}
theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
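
A toy run, assuming synthetic data (two Gaussian clusters with labels in {0, 1}); the loss should decrease over the iterations:

np.random.seed(0)
m, n = 200, 2
X = np.vstack([np.random.randn(100, n) - 1, np.random.randn(100, n) + 1])
y = np.vstack([np.zeros((100, 1)), np.ones((100, 1))])
theta, loss_list = gradientdescent(X, y, np.zeros((n, 1)))
print(loss_list[0], loss_list[-1])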

Kmeans

Kmeans is essentially the EM algorithm, just with hard assignments instead of soft ones. First initialize K centers; in the E step, assign each sample to its nearest center; in the M step, choose new centers that minimize the within-cluster distance. A small usage sketch follows the code below.

import numpy as np


def calc_dist(x1, x2):
    # squared Euclidean distance
    return sum([(x1[i] - x2[i])**2 for i in range(len(x1))])


# E step: assign each sample to its nearest center
def E_step(X, cents):
    cent_dict = dict(zip(cents, [[] for _ in range(len(cents))]))
    for row in X:
        min_dist, best_cent = 1e10, None
        for cent in cent_dict:
            dist = calc_dist(row, cent)
            if dist < min_dist:
                min_dist = dist
                best_cent = cent
        cent_dict[best_cent] += [row.tolist()]
    return cent_dict


# M step: recompute each center as the mean of its assigned samples
def M_step(cent_dict):
    new_cents = []
    for cent in cent_dict:
        new_cent = np.mean(np.array(cent_dict[cent]), axis=0)
        new_cents.append(tuple(new_cent))
    return new_cents


def Kmeans(X, K=3, max_iter=10):
    np.random.seed(1)
    # pick K distinct samples as the initial centers
    inds = np.random.choice(len(X), K, replace=False)
    cents = [tuple(X[i]) for i in inds]
    for k in range(max_iter):
        cent_dict = E_step(X, cents)
        new_cents = M_step(cent_dict)
        move = sum([calc_dist(c1, c2) for c1, c2 in zip(cents, new_cents)])
        if move < 0.1:
            print('Converged in %s steps' % k)
            break
        cents = new_cents
    return cent_dict
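
A minimal usage sketch on assumed toy data (three well-separated 2-D Gaussian blobs); the recovered centers and cluster sizes should roughly match the three blobs, subject to the random initialization:

np.random.seed(0)
X = np.vstack([np.random.randn(50, 2) + [0, 0],
               np.random.randn(50, 2) + [5, 5],
               np.random.randn(50, 2) + [0, 5]])
cent_dict = Kmeans(X, K=3, max_iter=20)
for cent, members in cent_dict.items():
    print(np.round(cent, 2), len(members))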

Neural Network

Note the softmax computation: it has to guard against overflow in exp. The usual trick is to multiply both the numerator and the denominator of softmax by a constant C with log(C) = -max(z); that is the shift_scores step below.

The loss uses logsumexp from the scipy package, for the same reason as in LR: logsumexp(z) = log(sum(exp(z))), computed in a numerically stable way.
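
A small numerical sketch of both tricks, with made-up scores chosen large enough to overflow:

import numpy as np
from scipy.special import logsumexp

z = np.array([[1000.0, 1001.0, 1002.0]])
naive = np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)    # exp(1000) overflows -> all nan
shift = z - np.max(z, axis=1, keepdims=True)                    # multiply top and bottom by C, log(C) = -max(z)
stable = np.exp(shift) / np.sum(np.exp(shift), axis=1, keepdims=True)
print(naive)                   # [[nan nan nan]]
print(np.round(stable, 3))     # [[0.09  0.245 0.665]]
print(logsumexp(z, axis=1))    # about [1002.4076]: log(sum(exp(z))) without ever forming exp(z)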

from scipy.special import logsumexp
import numpy as np


class Neural_Network:

    def __init__(self, n, h, c, std=1e-4):
        # n: input dimension, h: hidden size, c: number of classes
        W1 = np.random.randn(n, h) * std
        b1 = np.zeros(h)
        W2 = np.random.randn(h, c) * std
        b2 = np.zeros(c)
        self.params = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

    def forward_backward_prop(self, X, y):
        # X: (N, n) inputs, y: (N, c) one-hot targets
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # forward prop
        hidden = X.dot(W1) + b1
        relu = np.maximum(0, hidden)
        scores = relu.dot(W2) + b2
        shift_scores = scores - np.max(scores, axis=1, keepdims=True)
        softmax = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1, keepdims=True)
        # cross-entropy via the stable log-softmax: shift_scores - logsumexp(shift_scores)
        loss = - np.sum(y * (shift_scores - logsumexp(shift_scores, axis=1, keepdims=True))) / X.shape[0]

        # backward prop
        dscores = (softmax - y) / X.shape[0]
        drelu = dscores.dot(W2.T)
        dW2 = relu.T.dot(dscores)
        db2 = np.sum(dscores, axis=0)
        dhidden = (hidden > 0) * drelu
        dW1 = X.T.dot(dhidden)
        db1 = np.sum(dhidden, axis=0)

        grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}

        return loss, grads

    def train(self, X, y, lr=0.01, decay=0.95, iters=5000):
        loss_list, acc_list = [], []
        for it in range(iters):
            loss, grads = self.forward_backward_prop(X, y)
            loss_list.append(loss)
            self.params['W1'] -= lr * grads['dW1']
            self.params['b1'] -= lr * grads['db1']
            self.params['W2'] -= lr * grads['dW2']
            self.params['b2'] -= lr * grads['db2']

            # track training accuracy and decay the learning rate every 100 iterations
            if it % 100 == 0:
                yhat = self.predict(X)
                acc = np.sum(np.argmax(y, axis=1) == yhat) / X.shape[0]
                acc_list.append(acc)
                lr *= decay

        return loss_list, acc_list

    def predict(self, X):
        hidden = X.dot(self.params['W1']) + self.params['b1']
        relu = np.maximum(0, hidden)
        scores = relu.dot(self.params['W2']) + self.params['b2']
        yhat = np.argmax(scores, axis=1)
        return yhat
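
A minimal usage sketch with assumed toy data: 3 classes defined by a simple separable rule, one-hot encoded. The exact numbers will vary, but the loss should fall and the accuracy should end up well above the 1/3 chance level:

np.random.seed(0)
X = np.random.randn(300, 4)
labels = np.argmax(X[:, :3], axis=1)    # class = index of the largest of the first three features
y = np.eye(3)[labels]                   # one-hot targets, shape (300, 3)

net = Neural_Network(n=4, h=20, c=3, std=1e-2)
loss_list, acc_list = net.train(X, y, lr=0.5, iters=2000)
print(round(loss_list[-1], 3), acc_list[-1])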

Recurrent Neural Network
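
The vanilla RNN cell below computes at = tanh(Waa·a_prev + Wax·xt + ba) and yt = softmax(Wya·at + by) at each time step. The loss is the cross-entropy summed over all time steps, backpropagation through time accumulates the gradients, and they are clipped to [-5, 5] before the parameter update. A usage sketch follows the code.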

import numpy as np


def tanh(x):
    # hand-rolled tanh (np.tanh is the numerically safer built-in)
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

def softmax(x):
    # column-wise softmax with the max-shift trick
    ex = np.exp(x - np.max(x))
    return ex / ex.sum(axis=0)


class RNN:

    def __init__(self, na, nx, ny, m, seed=1):
        np.random.seed(seed)
        Waa = np.random.randn(na, na)
        Wax = np.random.randn(na, nx)
        Wya = np.random.randn(ny, na)
        ba = np.random.randn(na, 1)
        by = np.random.randn(ny, 1)
        self.a0 = np.random.randn(na, m)
        self.params = {'Waa': Waa, 'Wax': Wax, 'Wya': Wya, 'ba': ba, 'by': by}

    def RNN_cell_forward(self, xt, a_prev):
        """
        Inputs:
        xt -- Current input data, of shape (nx, m)
        a_prev -- Previous hidden state, of shape (na, m)

        Outputs:
        at -- Current hidden state, of shape (na, m)
        yt -- Current prediction, of shape (ny, m)
        """
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']

        at = tanh(Waa.dot(a_prev) + Wax.dot(xt) + ba)
        score = Wya.dot(at) + by
        yt = softmax(score)
        return at, yt

    def RNN_forward(self, X, y):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)

        Outputs:
        loss -- Cross-entropy loss summed over all time steps and samples
        cache -- Tuple (a, yhat): hidden states of shape (na, m, Tx) and predictions of shape (ny, m, Tx)
        """
        a_prev = self.a0
        na, m = a_prev.shape
        ny = y.shape[0]
        Tx = X.shape[2]

        a = np.zeros((na, m, Tx))
        yhat = np.zeros((ny, m, Tx))
        loss = 0
        for t in range(Tx):
            a_next, yt = self.RNN_cell_forward(X[:, :, t], a_prev)
            yhat[:, :, t] = yt
            a[:, :, t] = a_next
            # cross-entropy: only the predicted probability of each sample's own target counts
            loss -= np.sum(np.log(np.sum(yt * y[:, :, t], axis=0)))
            a_prev = a_next

        cache = (a, yhat)
        return loss, cache

    def RNN_cell_backward(self, dz, grads, cache):
        """
        Inputs:
        dz -- Gradient of the loss with respect to the score, of shape (ny, m)
        grads -- Dictionary containing all gradients
        cache -- Tuple containing xt, a_next, a_prev

        Outputs:
        grads -- Updated dictionary of gradients
        """
        xt, a_next, a_prev = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']

        grads['dWya'] += dz.dot(a_next.T)
        grads['dby'] += np.sum(dz, axis=1, keepdims=True)
        da_y = Wya.T.dot(dz)
        da_a = grads['da_prev']
        da_next = da_y + da_a      # the hidden state gets gradient from two paths: the output and the next time step
        dtanh = (1 - a_next**2) * da_next
        grads['dWaa'] += dtanh.dot(a_prev.T)
        grads['da_prev'] = Waa.T.dot(dtanh)
        grads['dWax'] += dtanh.dot(xt.T)
        grads['dba'] += np.sum(dtanh, axis=1, keepdims=True)

        return grads

    def RNN_backward(self, X, y, cache):
        """
        Inputs:
        X -- Input data for every time step, of shape (nx, m, Tx)
        y -- Target for every time step, of shape (ny, m, Tx)
        cache -- Tuple from RNN_forward, containing a, yhat

        Outputs:
        grads -- Dictionary containing all gradients
        a -- Hidden states for every time step, of shape (na, m, Tx)
        """
        a, yhat = cache
        Waa, Wax, ba = self.params['Waa'], self.params['Wax'], self.params['ba']
        Wya, by = self.params['Wya'], self.params['by']
        Tx = X.shape[2]

        grads = {}
        grads['dWya'], grads['dby'] = np.zeros_like(Wya), np.zeros_like(by)
        grads['dWaa'], grads['da_prev'] = np.zeros_like(Waa), np.zeros_like(self.a0)
        grads['dWax'], grads['dba'] = np.zeros_like(Wax), np.zeros_like(ba)

        for t in reversed(range(Tx)):
            # gradient of the loss wrt the score (softmax + cross-entropy)
            dz = yhat[:, :, t] - y[:, :, t]
            a_prev_t = a[:, :, t-1] if t > 0 else self.a0    # at t = 0 the previous state is a0, not a[:, :, -1]
            cell_cache = X[:, :, t], a[:, :, t], a_prev_t
            grads = self.RNN_cell_backward(dz, grads, cell_cache)

        return grads, a

    def update_parameters(self, grads, lr):
        self.params['Wax'] -= lr * grads['dWax']
        self.params['Waa'] -= lr * grads['dWaa']
        self.params['Wya'] -= lr * grads['dWya']
        self.params['ba']  -= lr * grads['dba']
        self.params['by']  -= lr * grads['dby']

    def clip(self, grads, maxValue):
        # clip every gradient elementwise to [-maxValue, maxValue] to avoid exploding gradients
        for key in ['dWax', 'dWaa', 'dWya', 'dba', 'dby']:
            gradient = grads[key]
            grads[key] = np.clip(gradient, -maxValue, maxValue, out=gradient)
        return grads

    def train(self, X, y, lr, iters=1):
        loss_list = []
        for it in range(iters):
            loss, cache = self.RNN_forward(X, y)
            grads, a = self.RNN_backward(X, y, cache)
            # Clip gradients between -5 (min) and 5 (max)
            grads = self.clip(grads, 5)
            self.update_parameters(grads, lr)
            loss_list.append(loss)
        return loss_list, grads, a
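
A minimal usage sketch with assumed toy data: random token sequences encoded one-hot, where the target at each step is the next token. The sizes (na, nx, ny, m, Tx) are arbitrary; on this fixed batch the loss should generally trend downward:

np.random.seed(0)
na, nx, ny, m, Tx = 8, 5, 5, 4, 6
tokens = np.random.randint(0, nx, (m, Tx + 1))

X = np.zeros((nx, m, Tx))
y = np.zeros((ny, m, Tx))
for t in range(Tx):
    X[tokens[:, t], np.arange(m), t] = 1        # current token, one-hot
    y[tokens[:, t + 1], np.arange(m), t] = 1    # next token as the target, one-hot

rnn = RNN(na, nx, ny, m)
loss_list, grads, a = rnn.train(X, y, lr=0.01, iters=100)
print(loss_list[0], loss_list[-1])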

posted on 2018-09-06 17:19  木言成反