# 大叔学ML第二：线性回归

## 基本形式

$$\begin{bmatrix} x_1^{(1)} & x_2^{(1)} & \cdots & x_n^{(1)}\\ x_1^{(2)} & x_2^{(2)} & \cdots & x_n^{(2)}\\ \vdots & \vdots & \ddots & \vdots\\ x_1^{(m)} & x_2^{(m)} & \cdots & x_n^{(m)} \end{bmatrix}$$

$$\begin{bmatrix} y^{(1)} \\ y^{(2)} \\ \vdots \\ y^{(m)} \end{bmatrix}$$

$$h(x_1,x_2 \dots x_n)= \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \dots + \theta_n x_n \tag{1}$$

$$h(x_1,x_2 \dots x_n)= \theta_0x_0 + \theta_1 x_1 + \theta_2 x_2 + \dots + \theta_n x_n \tag{2}$$

$$j(\theta_0,\theta_1\dots \theta_n)=\frac{1}{2m}\sum_{k=1}^m (h(x_1^{(k)},x_2^{(k)} \dots x_n^{(k)}) - y^{(k)})^2$$

$$j(\theta_0,\theta_1\dots \theta_n)=\frac{1}{2m}\sum_{k=1}^m (\theta_0x_0^{(k)} + \theta_1 x_1^{(k)} + \theta_2 x_2^{(k)} + \dots + \theta_n x_n^{(k)} - y^{(k)})^2 \tag{3}$$

## 求解参数$\vec\theta$

### 梯度下降法

• $$\frac{\partial}{\partial\theta_0}j(\theta_0,\theta_1\dots \theta_n) = \frac{1}{m}\sum_{k=1}^m(\theta_0x_0^{(k)} + \theta_1x_1^{(k)} + \dots+ \theta_nx_n^{(k)} - y^{(k)})x_0^{(k)}$$
• $$\frac{\partial}{\partial\theta_1}j(\theta_0,\theta_1\dots \theta_n) = \frac{1}{m}\sum_{k=1}^m(\theta_0x_0^{(k)} + \theta_1x_1^{(k)} + \dots+ \theta_nx_n^{(k)}- y^{(k)})x_1^{(k)}$$
• $$\dots$$
• $$\frac{\partial}{\partial\theta_n}j(\theta_0,\theta_1\dots \theta_n) = \frac{1}{m}\sum_{k=1}^m(\theta_0x_0^{(k)} + \theta_1x_1^{(k)} + \dots+ \theta_nx_n^{(k)}- y^{(k)})x_n^{(k)}$$

import numpy as np

def gradient(X, Y, m, theta):
    """Compute the gradient of the cost function at position theta.

    (The `def` line was missing in the original; the docstring and body
    floated at module level, which is invalid Python.)

    Args:
        X: sample matrix, shape (m, n), bias column included
        Y: sample labels, length m
        m: number of samples
        theta: point at which the gradient is evaluated

    Returns:
        g: gradient vector of the cost function at theta
    """
    theta_size = np.size(theta)
    g = np.zeros(theta_size)

    for i in range(theta_size):
        gi = 0  # partial derivative w.r.t. the i-th theta component
        for j in range(m):
            gi += (np.dot(X[j], theta) - Y[j]) * X[j, i]
        g[i] = gi / m

    return g

def gradient_descent(X, Y, step = 0.02, threshold = 0.01):
    """Find the theta minimizing the cost function by batch gradient descent.

    (The original loop body was lost — it tested an undefined `norm` and
    never updated theta; reconstructed here. The initial guess is also
    sized from X instead of the hard-coded `np.random.rand(4)`.)

    Args:
        X: sample matrix, shape (m, n), bias column included
        Y: sample labels, length m
        step: learning rate
        threshold: stop once the gradient's norm drops below this value
    Returns:
        theta: parameter vector minimizing the cost function
    """
    m = np.size(X, axis=0)
    theta = np.random.rand(np.size(X, axis=1))  # random start in [0, 1)^n

    # Vectorized gradient: (1/m) * X^T (X theta - y)
    g = np.dot(X.T, np.dot(X, theta) - Y) / m
    while np.linalg.norm(g) > threshold:
        theta = theta - step * g
        g = np.dot(X.T, np.dot(X, theta) - Y) / m

    return theta

''' 以下是测试数据 '''

# Test target function (linear)
def linear_function(x1, x2, x3):
    """y = 1 + 2*x1 + 3*x2 + 4*x3, plus uniform noise in [0, 1)."""
    noise = np.random.rand()
    return 1 + 2 * x1 + 3 * x2 + 4 * x3 + noise

# Evaluate the test function on each sample
def calculate(X):
    """Apply linear_function to the first three columns of every row of X."""
    return [linear_function(row[0], row[1], row[2]) for row in X]

if __name__ == "__main__":
    row_count = 500
    X = np.random.randint(0, 10, (row_count, 3))  # generate row_count random samples
    Y = calculate(X)  # compute labels

    X0 = np.ones((row_count, 1))
    X = np.hstack((X0, X))  # prepend a bias column of ones

    # The assignment of theta was lost in the original text; `theta` was
    # printed without ever being defined. Reconstructed from context.
    theta = gradient_descent(X, Y)
    print('theta is ', theta)


$$\frac{\partial}{\partial\theta_n}j(\theta_0,\theta_1\dots \theta_n) = (\theta_0x_0^{(k)} + \theta_1x_1^{(k)} + \dots+ \theta_nx_n^{(k)}- y^{(k)})x_n^{(k)} \tag{5}$$

def gradient_sgd(X, Y, m, theta):
    """Stochastic (single-sample) gradient estimate at position theta.

    Fixes two defects in the original:
    - `np.random.randint(1, m + 1)` drew indices 1..m, never sampling
      row 0 and reading past the end at X[m]; valid rows are 0..m-1.
    - a fresh random sample was drawn for EVERY theta component, so the
      result was not the gradient of any one sample; SGD uses one sample
      for the whole gradient vector.

    Args:
        X: sample matrix, shape (m, n), bias column included
        Y: sample labels, length m
        m: number of samples
        theta: point at which the gradient is estimated

    Returns:
        g: single-sample gradient estimate at theta
    """
    theta_size = np.size(theta)
    g = np.zeros(theta_size)

    # Draw ONE sample and use it for every component (indices 0..m-1).
    k = np.random.randint(0, m)
    error = np.dot(X[k], theta) - Y[k]
    for i in range(theta_size):
        g[i] = error * X[k, i]

    return g


theta is [1.43718942 2.00043557 3.00620849 4.00674728]

### 正规方程导法

$$\begin{bmatrix} x_0^{(1)} & x_1^{(1)} & x_2^{(1)} & \cdots & x_n^{(1)}\\ x_0^{(2)} & x_1^{(2)} & x_2^{(2)} & \cdots & x_n^{(2)}\\ \vdots & \vdots & \vdots & \ddots & \vdots\\ x_0^{(m)} & x_1^{(m)} & x_2^{(m)} & \cdots & x_n^{(m)} \end{bmatrix}$$

$$J(\vec\theta)=\frac{1}{2m}||X\vec\theta - \vec{y}||^2 \tag{6}$$

$$\begin{aligned} J(\vec\theta)&=\frac{1}{2m}||X\vec\theta - \vec{y}||^2 \\ &=\frac{1}{2m}(X\vec\theta - \vec{y})^T(X\vec\theta - \vec{y}) \\ &=\frac{1}{2m}(\vec\theta^T X^T - \vec{y}^T)(X\vec\theta - \vec{y}) \\ &=\frac{1}{2m}(\vec\theta^T X^T X\vec\theta - \vec\theta^T X^T\vec{y} - \vec{y}^T X\vec\theta + \vec{y}^T\vec{y}) \\ &=\frac{1}{2m}(\vec\theta^T X^T X\vec\theta - 2\vec{y}^T X\vec\theta + \vec{y}^T\vec{y}) \end{aligned}$$

$$\frac{d}{d\vec\theta}J(\vec\theta)=\frac{1}{m}(X^TX\vec\theta-X^T\vec{y})$$

# Test target: a known linear function
def linear_function(x1, x2, x3):
    """Return 1 + 2*x1 + 3*x2 + 4*x3 perturbed by uniform [0, 1) noise."""
    clean = 1 + 2 * x1 + 3 * x2 + 4 * x3
    return clean + np.random.rand()

# Build the label list for a sample matrix
def calculate(X):
    """Evaluate linear_function on columns 0-2 of each row of X."""
    labels = []
    for row in X:
        labels.append(linear_function(row[0], row[1], row[2]))
    return labels

if __name__ == "__main__":
    row_count = 500
    X = np.random.randint(0, 10, (row_count, 3))  # draw row_count random samples
    Y = calculate(X)  # labels from the test function

    bias = np.ones((row_count, 1))
    X = np.hstack((bias, X))  # first column is all ones (for theta_0)

    # Normal equation with pseudo-inverse: theta = (X^T X)^+ X^T y
    XtX_inv = np.linalg.pinv(np.dot(X.T, X))
    theta = np.dot(np.dot(XtX_inv, X.T), np.array(Y).T)
    print('theta is ', theta)


### 调用函数库

import numpy as np
from sklearn import linear_model

# Ground-truth linear function for generating test data
def linear_function(x1, x2, x3):
    """1 + 2*x1 + 3*x2 + 4*x3 with additive uniform [0, 1) noise."""
    return (1 + 2 * x1 + 3 * x2 + 4 * x3) + np.random.rand()

# Compute labels for every sample row
def calculate(X):
    """Map linear_function over the rows of X (first three columns)."""
    n_rows = np.size(X, axis=0)
    return [linear_function(X[k, 0], X[k, 1], X[k, 2]) for k in range(n_rows)]

if __name__ == "__main__":
    row_count = 500
    X = np.random.randint(0, 10, (row_count, 3))  # draw row_count random samples
    Y = calculate(X)  # labels from the test function

    # Fit with scikit-learn; the intercept is learned separately,
    # so no bias column is added here.
    model = linear_model.LinearRegression()
    model.fit(X, np.array(Y).T)

    coefficients, intercept = model.coef_, model.intercept_
    print(coefficients)
    print(intercept)


[2.00384674 2.99234723 3.99603084]
1.5344826581936104

posted @ 2018-12-15 19:13  会长  阅读(777)  评论(2编辑  收藏  举报