# 线性回归学习笔记

#### 2. 线性回归

（对于一元线性回归，其实就是找一条直线去拟合点）

#### 3. 损失函数

$J(\theta_0,\theta_1,...,\theta_n) = \frac1{2m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)})^2$

$h_{\theta}(x) = \theta_1 x$

#### 4. 参数调整

$(\theta_0,\theta_1,...,\theta_n)^* = \mathop{\arg\min}\limits_{\theta_0,\theta_1,...,\theta_n} J(\theta_0,\theta_1,...,\theta_n)$

$J(\theta_0,\theta_1) = \frac1{2m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)})^2$分别对$\theta_0,\theta_1$求偏导

$\theta_j := \theta_j - \alpha \frac{\partial}{\partial\theta_j}J(\theta_0,\theta_1)\ (for\ j=0\ and\ j=1)$

$\frac{\partial}{\partial\theta_j}J(\theta_0,\theta_1) = \frac{\partial}{\partial\theta_j}(\frac1{2m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)})^2)=\frac{\partial}{\partial\theta_j}(\frac1{2m}\sum_{i=1}^{m}(\theta_0 + \theta_1x^{(i)}-y^{(i)})^2)$

$\theta_0 := \theta_0 - \alpha \frac1m\sum_{i=1}^{m} (h_{\theta}(x^{(i)})-y^{(i)})$

$\theta_1 := \theta_1 - \alpha \frac1m\sum_{i=1}^{m} (h_{\theta}(x^{(i)})-y^{(i)}) * x^{(i)}$

#### 5. 一元线性回归

view code
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Draw the straight line y = k * x + b over x in [0, 10].
def plot(k, b, color = 'red'):
    xs = np.linspace(0, 10, 100)
    plt.plot(xs, k * xs + b, c = color)

class LinearRegression:
    """Univariate linear regression h(x) = theta0 + theta1 * x,
    fitted with batch gradient descent. Plots the data, the initial
    line (green) and the fitted line (red) during fit()."""

    def __init__(self):
        self.theta0, self.theta1 = 0, 0

    # Hypothesis: predicted value for a single scalar input x.
    def h(self, x):
        return self.theta0 + self.theta1 * x

    # Cost J = (1 / 2m) * sum_i (h(x_i) - y_i)^2 over the training set
    # stored by fit().
    def J(self):
        loss = 0
        for i in range(self.m):
            loss += (self.h(self.X[i]) - self.y[i]) ** 2
        return loss / (2 * self.m)

    def fit(self, X, y, theta0 = 0, theta1 = 0, alpha = 10.0, maxiter = 100, eps = .3):
        """Fit theta0/theta1 by gradient descent.

        X, y    : 1-D arrays of m samples/targets
        theta0/1: initial parameters
        alpha   : learning rate
        maxiter : maximum number of iterations
        eps     : stop when the cost change drops below eps
        Stores the final cost in self.mse.
        """
        self.m = X.shape[0]
        self.X = X
        self.y = y
        # Model parameters: y = theta0 + theta1 * x
        self.theta0, self.theta1 = theta0, theta1
        lastLoss = None
        self.mse = None  # stays None only if maxiter == 0
        plt.scatter(X, y)
        plot(self.theta1, self.theta0, 'green')
        plt.xlim(xmin = 0, xmax = 9)
        for iterTime in range(maxiter):
            # Accumulate the batch gradient; compute the residual once
            # per sample and reuse it for both partial derivatives.
            g0, g1 = 0, 0
            for i in range(self.m):
                err = self.h(X[i]) - y[i]
                g0 += err
                g1 += err * X[i]
            g0 /= self.m
            g1 /= self.m
            self.theta0 -= alpha * g0
            self.theta1 -= alpha * g1
            nowLoss = self.J()
            # BUG FIX: record the loss of the *current* parameters; the
            # original assigned self.mse = lastLoss after the loop, which
            # was one iteration stale whenever the eps-break fired.
            self.mse = nowLoss
            if lastLoss is not None and abs(nowLoss - lastLoss) < eps:
                break
            lastLoss = nowLoss
        plot(self.theta1, self.theta0)
        plt.show()

    def pred(self, X):
        """Return predictions for a 1-D array of inputs."""
        n = X.shape[0]
        y_pred = np.zeros(n)
        for i in range(n):
            y_pred[i] = self.h(X[i])
        return y_pred

if __name__ == '__main__':
    # BUG FIX: `dataset` was referenced but never defined. Column 5 and the
    # target cap at 50 match the Boston housing dataset (RM feature / MEDV).
    # NOTE(review): load_boston was removed in scikit-learn >= 1.2 — confirm
    # the pinned sklearn version, or port to fetch_openml("boston").
    from sklearn.datasets import load_boston
    dataset = load_boston()
    X = dataset.data[:, 5]
    y = dataset.target
    fname = dataset.feature_names[5]
    # Drop samples whose target hit the 50.0 cap (censored outliers).
    idx = np.where(y != 50)
    X = X[idx]
    y = y[idx]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 42)
    model = LinearRegression()
    model.fit(X_train, y_train, theta0 = -20, theta1 = 1, alpha = 5, maxiter = 20, eps = 0)
    y_pred = model.pred(X_test)
    print(model.mse)


#### 6. 多元线性回归

$h_{\theta}(x) = \theta_0 + \theta_1x_1 + \theta_2x_2 + ... + \theta_nx_n = \theta_0 + \sum_{j=1}^n \theta_jx_j$

$J(\theta_0,\theta_1,...,\theta_n) = \frac1{2m}\sum_{i=1}^{m}(h_{\theta}(x^{(i)})-y^{(i)})^2 = \frac1{2m}\sum_{i=1}^{m} ((\theta_0 + \sum_{j=1}^n \theta_jx^{(i)}_{j})-y^{(i)})^2$

$\frac{\partial}{\partial \theta_0}J(\theta_0,\theta_1,...,\theta_n) = \frac{1}{m} \sum_{i=1}^m ((\theta_0 + \sum_{j=1}^n \theta_jx^{(i)}_{j}) - y^{(i)})$

$\frac{\partial}{\partial \theta_k}J(\theta_0,\theta_1,...,\theta_n) = \frac{1}{m} \sum_{i=1}^m ((\theta_0 + \sum_{j=1}^n \theta_jx^{(i)}_{j}) - y^{(i)}) x^{(i)}_{k},\ k \ne 0$

view code
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

class LinearRegression:
    """Multivariate linear regression h(x) = theta[0] + sum_j theta[j] * x_j,
    fitted with batch gradient descent. X is expected to be a pandas
    DataFrame (rows accessed via .iloc) — TODO confirm against callers."""

    def __init__(self):
        pass

    # Hypothesis for one sample x (length-n feature vector).
    def h(self, x):
        H = self.theta[0]
        for i in range(x.shape[0]):
            # BUG FIX: feature i pairs with theta[i + 1] (theta[0] is the
            # intercept). The original used theta[i - 1], which for i == 0
            # wraps around to the *last* coefficient.
            H += x[i] * self.theta[i + 1]
        return H

    # Cost J = (1 / 2m) * sum_i (h(x_i) - y_i)^2 over the stored training set.
    def J(self):
        mse = 0
        for i in range(self.m):
            mse += (self.h(self.X.iloc[i]) - self.y[i]) ** 2
        return mse / (2 * self.m)

    def fit(self, X, y, theta = None, alpha = 10.0, maxiter = 100, eps = .3):
        """Fit by gradient descent.

        X       : DataFrame of shape (m, n)
        y       : length-m targets
        theta   : optional initial parameters, length n + 1 (else zeros)
        alpha   : learning rate
        maxiter : maximum iterations
        eps     : stop when the cost change drops below eps
        Stores the final cost in self.mse.
        """
        self.m = X.shape[0]
        self.n = X.shape[1]
        self.X = X
        self.y = y
        # dtype=float so the in-place update below never truncates to int
        # when an integer tuple is passed as the initial theta.
        if (theta is not None) and (len(theta) == self.n + 1):
            self.theta = np.array(theta, dtype=float)
        else:
            self.theta = np.zeros((self.n + 1,))
        lastMSE = None
        self.mse = None  # stays None only if maxiter == 0
        for iterTime in range(maxiter):
            g = np.zeros((self.n + 1,))
            # Compute each residual h(x_i) - y_i once and reuse it for every
            # partial derivative; the original recomputed h() per feature,
            # costing an extra factor of n.
            for i in range(self.m):
                err = self.h(X.iloc[i]) - y[i]
                g[0] += err
                for j in range(1, self.n + 1):
                    g[j] += err * X.iloc[i, j - 1]
            # BUG FIX: the original divided g[1..n] by m but never g[0],
            # making the intercept gradient m times too large.
            g /= self.m
            self.theta -= alpha * g
            curMSE = self.J()
            # BUG FIX: record the current cost so self.mse is not one
            # iteration stale when the eps-break fires.
            self.mse = curMSE
            if lastMSE is not None and abs(curMSE - lastMSE) < eps:
                break
            lastMSE = curMSE

    def pred(self, X):
        """Return predictions for a DataFrame of shape (m, n)."""
        m = X.shape[0]
        y_pred = np.zeros(m)
        for i in range(m):
            y_pred[i] = self.h(X.iloc[i])
        return y_pred

if __name__ == '__main__':
    # BUG FIX: `dataset` was referenced but never defined. Column 5 and the
    # target cap at 50 match the Boston housing dataset (RM feature / MEDV).
    # NOTE(review): load_boston was removed in scikit-learn >= 1.2 — confirm
    # the pinned sklearn version, or port to fetch_openml("boston").
    from sklearn.datasets import load_boston
    dataset = load_boston()
    X = dataset.data[:, 5]
    y = dataset.target
    fname = dataset.feature_names[5]
    # Drop samples whose target hit the 50.0 cap (censored outliers).
    idx = np.where(y != 50)
    X = X[idx]
    y = y[idx]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, random_state = 42)
    # Wrap the single feature column as (m, 1) DataFrames for .iloc access.
    dfX_train = pd.DataFrame(X_train)
    dfX_test = pd.DataFrame(X_test)
    model = LinearRegression()
    model.fit(dfX_train, y_train, theta = (0, 5), alpha = 0.004, maxiter = 20, eps = 1)
    y_pred = model.pred(dfX_test)
    print(model.mse)



#### 8. 正规方程

$X = \left( \begin{array}{cccc} x^{(1)}_{0} & x^{(1)}_{1} & \cdots & x^{(1)}_{n} \\ x^{(2)}_{0} & x^{(2)}_{1} & \cdots & x^{(2)}_{n} \\ \vdots & \vdots & \ddots & \vdots \\ x^{(m)}_{0} & x^{(m)}_{1} & \cdots & x^{(m)}_{n} \end{array}\right)$

$\theta = \left( \begin{array}{c} \theta_0 \\ \theta_1 \\ \vdots \\ \theta_n \end{array}\right)$

$y = \left( \begin{array}{c} y^{(1)} \\ y^{(2)} \\ \vdots \\ y^{(m)} \end{array}\right)$

$h_{\theta}(X) = X \theta$

$J(\theta) = \frac1{2m}(h_{\theta}(X) - y)^T(h_{\theta}(X) - y) = \frac1{2m}(X \theta - y)^T(X \theta - y)$

$\frac{\partial}{\partial \theta} J(\theta) = \frac1mX^T(X \theta - y) = 0$

$\hat{\theta} = (X^TX)^{-1}X^Ty$

view code

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy.linalg as alg

class NormalFunctionLinearRegression:
    """Closed-form linear regression via the normal equation
    theta = (X^T X)^{-1} X^T y, with a bias column of ones prepended."""

    def __init__(self):
        pass

    def fit(self, X, y):
        """Solve for theta and return a copy of the (n+1, 1) vector."""
        rows = X.shape[0]
        bias = np.ones((rows, 1))
        Xb = np.hstack((bias, np.array(X)))
        yv = np.array(y).reshape((rows, 1))
        Xt = Xb.transpose()
        self.theta = alg.inv(Xt.dot(Xb)).dot(Xt).dot(yv)
        return np.copy(self.theta)

    def predict(self, X):
        """Return (m, 1) predictions for m samples."""
        rows = X.shape[0]
        bias = np.ones((rows, 1))
        Xb = np.hstack((bias, np.array(X)))
        return Xb.dot(self.theta)

if __name__ == '__main__':
    # Small demo: fit a line to seven hand-made points and plot the result.
    xs = np.array([1, 2, 3, 4, 5, 6, 7])
    ys = np.array([4, 20, 31, 32, 40, 53, 50])
    model = NormalFunctionLinearRegression()
    frame = pd.DataFrame(xs)
    theta = model.fit(frame, ys)
    print(theta)
    plt.scatter(frame, ys)
    grid = np.linspace(0, 8, 5)
    plt.plot(grid, theta[0] + theta[1] * grid)
    plt.show()



#### 9. 特征缩放 归一化 标准化

$x^{(i)}_j := \frac{x^{(i)}_j - \mu_j}{s_{j}}$

$x^{(i)}_j := \frac{x^{(i)}_j - \mu_j}{\max{x_j} - \min{x_j}}$

posted @ 2021-04-18 16:46  _kiko  阅读(10)  评论(0编辑  收藏