15.衡量回归算法的标准

衡量回归算法的标准

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

波士顿房产数据

# Load the Boston housing dataset through scikit-learn's bundled loader.
# NOTE(review): load_boston was deprecated and later removed from
# scikit-learn (1.2+ per the upstream docs) — confirm the installed version
# still provides it before re-running this notebook.
boston = datasets.load_boston()

x = boston.data[:, 5] # use only the number-of-rooms feature (column 5)

x.shape
(506,)
y = boston.target
y.shape
(506,)
plt.scatter(x, y)

 

np.max(y)
50.0

去除干扰数据（目标值达到上限 50.0 的样本）

# Build the mask once so x and y are filtered by exactly the same rows:
# keep only the samples whose target is strictly below 50.0.
below_cap = y < 50.0
x, y = x[below_cap], y[below_cap]

x.shape, y.shape
((490,), (490,))
plt.scatter(x, y)

 

 

使用简单线性回归

# Split the data into a training set and a test set.
# No test_size is passed, so scikit-learn's default split ratio applies;
# random_state=666 fixes the seed so the split is the same on every run.

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=666)

 

x_train.shape, y_train.shape
((367,), (367,))
# Fit the regression line y = a * x + b on the training set using the
# closed-form least-squares solution.

x_mean = x_train.mean()
y_mean = y_train.mean()

# Centre both variables once and reuse the centred vectors below.
x_centered = x_train - x_mean
y_centered = y_train - y_mean

num = np.dot(x_centered, y_centered)  # sum of (x - x_mean) * (y - y_mean)
d = np.dot(x_centered, x_centered)    # sum of (x - x_mean) ** 2

a = num / d                # slope
b = y_mean - x_mean * a    # intercept

y_hat = b + x_train * a    # fitted values for the training samples

 

x_train.shape, y_train.shape
((367,), (367,))
y_hat.shape
(367,)
# Scatter the training samples and overlay the fitted values y_hat as a line.
plt.scatter(x_train, y_train)
plt.plot(x_train, y_hat)

 

 

衡量回归算法的标准

# Predict the test-set targets with the fitted line (slope a, intercept b).
y_predict = a * x_test + b

 

# MSE: mean of the squared differences between predicted and true values.

residuals = y_predict - y_test
mse_test = np.square(residuals).sum() / len(y_test)
mse_test
28.215949368640807

# RMSE: square root of the MSE, which brings the error back to the same
# units as the target y.

from math import sqrt
rmse_test = sqrt(mse_test)
rmse_test
5.311868726600913

# MAE: mean of the absolute differences between predicted and true values.

abs_errors = np.abs(y_predict - y_test)
mae_test = abs_errors.sum() / len(y_test)
mae_test
3.9489046062737834

 

sklearn 中的 MSE 和 MAE

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error


mean_squared_error(y_test, y_predict)
28.215949368640807

 

mean_absolute_error(y_test, y_predict)
3.9489046062737834

 

R Squared（R²）

# R^2 = 1 - MSE(model) / Var(y_test). np.var is called with its default
# arguments (population variance), which equals the MSE of a baseline that
# always predicts the mean of y_test, so the ratio compares the model's
# error against that baseline.
rsquare = 1 - mean_squared_error(y_test, y_predict) / np.var(y_test)
rsquare
0.5682464825049472

from sklearn.metrics import r2_score
r2_score(y_test, y_predict)
0.5682464825049472

posted @ 2020-11-25 23:31  止一  阅读(107)  评论(0)  编辑  收藏  举报