python--线性回归

首先安装要用到的包:scikit-learn(导入时的名字是 sklearn),它是 Python 的机器学习库;另外还需要 pandas、numpy 和 matplotlib。

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
# Load the dataset from a local CSV file.
data = pd.read_csv('C://Users//leon//Desktop//CCPP.csv')
# NOTE: as bare expressions these two lines only show output in an
# interactive session (REPL / notebook); run as a script they display nothing.
data.head()
data.shape

# Use the AT, V, AP and RH columns as the sample features and PE as the target.
# Double brackets keep both X and y as DataFrames (2-D).
X = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]

# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and
# removed in 0.20, so importing from it fails on current versions.
from sklearn.model_selection import train_test_split

# Split into training and test subsets; random_state fixes the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# Fit an ordinary least squares linear model on the training subset.
from sklearn.linear_model import LinearRegression

linreg = LinearRegression()
linreg.fit(X_train, y_train)
print(linreg.intercept_)  # intercept (constant term) of the fitted model
print(linreg.coef_)       # coefficients of the feature columns

# Evaluate the fitted model on the held-out test subset using MSE.
y_pred = linreg.predict(X_test)
from sklearn import metrics

print("MSE:", metrics.mean_squared_error(y_test, y_pred))

# Obtain 10-fold cross-validated predictions for every sample in the dataset.
from sklearn.model_selection import cross_val_predict

predicted = cross_val_predict(linreg, X, y, cv=10)

# Plot measured vs. predicted values; the dashed diagonal marks where
# predicted == measured, so points close to it are well predicted.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

用训练集拟合模型后,在测试集上进行预测并计算 MSE;最后用 10 折交叉验证得到全部样本的预测值,作图与实测值对比。

posted @ 2017-11-01 10:51  方木--数据分析与挖掘  阅读(801)  评论(4)  编辑  收藏  举报