Python数据挖掘之时序模型预测
一、单变量序列预测
from statsmodels.tsa.arima.model import ARIMA
import pandas as pd
import numpy as np
data = pd.read_excel('E:\桌面\作业\py/new_reg_data_GM11.xls')
data_y = data['y']
data_y = data_y.dropna()
model = ARIMA(data_y, order=(1,1,1))
model_fit = model.fit()
print('模型报告为:\n', model_fit.summary())
print('预测未来2年,其预测结果、标准误差、置信区间如下:\n', model_fit.forecast(2))
结果:

#用AR自回归预测
from statsmodels.tsa.ar_model import AutoReg
import pandas as pd
import numpy as np
data = pd.read_excel('E:\桌面\作业\py/new_reg_data_GM11.xls')
data_y = data['y']
data_y = data_y.dropna()
model = AutoReg(data_y, lags=1)
model_fit = model.fit()
print('模型报告为:\n', model_fit.summary())
print('预测未来2年,其预测结果、标准误差、置信区间如下:\n', model_fit.forecast(2))
结果:

import pandas as pd
import numpy as np
data = pd.read_excel('E:\桌面\作业\py/new_reg_data_GM11.xls')
data_y = data['y']
data_y = data_y.dropna()
data_ymin = np.min(data_y)
data_ymax = np.max(data_y)
mmdata_y = (data_y-data_ymin)/(data_ymax-data_ymin)
print(mmdata_y)
#原数据图序列
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
data_y.plot()
plt.show()
# 自相关图
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(data_y).show()
结果:

二、SVR预测
linearsvr = LinearSVR() # 调用LinearSVR()函数
linearsvr.fit(x_train,y_train)
x = ((data[feature] - data_mean[feature])/data_std[feature]).values # 预测,并还原结果。
data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
outputfile = 'E:\桌面\作业\py/new_reg_data_GM11_revenue.xls' # SVR预测后保存的结果
data.to_excel(outputfile)
print('真实值与预测值分别为:\n',data[['y','y_pred']])
fig = data[['y','y_pred']].plot(subplots = True, style=['b-o','r-*']) # 画出预测结果图
plt.show()