1 !pip install tushare
2 import tushare as ts
3 import matplotlib.pyplot as plt
4
# Fetch the K-line history for ticker 601318 (ktype='D', 2008-01-01 through
# 2020-05-10) via tushare and export it to CSV for the later steps.
# NOTE(review): to_csv is called with defaults, so the DataFrame index is
# presumably written as the first CSV column -- confirm against the file.
datapath1 = "./SH601318.csv"
df1 = ts.get_k_data(
    '601318',
    ktype='D',
    start='2008-01-01',
    end='2020-05-10',
)
df1.to_csv(datapath1)
8
9
10 import numpy as np
11 import tensorflow as tf
12 from tensorflow.keras.layers import Dense, Dropout, SimpleRNN
13 import matplotlib.pyplot as plt
14 import os
15 import pandas as pd
16 from sklearn.preprocessing import MinMaxScaler
17 from sklearn.metrics import mean_squared_error, mean_absolute_error
18 import math
19
20
# Load the quotes that were exported to CSV earlier in the script.
pingan = pd.read_csv("./SH601318.csv")
# Notebook-style previews; outside an interactive cell the results are discarded.
pingan.head()
pingan.tail()


# Chronological split: the first `split_row` rows train the model, the rest
# are held out. The 2:3 column slice keeps a 2-D (rows, 1) array, which is the
# shape MinMaxScaler expects.
# NOTE(review): column 2 is presumably the opening price (the original notes
# talk about opening prices) -- confirm against the CSV header.
split_row = 2201
training_set = pingan.iloc[:split_row, 2:3].values
test_set = pingan.iloc[split_row:, 2:3].values

# Normalisation to [0, 1].
# The scaler is fitted on the training data ONLY and then merely applied to
# the test data. Re-fitting on the test set (as the previous fit_transform
# call did) puts test inputs on a different scale than the one the network is
# trained on and leaks test-set statistics into preprocessing. Scaled test
# values may therefore fall outside [0, 1] when test prices leave the range
# seen during training; that is expected.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)
test_set = sc.transform(test_set)
33
34
# Sliding-window length: every sample consists of `window` consecutive scaled
# values, and its label is the value that immediately follows the window.
window = 50

# Training samples: one (window, label) pair for every position i in
# [window, len(training_set_scaled)), i.e. len(training_set_scaled) - window
# pairs in total.
x_train = [
    training_set_scaled[i - window:i, 0]
    for i in range(window, len(training_set_scaled))
]
y_train = [
    training_set_scaled[i, 0]
    for i in range(window, len(training_set_scaled))
]

# Shuffle inputs and labels separately but re-seed before each call: both
# lists have the same length, so they receive the same permutation and every
# window stays paired with its label.
np.random.seed(7)
np.random.shuffle(x_train)
np.random.seed(7)
np.random.shuffle(y_train)
tf.random.set_seed(7)

x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape to the layout the recurrent layers take:
# [number of samples, time steps, features per time step].
# Each time step carries a single value, hence the trailing 1.
x_train = np.reshape(x_train, (x_train.shape[0], window, 1))

# Test samples are built the same way, but are not shuffled so that the
# predictions keep their chronological order.
x_test = [test_set[i - window:i, 0] for i in range(window, len(test_set))]
y_test = [test_set[i, 0] for i in range(window, len(test_set))]

x_test, y_test = np.array(x_test), np.array(y_test)
x_test = np.reshape(x_test, (x_test.shape[0], window, 1))
66
67
# Network: two stacked SimpleRNN layers, each followed by 20% dropout, and a
# single-unit Dense output.
model = tf.keras.Sequential()
# The first recurrent layer returns the full sequence so the second recurrent
# layer receives one vector per time step.
model.add(SimpleRNN(80, return_sequences=True))
model.add(Dropout(0.2))
model.add(SimpleRNN(100))
model.add(Dropout(0.2))
model.add(Dense(1))

# Adam with a learning rate of 0.01, minimising mean squared error.
adam = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=adam, loss='mean_squared_error')
78
checkpoint_save_path = "./checkpoint/rnn_stock.ckpt"

# Resume from previously saved weights when the checkpoint's index file is present.
checkpoint_index = checkpoint_save_path + '.index'
if os.path.exists(checkpoint_index):
    print("--------load the model-----------")
    model.load_weights(checkpoint_save_path)

# Save weights only, and only when the monitored validation loss improves.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_save_path,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Train for 15 epochs in mini-batches of 32, validating on the test windows
# after every epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=15,
    validation_data=(x_test, y_test),
    validation_freq=1,
    callbacks=[cp_callback],
)
92
model.summary()

# Write every trainable variable to a text file: name, shape and values,
# each on its own line.
with open("./weights.txt", 'w') as f:
    for v in model.trainable_variables:
        # print() applies str() to each argument and terminates the record
        # with a newline.
        print(v.name, v.shape, v.numpy(), sep='\n', file=f)
100
101
102
# Per-epoch training and validation loss recorded by model.fit().
loss, val_loss = (history.history[key] for key in ('loss', 'val_loss'))

# Draw both curves on one figure.
for curve, caption in ((loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    plt.plot(curve, label=caption)
plt.title('TRaining an Validation Loss')
plt.legend()
plt.show()
111
112
# Predict on the test windows and map the outputs back from the scaled range
# to price units.
predicted_stock_price = sc.inverse_transform(model.predict(x_test))

# Ground truth for the same positions: the first 50 scaled test values only
# serve as input for the first window and have no prediction of their own.
real_stock_price = sc.inverse_transform(test_set[50:])

# Actual (red) versus predicted (blue) prices.
for series, colour, caption in (
    (real_stock_price, 'red', 'Pingan Stock Price'),
    (predicted_stock_price, 'blue', 'Predicted Pingan Stock Price'),
):
    plt.plot(series, color=colour, label=caption)
plt.xlabel('Time')
plt.ylabel('Pingan Stock Price')
plt.legend()
plt.show()
125
126
# Error metrics on the de-normalised prices. Arguments follow scikit-learn's
# (y_true, y_pred) order; both metrics are symmetric, so the reported values
# are unaffected by the order.

# MSE, mean squared error: E[(prediction - truth)^2].
mse = mean_squared_error(real_stock_price, predicted_stock_price)
# RMSE, root mean squared error: sqrt(MSE). Reuses the value computed above
# instead of evaluating the MSE a second time.
rmse = math.sqrt(mse)
# MAE, mean absolute error: E[|prediction - truth|].
mae = mean_absolute_error(real_stock_price, predicted_stock_price)
print('均方误差: %.6f' % mse)
print('均方根误差: %.6f' % rmse)
print('平均绝对误差: %.6f' % mae)