# [PY from 0 to 1] Financial Data Processing, Part 2 (【PY从0到1】 金融数据处理2)

# tushare ID: 409200
# 【PY从0到1】 金融数据处理2

# 导入库
import pandas as pd
import seaborn as sns
import tushare as ts
import matplotlib.pyplot as plt
import warnings; warnings.simplefilter('ignore')
import scipy.stats as stats # 绘制QQ图


# Apply seaborn's default styling to subsequent matplotlib figures.
sns.set()

# Load the three-stock table (close prices, per the file name) and index
# it by trading date.
closing_prices = pd.read_csv('d://three_stock_data_close.csv')
closing_prices['date'] = pd.to_datetime(closing_prices['date'])  # parse the date column
three_stock_data_close = closing_prices.set_index('date')  # 'date' becomes the index



# 1> Financial calculations

# Daily simple return: each row divided by the previous row, minus one.
previous_rows = three_stock_data_close.shift(1)
per_earnings = three_stock_data_close.div(previous_rows).sub(1)
per_earnings = per_earnings.dropna()  # the first row has no predecessor -> NaN


# Cumulative return: running product of (1 + daily return).
cum_return = per_earnings.add(1).cumprod()

# Plot the cumulative-return curves, one line per column.
cum_return.plot(figsize=(8, 6))

# 2> Summary statistics and distribution plots

# (1) Summary statistics of the daily returns
summary_stats = per_earnings.describe()
print(summary_stats)
# Sample output recorded by the author:
#         002594.SH   600036.SZ   600660.SZ
# count  242.000000  242.000000  242.000000
# mean     0.006486    0.001192    0.002770
# std      0.036034    0.018727    0.026172
# min     -0.100000   -0.066650   -0.077674
# 25%     -0.015043   -0.009229   -0.012393
# 50%      0.004417    0.000000    0.001112
# 75%      0.025218    0.011047    0.014277
# max      0.100000    0.099891    0.098770

# (2) Histograms of the daily returns, one subplot per stock
# NOTE(review): the exchange suffixes in these column names look swapped
# (00xxxx codes are normally .SZ, 60xxxx codes .SH) -- confirm against the
# script that produced the CSV before renaming anything.
plt.figure(figsize=(9, 6))
p_e_zf1 = per_earnings['002594.SH']
p_e_zf2 = per_earnings['600036.SZ']
p_e_zf3 = per_earnings['600660.SZ']
# Fill the first three cells of a 2x2 grid, 40 bins each.
for grid_cell, daily_returns in zip((221, 222, 223),
                                    (p_e_zf1, p_e_zf2, p_e_zf3)):
    plt.subplot(grid_cell)
    plt.hist(daily_returns, bins=40)

# (3) QQ plots
# Visual check of whether each stock's daily returns follow a normal
# distribution: points close to the reference line suggest normality.
plt.figure(figsize=(9, 6))
for grid_cell, daily_returns in zip((221, 222, 223),
                                    (p_e_zf1, p_e_zf2, p_e_zf3)):
    stats.probplot(daily_returns, dist='norm', plot=plt.subplot(grid_cell))

# 3> Stock price correlation

# Use the HS300 index (ts_code 399300.SZ) as the benchmark and put it on the
# same cumulative-return scale as the three stocks.

# Create the Tushare Pro client.  The original line was commented out, which
# left `pro` undefined (NameError).  Called without a token, pro_api() falls
# back to the token stored earlier via ts.set_token('<your token>').
pro = ts.pro_api()
hs300 = pro.index_daily(ts_code='399300.SZ',
                        start_date='20191201',
                        end_date='20201201')
hs300 = hs300.rename(columns={'trade_date': 'date',
                              'ts_code': 'code'})
# .copy() gives an independent frame, so the column assignment below is not a
# chained assignment on a slice of the API result.
hs300 = hs300[['close', 'date']].copy()
# Tushare returns trade_date as 'YYYYMMDD' strings.
hs300['date'] = pd.to_datetime(hs300['date'], format='%Y%m%d')
# Tushare returns rows newest-first; shift()/cumprod() below need ascending
# (oldest-first) order, otherwise the returns are computed backwards in time.
hs300 = hs300.set_index('date').sort_index()
hs300 = hs300.rename(columns={'close': 'HS300.SZ'})
hs300_return = hs300 / hs300.shift(1) - 1  # daily simple return
hs300_return = hs300_return.dropna()       # first row has no predecessor
hs300_cum = (1 + hs300_return).cumprod()   # cumulative return

# Merge into one table, aligned on date; keep only dates present in all columns.
cum = pd.concat([cum_return, hs300_cum], axis=1)
cum = cum.dropna()

# Plot the cumulative returns of the three stocks and the HS300 column
# on one set of axes.
plt.figure(figsize=(15, 6))
for column_name in ('002594.SH', '600036.SZ', '600660.SZ', 'HS300.SZ'):
    plt.plot(cum[column_name], label=column_name)
plt.legend()

# Pairwise correlation matrix of the cumulative-return columns.
# NOTE(review): this correlates cumulative levels rather than daily returns;
# series that merely trend together can score high -- confirm this is intended.
corrs = cum.corr()

# Draw the correlation matrix as a heatmap.
plt.figure(figsize=(10, 6))
sns.heatmap(data=corrs)

# Relationship between two stocks: scatter of their cumulative returns
# (one dot per date).
plt.figure(figsize=(8, 6))
plt.title('Stock Correlation')
plt.plot(cum['600660.SZ'], cum['600036.SZ'], '.')
plt.xlabel('600660.SZ')
plt.ylabel('600036.SZ')

# Display every figure created so far.  Without this call nothing is shown
# when the file is run as a plain script outside an interactive session.
plt.show()

 

# Posted 2021-04-26 21:13 by 泥鳅不怕水 (reads: 106, comments: 0)