【PY从0到1】 金融数据处理2
# tushare ID: 409200
# [PY from 0 to 1] Financial data processing, part 2
#
# Loads closing prices for three stocks, derives daily and cumulative
# returns, inspects the return distributions (summary statistics,
# histograms, Q-Q plots) and downloads the HS300 index for comparison.

import warnings

import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as stats  # used for the Q-Q plots
import seaborn as sns
import tushare as ts

warnings.simplefilter('ignore')

# Apply seaborn's default plot styling.
sns.set()

# Load the closing prices and index them by date.
three_stock_data_close = pd.read_csv('d://three_stock_data_close.csv')
three_stock_data_close['date'] = pd.to_datetime(three_stock_data_close['date'])  # parse as timestamps
three_stock_data_close.set_index('date', inplace=True)  # use the date as the index
# shift(1) below must mean "previous trading day", so keep rows oldest-first.
three_stock_data_close.sort_index(inplace=True)

# 1> Return calculations
# Daily simple return: close_t / close_(t-1) - 1.
per_earnings = three_stock_data_close / three_stock_data_close.shift(1) - 1
# Drops the first row (no previous close) and any row with a missing value.
per_earnings = per_earnings.dropna()

# Cumulative return: running product of (1 + daily return).
cum_return = (1 + per_earnings).cumprod()

# Plot the cumulative returns.
cum_return.plot(figsize=(8, 6))

# 2> Distribution of daily returns
# (1) Summary statistics
print(per_earnings.describe())
# Output recorded by the original author:
#         002594.SH   600036.SZ   600660.SZ
# count  242.000000  242.000000  242.000000
# mean     0.006486    0.001192    0.002770
# std      0.036034    0.018727    0.026172
# min     -0.100000   -0.066650   -0.077674
# 25%     -0.015043   -0.009229   -0.012393
# 50%      0.004417    0.000000    0.001112
# 75%      0.025218    0.011047    0.014277
# max      0.100000    0.099891    0.098770

# NOTE(review): the exchange suffixes in these column labels look swapped
# (002594 would normally be .SZ, 6xxxxx codes .SH). They are kept verbatim
# because they must match the CSV header -- confirm against the data file.
p_e_zf1 = per_earnings['002594.SH']
p_e_zf2 = per_earnings['600036.SZ']
p_e_zf3 = per_earnings['600660.SZ']
daily_return_series = (p_e_zf1, p_e_zf2, p_e_zf3)

# (2) Histograms of daily returns, one panel per stock on a 2x2 grid.
plt.figure(figsize=(9, 6))
for panel, series in enumerate(daily_return_series, start=1):
    plt.subplot(2, 2, panel)
    plt.hist(series, bins=40)
    plt.title(series.name)
plt.tight_layout()

# (3) Q-Q plots: check whether daily returns look normally distributed.
plt.figure(figsize=(9, 6))
for panel, series in enumerate(daily_return_series, start=1):
    ax = plt.subplot(2, 2, panel)
    stats.probplot(series, dist='norm', plot=ax)
    ax.set_title(series.name)
plt.tight_layout()

# 3> Price correlation, using the HS300 index as the reference
# Prepare the HS300 data in the same shape as the stock table.
# Called without an argument, ts.pro_api() uses the token previously stored
# with ts.set_token(...); the original had this line commented out, which
# left `pro` undefined.
pro = ts.pro_api()
hs300 = pro.index_daily(ts_code='399300.SZ', start_date='20191201', end_date='20201201')
hs300.rename(columns={'trade_date': 'date', 'ts_code': 'code'}, inplace=True)
# .copy() so the column assignment below writes to an independent frame.
hs300 = hs300[['close', 'date']].copy()
hs300['date'] = pd.to_datetime(hs300['date'])  # parse as timestamps
hs300.set_index('date', inplace=True)  # use the date as the index
hs300.rename(columns={'close': 'HS300.SZ'}, inplace=True)
# Daily and cumulative returns of the HS300 index.
# tushare's daily endpoints normally return newest-first rows; sort so that
# shift(1) refers to the previous trading day (a no-op if already ascending).
hs300 = hs300.sort_index()
hs300_return = hs300 / hs300.shift(1) - 1
hs300_return = hs300_return.dropna()
hs300_cum = (1 + hs300_return).cumprod()

# Merge the index into the combined table; keep only dates present in both.
cum = pd.concat([cum_return, hs300_cum], axis=1)
cum = cum.dropna()

# Plot the cumulative returns of the three stocks against the index.
plt.figure(figsize=(15, 6))
for column in ('002594.SH', '600036.SZ', '600660.SZ', 'HS300.SZ'):
    plt.plot(cum[column], label=column)
plt.legend()

# Pairwise correlation matrix.
# NOTE(review): this correlates cumulative-return paths, not daily returns.
# Kept as in the original; confirm this is the intended measure.
corrs = cum.corr()

# Heatmap of the correlation matrix.
plt.figure(figsize=(10, 6))
sns.heatmap(corrs)

# Scatter of one stock against another.
plt.figure(figsize=(8, 6))
plt.title('Stock Correlation')
plt.plot(cum['600660.SZ'], cum['600036.SZ'], '.')
plt.xlabel('600660.SZ')
plt.ylabel('600036.SZ')

# Render all figures when run as a plain script.
plt.show()
AI量化的成长之路

浙公网安备 33010602011771号