Python数据分析与挖掘第三章数据探索学习笔记

点击查看代码
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 

# Build an 8x4 frame of random integers drawn from [0, 100), columns A-D.
data = pd.DataFrame(
    np.random.randint(0, 100, size=(8, 4)),
    columns=list('ABCD'),
)

# Basic descriptive statistics for every column
data.describe()
![photo1](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330233847648-71836622.png)
点击查看代码
# Sum of each column
data.sum()
# Mean of each column
data.mean()
# Variance of each column
data.var()
# Pairwise correlation matrix between the columns
data.corr()
# Pairwise covariance matrix between the columns
data.cov()
# Skewness of each column
data.skew()
data.kurt()  # kurtosis of each column
# Cumulative (running) sum down each column
data.cumsum()
![photo2](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234230951-1706787427.png) ![photo3](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234254833-873773735.png) ![photo4](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234303119-1390018838.png) ![photo5](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234310872-515842936.png) ![photo6](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234332485-1338782614.png)
点击查看代码
# Sum over a sliding window of 2 consecutive rows
data.rolling(2).sum()

# %%
# Cumulative product down each column
data.cumprod()

# %%
# Running maximum down each column
data.cummax()

# %%
# Running minimum down each column
data.cummin()

# %%
# Mean over a sliding window of 2 consecutive rows
data.rolling(2).mean()

# %%
# Rolling standard deviation over a window of 8 rows, printed next to the
# whole-column standard deviation for comparison.
# NOTE(review): if `data` is still the 8-row frame built earlier, the window
# spans the entire frame, so only the last row can be non-NaN - confirm.
print(data.rolling(8).std())
print(data.std())
![photo7](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234707232-1596748923.png) ![photo8](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234713068-786542334.png) ![photo9](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234720803-819899274.png) ![photo10](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234726457-845112790.png) ![photo11](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234734078-800825089.png) ![photo12](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234740032-18048410.png) ![photo13](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234749669-1617536556.png)
点击查看代码
# %%
# Sine curve over [0, 2*pi], sampled at 50 evenly spaced points
x = np.linspace(0, 2 * np.pi, num=50)
y = np.sin(x)
# Green dashed line, spelled out instead of the 'g--' format shorthand
plt.plot(x, y, color='g', linestyle='--')
plt.show()
![photo14](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234908814-539472817.png)
点击查看代码
# Pie chart
labels = ['A', 'B', 'C', 'D']
sizes = [30, 40, 25, 5]
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']
# Offset of each wedge from the centre; only the first two are pulled out
explode = (0.2, 0.1, 0, 0)
plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # print each share as a percentage with one decimal
    shadow=True,
    startangle=90,
)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

![photo15](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330234938348-1518534521.png)
点击查看代码
# Histogram
# 1000 random samples drawn from the standard normal distribution
x = np.random.randn(1000)
# Group the samples into 10 bins
plt.hist(x, bins=10)
plt.show()
![photo16](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330235011784-1602221095.png)
点击查看代码
# Box plot via the generic DataFrame.plot interface
x = np.random.randn(1000)
# Stack x and x + 1 as rows, then transpose to get a two-column DataFrame
data = pd.DataFrame([x, x + 1]).transpose()
data.plot(kind='box')
plt.show()
![photo17](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330235034541-1614945849.png)
点击查看代码
# Box plot, this time through the dedicated DataFrame.boxplot method
x = np.random.randn(1000)
data = pd.DataFrame([x,x+1]).T  # build a two-column DataFrame: x and x + 1
data.boxplot()
# Render the figure explicitly so the plot also appears when this snippet is
# run as a plain script rather than in a notebook cell.
plt.show()
![photo18](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330235110088-1080800974.png)
点击查看代码
# Select the SimHei font and turn off the Unicode minus glyph.
# NOTE(review): presumably this is so the Chinese legend labels and negative
# tick labels render correctly; it requires SimHei to be installed - confirm.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
x = pd.Series(np.exp(np.arange(20)))  # exponential series e**0 .. e**19
plt.figure(figsize=(8, 9))  # set the canvas size
# Top panel: the raw data on a linear y-axis.
ax1 = plt.subplot(2, 1, 1)
x.plot(ax=ax1, label='原始数据图', legend=True)
# Bottom panel: the same data on a logarithmic y-axis. Each panel keeps its
# own handle and is targeted explicitly instead of relying on whichever
# axes happens to be current.
ax2 = plt.subplot(2, 1, 2)
x.plot(ax=ax2, logy=True, label='对数数据图', legend=True)
plt.show()
![photo19](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330235134454-1656732726.png)
点击查看代码
# Error column. Error-bar sizes must be non-negative, so take the absolute
# value of the normal draws; raw randn output contains negative values,
# which matplotlib's errorbar rejects.
error = np.abs(np.random.randn(10))
y = pd.Series(np.sin(np.arange(10)))  # central (mean) values: sin(0) .. sin(9)
y.plot(yerr=error)  # line plot with vertical error bars
plt.show()
![photo20](https://img2022.cnblogs.com/blog/1966855/202203/1966855-20220330235155849-2046253525.png)
posted @ 2022-03-30 23:53  贾诩来编程  阅读(69)  评论(0)    收藏  举报