数据挖掘
1、
# Section 1: print summary statistics for the spreadsheet, draw a box plot of
# it, and annotate every outlier ("flier") of the first plotted column with
# its value.
import pandas as pd
import matplotlib.pyplot as plt

shuju = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_fish_congee(1).xls'
data = pd.read_excel(shuju)
print(data.describe())

# SimHei is requested as the sans-serif font because the title below is
# Chinese; unicode_minus is disabled so minus signs use the ASCII hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
p = data.boxplot(return_type='dict')

# p['fliers'][0] is the line object holding the outlier markers of the first
# box; its x/y data are the marker coordinates.
x = p['fliers'][0].get_xdata()
# The original wrote `y.sort;`, which only references the method and never
# calls it, so the values stayed unsorted.  The label offset below is computed
# from the gap between consecutive values and therefore needs ascending order.
# sorted() is used so the array returned by get_ydata() is left untouched.
y = sorted(p['fliers'][0].get_ydata())

for i, (xi, yi) in enumerate(zip(x, y)):
    # Gap to the previous (smaller) outlier; the first outlier has none.
    gap = yi - y[i - 1] if i > 0 else 0
    if gap:
        # The closer two outliers are, the further left the label is pushed,
        # so that neighbouring labels do not overlap.
        offset = 0.05 - 0.8 / gap
    else:
        # First outlier, or a duplicate of the previous value: use the fixed
        # offset instead of dividing by zero.
        offset = 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(xi + offset, yi))

plt.title('箱型图——3023')
plt.show()
2、
# Section 2: bucket the 'sale' column into 500-wide bins, print the count and
# percentage of rows per bin, and draw the percentages as a bar chart.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

catering_sale = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_fish_congee(1).xls'
data = pd.read_excel(catering_sale, names=['data', 'sale'])
print(data.describe())

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
# Build the '[lo,hi)' labels from the bin edges so the two cannot drift apart.
labels = ['[{},{})'.format(lo, hi) for lo, hi in zip(bins, bins[1:])]

# right=False makes every bin left-closed / right-open, which is what the
# '[lo,hi)' labels state.  With the pd.cut default (right=True) the bins are
# (lo,hi], so a value sitting exactly on an edge was counted under the wrong
# label and a value of 0 was dropped as NaN.
data['sale分层'] = pd.cut(data['sale'], bins, labels=labels, right=False)

# observed=False keeps bins that contain no rows, so the chart always shows
# all eight intervals regardless of the pandas version's default.
aggResult = data.groupby('sale分层', observed=False).agg({'sale': 'count'})
print(aggResult)

# Share of each bin, rounded to two decimals and expressed in percent.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100
print(pAggResult)

# SimHei is requested as the sans-serif font because the title and the axis
# label (the 'sale分层' index name) are Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(10, 6))
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)
plt.title('季度销售额频率分布直方图——3023', fontsize=20)
plt.show()
3、
# Section 3: print summary statistics for the dish-profit spreadsheet and draw
# a pie chart with one wedge per row, sized by the '盈利' column and labelled
# with the '菜品名' column.
import pandas as pd
import matplotlib.pyplot as plt

catering_dish_profit = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_dish_profit(1).xls'
data = pd.read_excel(catering_dish_profit)
print(data.describe())

plt.figure(figsize=(8, 6))
plt.pie(data['盈利'], labels=data['菜品名'])

# SimHei is requested as the sans-serif font because the labels and the title
# are Chinese.
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.title('菜品销售量分布(饼图)——3023')
# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')
plt.show()
4、
# Section 4: scatter plot of the "Eletricity" column against the "Date" column
# of user.csv, drawn in red.
import pandas as pd
import matplotlib.pyplot as plt

df_normal = pd.read_csv("D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/user.csv")

plt.figure(figsize=(8, 4))
plt.scatter(df_normal["Date"], df_normal["Eletricity"], c='red')

# Place the major x-axis ticks at multiples of 7 on the current axes.
plt.gca().xaxis.set_major_locator(plt.MultipleLocator(7))

plt.xlabel("日期")
plt.ylabel("每日电量")
plt.title("正常用户电量趋势(散点图)——3023")
# SimHei is requested as the sans-serif font because the labels and the title
# are Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.show()
5、
# Section 5: plot sin(x) at 50 evenly spaced points on [0, 2*pi].
import numpy as np
import matplotlib.pyplot as plt

angles = np.linspace(0, 2 * np.pi, 50)
# Format string 'bp--': blue, pentagon markers, dashed line.
plt.plot(angles, np.sin(angles), 'bp--')

# SimHei is requested as the sans-serif font because the title is Chinese.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.title("正弦函数(散点图)——3023")
plt.show()