数据挖掘

1、

import pandas as pd
import matplotlib.pyplot as plt

# Script 1: print summary statistics for a spreadsheet and draw a box plot
# whose outlier points are annotated with their values.

# Spreadsheet to analyse (machine-specific absolute path).
shuju = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_fish_congee(1).xls'
data = pd.read_excel(shuju)
print(data.describe())

# SimHei is requested so the Chinese title can be rendered; unicode_minus is
# disabled so negative tick labels use the ASCII hyphen.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
p = data.boxplot(return_type='dict')

# Only the first entry of 'fliers' (the outlier markers of the first box) is
# annotated.
x = p['fliers'][0].get_xdata()
# The original wrote `y.sort;`, which only references the method and never
# calls it.  Sort a copy so the "gap to previous outlier" below is meaningful
# and the artist's own data array is left untouched.
y = sorted(p['fliers'][0].get_ydata())

for i, (xi, yi) in enumerate(zip(x, y)):
    # Gap to the previous (smaller) outlier; 0 for the first point.
    gap = yi - y[i - 1] if i > 0 else 0
    if gap:
        # The closer two outliers are, the further left the label is pushed,
        # so neighbouring labels do not overlap.
        text_x = xi + 0.05 - 0.8 / gap
    else:
        # First point, or a duplicate value: use the fixed offset instead of
        # dividing by zero.
        text_x = xi + 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(text_x, yi))

plt.title('箱型图——3023')

plt.show()

 2、

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Script 2: bin the 'sale' column into 500-wide intervals, print the count and
# percentage per interval, and draw the percentages as a bar chart.

# Spreadsheet to analyse (machine-specific absolute path).
catering_sale = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_fish_congee(1).xls'
# The two columns are renamed to 'data' and 'sale' on load.
data = pd.read_excel(catering_sale, names=['data', 'sale'])
print(data.describe())

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']

# right=False makes every bin left-closed / right-open, which is what the
# labels above say.  With the default (right-closed) bins a value of exactly
# 500 would be labelled '[0,500)' and a value of 0 would fall outside all bins.
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)

# observed=False keeps intervals that contain no rows in the result (count 0),
# so every label still gets a bar.
aggResult = data.groupby('sale分层', observed=False).agg({'sale': 'count'})
print(aggResult)

# Share of rows per interval, rounded to two decimals and expressed in percent.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100
print(pAggResult)

# SimHei is requested so the Chinese title can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(10, 6))
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)
plt.title('季度销售额频率分布直方图——3023', fontsize=20)
plt.show()

 3、

import pandas as pd
import matplotlib.pyplot as plt

# Script 3: print summary statistics for a spreadsheet and draw a pie chart
# with one wedge per row, sized by the '盈利' column and labelled by '菜品名'.

# Spreadsheet to analyse (machine-specific absolute path).
catering_dish_profit = 'D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/catering_dish_profit(1).xls'
data = pd.read_excel(catering_dish_profit)
print(data.describe())

x = data['盈利']
labels = data['菜品名']

# Single axes on an 8x6 inch figure.
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(x, labels=labels)
# SimHei is requested so the Chinese labels and title can be rendered.
plt.rcParams['font.sans-serif'] = 'SimHei'
ax.set_title('菜品销售量分布(饼图)——3023')
# Equal axis scaling so the pie is drawn as a circle.
ax.axis('equal')
plt.show()

 4、

import pandas as pd
import matplotlib.pyplot as plt

# Script 4: scatter-plot the "Eletricity" column against the "Date" column of
# a CSV file.  Column names are used exactly as spelled here.

df_normal = pd.read_csv("D:/WeixinWenjian/WeChat Files/wxid_5onnacvxxvpj22/FileStorage/File/2023-02/user.csv")

# Single axes on an 8x4 inch figure.
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(df_normal["Date"], df_normal["Eletricity"], c='red')

# Place a major x tick at every multiple of 7.
ax.xaxis.set_major_locator(plt.MultipleLocator(7))

ax.set_xlabel("日期")
ax.set_ylabel("每日电量")
ax.set_title("正常用户电量趋势(散点图)——3023")
# SimHei is requested so the Chinese labels and title can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.show()

 

5、

import numpy as np
import matplotlib.pyplot as plt

# Script 5: plot sin(x) at 50 evenly spaced points on [0, 2*pi].

x = np.linspace(0, 2 * np.pi, num=50)
y = np.sin(x)

fig, ax = plt.subplots()
# 'bp--': blue, pentagon markers, dashed line.
ax.plot(x, y, 'bp--')
# SimHei is requested so the Chinese title can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
ax.set_title("正弦函数(散点图)——3023")
plt.show()

 

posted @ 2023-02-24 13:52  35p  阅读(53)  评论(0)    收藏  举报