# 统计数据作图 (statistical data plotting)

import pandas as pd
import numpy  as  np
import matplotlib.pyplot as plt
# Histogram: percentage of sales records that fall into each 500-wide band.
# Select a CJK-capable font up front so the Chinese title and labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']
catering_sale = 'D:\\TencentDate\\catering_fish_congee.xls'  # catering sales workbook
# names= only renames the columns to 'date' and 'sale'; no index column is set.
data0 = pd.read_excel(catering_sale, names=['date', 'sale'])
bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']
# right=False makes every bin left-closed / right-open, which is what the
# '[a,b)' labels state.  The pd.cut default is right-closed '(a,b]' bins,
# which contradicts the labels and silently drops a sale of exactly 0.
data0['sale分层'] = pd.cut(data0.sale, bins, labels=labels, right=False)
# observed=False keeps bands with no records in the result, so the x axis
# always shows all eight bands whatever the installed pandas default is.
# "size" counts the rows in each band.
aggResult = data0.groupby(by=['sale分层'], observed=False)['sale'].agg([("sale", "size")])
# Share of each band, rounded to two decimals and expressed as a percentage.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100
plt.figure(figsize=(10, 6))  # figure size in inches
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)  # frequency bar chart
plt.title('3009季度销售额频率分布直方图', fontsize=20)
plt.show()

 

 

 

# Box plot of the sales column, with every outlier annotated by its value.
# Apply the style first so it cannot reset the rcParams overrides below.
plt.style.use("ggplot")
plt.rcParams['font.sans-serif'] = ['SimHei']  # CJK-capable font for the Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly with that font
catering_fish_congee = 'D:\\TencentDate\\catering_fish_congee.xls'  # catering sales workbook
# names= only renames the columns to 'date' and 'sale'; no index column is set.
data = pd.read_excel(catering_fish_congee, names=['date', 'sale'])
bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
# NOTE(review): the first band is labelled 'A_type' rather than '[0,500)';
# left unchanged because it may be intentional -- confirm.
labels = ['A_type', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']
# right=False gives left-closed / right-open bins, matching the '[a,b)'
# labels (the pd.cut default is right-closed '(a,b]').
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)
print(data)
print(data.describe())
# Row count per band; observed=False keeps bands that have no records.
aggResult = data.groupby(by=['sale分层'], observed=False)['sale'].agg([("sale", "size")])
plt.figure()
# Name the column explicitly so 'fliers'[0] is always the sales outliers.
p = data.boxplot(column='sale', return_type='dict')
fliers = p['fliers'][0]
x = fliers.get_xdata()
# Sorted copy: avoids reordering the array owned by the plotted artist.
y = np.sort(fliers.get_ydata())
plt.title('3009季度销售额分布(箱型图)')
# Annotate each outlier with its value.  Labels of neighbouring outliers can
# overlap, so the horizontal offset depends on the gap to the previous
# (smaller) outlier.  The constants were tuned by hand for this data set and
# need re-tuning for other data.
prev = None
for xi, yi in zip(x, y):
    if prev is None or yi == prev:
        # First outlier, or same value as the previous one: use the fixed
        # offset instead of dividing by a zero gap.
        dx = 0.08
    else:
        dx = 0.05 - 0.8 / (yi - prev)
    plt.annotate(yi, xy=(xi, yi), xytext=(xi + dx, yi))
    prev = yi
plt.show()

 

 

 

# Line chart comparing the sales of departments A, B and C by month.
plt.rcParams['font.sans-serif'] = ['SimHei']  # CJK-capable font for the Chinese labels
dish_sale = 'D:\\TencentDate\\dish_sale.xls'
date_line = pd.read_excel(dish_sale)
plt.figure(figsize=(10, 5))
# One line per department column, each with its own colour and marker.
for dept, color, marker in (('A部门', 'green', 'o'),
                            ('B部门', 'red', 's'),
                            ('C部门', 'skyblue', 'x')):
    plt.plot(date_line['月份'], date_line[dept], color=color, label=dept, marker=marker)
plt.legend()
# This text is the chart title (it was previously passed to plt.ylabel);
# the other charts in this script set their '3009...' caption with plt.title.
plt.title('3009部门间销售额折线图')
plt.show()

 

 

 

# Pie chart of the '盈利' (profit) column, one wedge per '菜品名' (dish name).
catering_dish_profit = 'D:\\TencentDate\\catering_dish_profit.xls'
data_dish = pd.read_excel(catering_dish_profit)
x_dish, labels_dish = data_dish['盈利'], data_dish['菜品名']
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to draw the Chinese labels
fig, ax = plt.subplots(figsize=(10, 6))
ax.pie(x_dish, labels=labels_dish)
ax.set_title('3009菜品销售额分布(饼图)')
ax.axis('equal')  # equal axis scaling keeps the pie circular
plt.show()

 

 

 

# Scatter plot of y = exp(x) - x - 1 at 50 evenly spaced x values in [-4, 2].
x_point = np.linspace(-4, 2, num=50)
y_point = np.exp(x_point) - x_point - 1
fig, ax = plt.subplots(figsize=(8, 4))
ax.scatter(x_point, y_point, c='pink')
ax.set_xlabel('x轴')
ax.set_ylabel('y轴')
ax.set_title('3009散点图')
plt.show()

 

 

 

# posted @ 2023-02-26 14:25  孤影化双皮奶  阅读(88)  评论(0)    收藏  举报