# 统计数据作图 (Plotting statistical data)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Plot a frequency histogram: the percentage of sales records per 500-wide bin.

# Use a font that can display the Chinese title.
plt.rcParams['font.sans-serif'] = ['SimHei']

catering_sale = 'D:\\TencentDate\\catering_fish_congee.xls'  # catering sales data
# Read the sheet and name its columns 'date' and 'sale'.
# No index column is set here; the default integer index is kept.
data0 = pd.read_excel(catering_sale, names=['date', 'sale'])

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']

# right=False makes each bin left-closed / right-open, which is what the
# labels state. With the default (right=True) a value such as 500 would be
# labelled '[0,500)' and a value of 0 would fall outside every bin.
data0['sale分层'] = pd.cut(data0.sale, bins, right=False, labels=labels)

# Number of records per bin, then each bin's share of the total in percent
# (rounded to two decimals before scaling by 100).
aggResult = data0.groupby(by=['sale分层'])['sale'].agg([("sale", np.size)])
pAggResult = (aggResult / aggResult.sum()).round(2) * 100

plt.figure(figsize=(10, 6))  # figure size in inches
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)  # frequency bars
plt.title('3009季度销售额频率分布直方图', fontsize=20)
plt.show()

# Draw a box plot of the sales data and annotate every outlier with its value.

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # display the minus sign correctly
plt.style.use("ggplot")  # use the ggplot figure style

catering_fish_congee = 'D:\\TencentDate\\catering_fish_congee.xls'  # catering sales data
# Read the sheet and name its columns 'date' and 'sale'.
# No index column is set here; the default integer index is kept.
data = pd.read_excel(catering_fish_congee, names=['date', 'sale'])

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
# NOTE(review): the first label is 'A_type' rather than '[0,500)'; kept as-is,
# confirm whether that is intentional.
labels = ['A_type', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']
# right=False makes the bins left-closed / right-open, matching the labels.
data['sale分层'] = pd.cut(data.sale, bins, right=False, labels=labels)
print(data)
print(data.describe())

# Record count per bin. Not used by the plot below; kept because it rebinds
# the module-level name aggResult. (The original computed it twice in a row.)
aggResult = data.groupby(by=['sale分层'])['sale'].agg([("sale", np.size)])

plt.figure()
p = data.boxplot(return_type='dict')
# Coordinates of the outlier markers of the first box.
x = p['fliers'][0].get_xdata()
y = p['fliers'][0].get_ydata()
y.sort()
plt.title('3009季度销售额分布(箱型图)')

# Add the annotations with annotate. Labels of nearby points can overlap and
# become unreadable, so each label is shifted horizontally by an amount that
# depends on the gap to the previous (smaller) outlier. The constants were
# tuned by hand for this data set and need re-tuning for other data.
for i, (xi, yi) in enumerate(zip(x, y)):
    gap = yi - y[i - 1] if i > 0 else 0
    if gap != 0:
        text_x = xi + 0.05 - 0.8 / gap
    else:
        # First outlier, or a value equal to the previous one: use the fixed
        # offset so that tied outliers do not cause a division by zero.
        text_x = xi + 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(text_x, yi))
plt.show()

# Plot the sales of departments A, B and C per month as a line chart.

dish_sale = 'D:\\TencentDate\\dish_sale.xls'
date_line = pd.read_excel(dish_sale)

plt.figure(figsize=(10, 5))
# One line per department: (column name, line colour, point marker).
for column, color, marker in (('A部门', 'green', 'o'),
                              ('B部门', 'red', 's'),
                              ('C部门', 'skyblue', 'x')):
    plt.plot(date_line['月份'], date_line[column],
             color=color, label=column, marker=marker)
plt.legend()
# The caption names the chart, so it is set as the title (as the other charts
# in this script do) instead of as the y-axis label.
plt.title('3009部门间销售额折线图')
plt.show()

# Draw a pie chart of the profit per dish.

catering_dish_profit = 'D:\\TencentDate\\catering_dish_profit.xls'
data_dish = pd.read_excel(catering_dish_profit)
x_dish = data_dish['盈利']  # wedge sizes
labels_dish = data_dish['菜品名']  # wedge labels

_, ax = plt.subplots(figsize=(10, 6))
ax.pie(x_dish, labels=labels_dish)
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to display Chinese labels
ax.set_title('3009菜品销售额分布(饼图)')
ax.axis('equal')  # equal axis scaling
plt.show()

# Scatter plot of y = exp(x) - x - 1 at 50 evenly spaced x values in [-4, 2].

x_point = np.linspace(-4, 2, 50)
y_point = np.exp(x_point) - x_point - 1

_, ax = plt.subplots(figsize=(8, 4))
ax.set_xlabel('x轴')
ax.set_ylabel('y轴')
ax.scatter(x_point, y_point, c='pink')
ax.set_title('3009散点图')
plt.show()

浙公网安备 33010602011771号