数据挖掘作业
1、#箱型图
引用数据:catering_sale.xls
代码:
import pandas as pd
import matplotlib.pyplot as plt  # plotting library

# Box plot of the catering sales data, with every outlier annotated by its value.
catering_sale = 'C:/Users/Administrator/Desktop/test/data first week/catering_sale.xls'  # catering sales data
data = pd.read_excel(catering_sale, index_col=u'日期')  # read the sheet, using the "日期" (date) column as the index
print(data)

# Summary statistics, followed by (max - min) taken over each column of that summary table.
stats = data.describe()
print(stats, stats.max() - stats.min())

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly

p = data.boxplot(return_type='dict')  # draw the box plot directly from the DataFrame
x = p['fliers'][0].get_xdata()  # 'fliers' holds the outlier markers of the first box
y = p['fliers'][0].get_ydata()
y.sort()  # ascending, in place
plt.title('3013', fontsize=20)

# Annotate each outlier with its value. Outliers that are close together would
# produce overlapping labels, so a label is shifted horizontally by an amount
# inversely proportional to its distance from the previous outlier. The offsets
# were tuned by hand for this data set and need re-tuning for other data.
# The figure is shown only once, after all annotations are in place; showing it
# earlier would leave the annotations on a separate, empty figure.
for i, (xi, yi) in enumerate(zip(x, y)):
    gap = yi - y[i - 1] if i > 0 else 0
    if gap != 0:
        plt.annotate(yi, xy=(xi, yi), xytext=(xi + 0.05 - 0.8 / gap, yi))
    else:
        # First outlier, or a duplicate of the previous value (avoids dividing by zero).
        plt.annotate(yi, xy=(xi, yi), xytext=(xi + 0.08, yi))
plt.show()  # display the annotated box plot

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2、#柱状图
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Relative-frequency bar chart of the 'sale' column, grouped into 500-wide bins.
catering_sale = 'C:/Users/Administrator/Desktop/test/data first week/catering_fish_congee.xls'  # catering sales data
data = pd.read_excel(catering_sale, names=['date', 'sale'])  # read the sheet, naming its columns 'date' and 'sale'
print(data.describe())

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
# One label per bin; 'A_lei' is the label of the first bin (0 to 500).
labels = ['A_lei', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']
# right=False makes every bin left-closed / right-open, which is what the
# '[a,b)' labels state. With the default (right=True) a value sitting exactly
# on a boundary, e.g. 500, would be filed under the bin below it.
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)
print(data)

# Count the rows per bin. observed=False keeps bins that contain no rows, so
# every label still appears on the chart.
aggResult = data.groupby('sale分层', observed=False).agg({'sale': 'count'})
print(aggResult)
pAggResult = round(aggResult / aggResult.sum(), 2)  # share of rows per bin, 2 decimals

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.figure(figsize=(15, 10))  # figure size
pAggResult['sale'].plot(kind='bar', width=0.1, fontsize=10)  # draw the frequency bars
plt.title('频率分布直方图_3013', fontsize=20)
plt.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
3、#饼图
import matplotlib.pyplot as plt
import pandas as pd

# Pie chart built from the dish profit sheet: one wedge per row, sized by the
# '盈利' column and labelled with the '菜品名' column.
catering_dish_profit = 'C:/Users/Administrator/Desktop/test/data first week/catering_dish_profit.xls'
data_dish = pd.read_excel(catering_dish_profit)

plt.figure(figsize=(10, 6))
plt.pie(data_dish['盈利'], labels=data_dish['菜品名'])
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.title('3013菜品销售额分布(饼图)')
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
4、散点图
import matplotlib.pyplot as plt
import pandas as pd

# Scatter plot of the 'A部门' column against the 'date' column of dish_sale.xls.
url = r"C:/Users/Administrator/Desktop/test/data first week/dish_sale.xls"
data = pd.read_excel(url)
da = data['date']
abumen = data['A部门']

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.scatter(da, abumen, c='red', s=100, label='A部门')
plt.xticks(range(1, 12, 4))
plt.yticks(range(5, 10, 2))
# Axis labels and title are set once, with their final text. Setting
# placeholder text first and overwriting it afterwards has no visible effect
# other than the label font size, which is kept here.
plt.xlabel('月份', fontdict={'size': 16})
plt.ylabel('销售额(万元)', fontdict={'size': 16})
plt.title("3013")
plt.legend()
plt.show()  # without this call a script run never displays the figure


浙公网安备 33010602011771号