Python 数据分析与挖掘实战 1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Exploratory plots for the catering sales data, followed by a frequency
# histogram of a second sales table.

# Configure fonts before anything is drawn: the box plot's tick label and the
# final title contain Chinese text, and SimHei has no glyph for the Unicode
# minus sign used on negative tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences.
df = pd.read_excel(r'E:\桌面\catering_sale.xls')
# fillna returns a new frame, so the result has to be rebound to take effect.
df = df.fillna(0)

# Sales plotted against date.
plt.plot(df['日期'], df['销量'])
plt.title('3118')
plt.show()

# Box plot of the frame's columns.
df.boxplot()
plt.title('3118')
plt.show()

# sin(x) on [0, 2*pi] sampled at 256 points.
x = np.linspace(0, 2 * np.pi, 256)
y = np.sin(x)
plt.plot(x, y)
plt.title('3118')
plt.show()

# sin(x) on [-2*pi, 2*pi] sampled at 60 points, drawn with star markers.
x = np.linspace(-2 * np.pi, 2 * np.pi, 60, endpoint=True)
s = np.sin(x)
plt.figure()
plt.plot(x, s, 'b-*')
plt.xlabel("x", fontdict={'size': 16})
plt.ylabel("y", fontdict={'size': 16})
# legend() treats a bare string as a sequence of one-character labels, so the
# label has to be wrapped in a list.
plt.legend(["sin(x)"])
plt.title('3118')
plt.show()

# Frequency distribution of the 'sale' column in 500-wide bins.
catering_sale = r'E:\桌面\catering_fish_congee.xls'
data = pd.read_excel(catering_sale, names=['date', 'sale'])

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
# Labels are derived from the edges so the two can never drift apart.
labels = ['[{},{})'.format(low, high) for low, high in zip(bins[:-1], bins[1:])]

# right=False makes every bin left-closed / right-open, which is what the
# "[a,b)" labels announce (pd.cut defaults to right-closed bins).
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)

# Named aggregation replaces the dict-renaming form of SeriesGroupBy.agg that
# pandas removed; observed=False keeps empty bins in the result.
aggResult = data.groupby(by='sale分层', observed=False)['sale'].agg(sale='size')
# Share of rows per bin, as a percentage rounded to whole percent.
pAggResult = (aggResult / aggResult.sum()).round(2) * 100

plt.figure(figsize=(10, 6))
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)
plt.title('季度销售额频率分布直方图3118', fontsize=20)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Draws the '盈利' column of the dish table twice, keyed by '菜品名':
# first as a pie chart, then as a bar chart.

plt.rcParams['font.sans-serif'] = 'SimHei'

catering_dish_profit = 'E:/桌面/catering_dish_profit.xls'
data = pd.read_excel(catering_dish_profit)
dish_names = data['菜品名']
dish_profit = data['盈利']

# Pie chart: one wedge per dish, sized by its '盈利' value.
pie_fig, pie_ax = plt.subplots(figsize=(8, 6))
pie_ax.pie(dish_profit, labels=dish_names)
pie_ax.set_title('菜品销量分布(饼图)3118')
# Equal axis scaling so the pie is drawn as a circle.
pie_ax.axis('equal')
plt.show()

# Bar chart of the same two columns.
bar_fig, bar_ax = plt.subplots(figsize=(8, 4))
bar_ax.bar(dish_names, dish_profit)
bar_ax.set_xlabel('菜品')
bar_ax.set_ylabel('销量')
bar_ax.set_title('3118')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Two line charts, one per workbook, each plotting three columns against the
# workbook's '月份' column.

# The legend and axis labels are Chinese, so a CJK-capable font must be
# selected before drawing.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# (colour, marker) used for the first, second and third line of each chart.
line_styles = [('green', 'o'), ('red', 's'), ('skyblue', 'x')]

# (workbook path, columns to draw) for each chart, in display order.
charts = [
    ('E:/桌面/dish_sale.xls', ['A部门', 'B部门', 'C部门']),
    ("E:/桌面/dish_sale_b(1).xls", ['2012年', '2013年', '2014年']),
]

for sale_path, series_columns in charts:
    data = pd.read_excel(sale_path)
    plt.figure(figsize=(8, 4))
    for column, (color, marker) in zip(series_columns, line_styles):
        # The column name doubles as the legend label.
        plt.plot(data['月份'], data[column],
                 color=color, label=column, marker=marker)
    plt.legend()
    plt.ylabel('销售额(万元)')
    plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Plots the "Eletricity" column against "Date" for two CSV files, one figure
# per file.

plt.rcParams['font.sans-serif'] = ['SimHei']


def _show_daily_usage(frame, figsize):
    """Draw frame["Eletricity"] against frame["Date"] and show the figure.

    A major x tick is placed at every multiple of 7.
    """
    # NOTE(review): "Eletricity" is the key exactly as this script spells it;
    # confirm it matches the CSV header before "correcting" it.
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(frame["Date"], frame["Eletricity"])
    ax.set_xlabel("日期")
    ax.xaxis.set_major_locator(plt.MultipleLocator(7))
    ax.set_ylabel("每日电量")
    ax.set_title('3118', fontsize=20)
    plt.show()


df_normal = pd.read_csv("E:/桌面/user.csv", engine='python')
_show_daily_usage(df_normal, (8, 4))

df_steal = pd.read_csv("E:/桌面/Steal user.csv", engine='python')
_show_daily_usage(df_steal, (10, 9))
import pandas as pd
import matplotlib.pyplot as plt

# Pareto chart of the '盈利' column: bars in descending order plus the
# cumulative share on a secondary y axis.

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Zero-based position of the bar whose cumulative share is annotated.
ANNOTATED_POSITION = 6

dish_profit = 'E:/桌面/catering_dish_profit.xls'
data = pd.read_excel(dish_profit, index_col='菜品名')
# sort_values returns a new Series; without rebinding, the bars and the
# cumulative curve would follow file order instead of descending profit.
data = data['盈利'].sort_values(ascending=False)

plt.figure()
data.plot(kind='bar')
plt.ylabel('盈利(元)')

# Cumulative share of the total after each dish.
p = 1.0 * data.cumsum() / data.sum()
p.plot(color='r', secondary_y=True, style='-o', linewidth=2)

# The index holds dish names, so the annotated point must be selected by
# position with .iloc rather than with an integer key. Shorter tables simply
# get no annotation.
if len(p) > ANNOTATED_POSITION:
    share = p.iloc[ANNOTATED_POSITION]
    plt.annotate(format(share, '.4%'),
                 xy=(ANNOTATED_POSITION, share),
                 xytext=(ANNOTATED_POSITION * 0.9, share * 0.9),
                 arrowprops=dict(arrowstyle="->",
                                 connectionstyle="arc3,rad=.2"))

plt.ylabel('盈利(比例)')
plt.title('3118', fontsize=20)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Three stand-alone demo figures: a sine curve, a pie chart and a histogram.
plt.rcParams['axes.unicode_minus'] = False

# Figure 1: sin(x) on [0, 2*pi] at 50 points, blue pentagon markers joined by
# a dashed line.
wave_fig, wave_ax = plt.subplots(figsize=(7, 5))
angles = np.linspace(0, 2 * np.pi, 50)
wave_ax.plot(angles, np.sin(angles), 'bp--')
wave_ax.set_title('3118', fontsize=20)
plt.show()

# Figure 2: four-wedge pie with the second wedge pulled out.
pie_fig, pie_ax = plt.subplots()
pie_ax.pie(
    [15, 30, 45, 10],
    explode=(0, 0.1, 0, 0),
    labels=('Frogs', 'Hogs', 'Dogs', 'Logs'),
    colors=['yellowgreen', 'gold', 'lightskyblue', 'lightcoral'],
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
# Equal axis scaling so the pie is drawn as a circle.
pie_ax.axis('equal')
pie_ax.set_title('3118', fontsize=20)
plt.show()

# Figure 3: 10-bin histogram of 1000 standard-normal samples.
samples = np.random.randn(1000)
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(samples, 10)
hist_ax.set_title('3118', fontsize=20)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Three pandas plotting demos: a box plot, a linear-vs-log comparison and a
# line with error bars.

# Fonts are configured up front so every figure is drawn with the same
# settings; the legends of the second figure contain Chinese text.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Box plot of 1000 standard-normal samples next to the same samples shifted
# by +1.
samples = np.random.randn(1000)
D = pd.DataFrame([samples, samples + 1]).T
D.plot(kind='box')
plt.title('3118', fontsize=20)
plt.show()

# exp(0..19) drawn on a linear y axis (top) and a logarithmic one (bottom).
growth = pd.Series(np.exp(np.arange(20)))
fig, (linear_ax, log_ax) = plt.subplots(2, 1, figsize=(8, 9))
linear_ax.set_title('3118', fontsize=20)
growth.plot(ax=linear_ax, label='原始数据图', legend=True)
log_ax.set_title('3118', fontsize=20)
growth.plot(ax=log_ax, logy=True, label='对数数据图', legend=True)
plt.show()

# Error-bar sizes must be non-negative; raw normal samples contain negative
# values, which matplotlib rejects, so their magnitudes are used instead.
error = np.abs(np.random.randn(10))
y = pd.Series(np.sin(np.arange(10)))
y.plot(yerr=error)
plt.title('3118', fontsize=20)
plt.show()
总结:在数据分析和挖掘中,绘图一般要用到 matplotlib、numpy、pandas 这几个库,只要熟练运用,就能很轻松地画出所需的数据图。