pandas结合matplotlib作图
# Bar charts drawn straight from a pandas DataFrame (pandas plotting on top
# of matplotlib): a plain chart, a sorted chart, and a styled chart.
import pandas as pd
import matplotlib.pyplot as plt

# The backslash is escaped explicitly: a bare '\S' is an invalid escape
# sequence and produces a warning on current Python versions.
STUDENTS_XLSX = '009\\Students.xlsx'

students = pd.read_excel(STUDENTS_XLSX)

# 1. Simple chart: one bar per row, in workbook order.
students.plot.bar(x='Field', y='Number')
plt.show()

# 2. Same chart with the bars ordered from largest to smallest.
#    sort_values(by=...) is the replacement for the older sort_index(by=...).
ranked = students.sort_values(by='Number', ascending=False)
ranked.plot.bar(x='Field', y='Number')
plt.show()

# 3. Sorted chart with a custom colour and a title.
ranked.plot.bar(
    x='Field',
    y='Number',
    color='green',
    title='International Students in Field',
)
# Call plt.tight_layout() here if the x-axis labels do not fit in the figure.
plt.show()
使用matplotlib作图
# Sorted bar chart drawn with matplotlib's pyplot API directly instead of
# DataFrame.plot.
import pandas as pd
import matplotlib.pyplot as plt

# Escaped backslash: a bare '\S' is an invalid escape sequence.
students = pd.read_excel('009\\Students.xlsx')
# sort_values(by=...) is the replacement for the older sort_index(by=...).
students.sort_values(by='Number', inplace=True, ascending=False)

plt.bar(students.Field, students.Number, color='orange')
# The rotation must be numeric (or 'vertical' / 'horizontal'); the string
# '90' is rejected by current matplotlib releases.
plt.xticks(students.Field, rotation=90)
plt.xlabel('Field')
plt.ylabel('Number')
plt.title('International Students in Field', fontsize=16)
# Call plt.tight_layout() here if the x-axis labels do not fit in the figure.
plt.show()
双柱形作图
# Grouped (side-by-side) bar charts comparing the 2016 and 2017 columns:
# the basic version, a polished version, and a version saved to disk.
import pandas as pd
import matplotlib.pyplot as plt

# Escaped backslash: a bare '\S' is an invalid escape sequence.
STUDENTS_XLSX = '010\\Students.xlsx'
YEARS = ['2016', '2017']
BAR_COLORS = ['orange', 'red']


def _draw_polished_chart(frame):
    """Plot the grouped bars for *frame* with bold labels and slanted ticks."""
    frame.plot.bar(x='Field', y=YEARS, color=BAR_COLORS)
    plt.title('International Students by Field', fontsize=16, fontweight='bold')
    plt.xlabel('Field', fontweight='bold')
    plt.ylabel('Number', fontweight='bold')
    # Grab the current axes to restyle the tick labels; the rotation is
    # passed as a number and the labels are right-aligned to their ticks.
    axes = plt.gca()
    axes.set_xticklabels(frame['Field'], rotation=45, ha='right')
    # Widen the left/bottom margins so the slanted labels stay inside the figure.
    plt.gcf().subplots_adjust(left=0.2, bottom=0.42)


students = pd.read_excel(STUDENTS_XLSX)
# NOTE(review): assumes the year headers are read as the strings
# '2016'/'2017' -- confirm against the workbook.
students.sort_values(by='2017', inplace=True, ascending=False)

# 1. Original lesson version: title supplied through DataFrame.plot.
students.plot.bar(
    x='Field',
    y=YEARS,
    color=BAR_COLORS,
    title='International Students by Field',
)
plt.show()

# 2. Improved version: explicit title, axis labels and rotated tick labels.
_draw_polished_chart(students)
plt.show()

# 3. Final version: same chart, written to a JPEG before being displayed.
_draw_polished_chart(students)
# savefig has no 'qualities' keyword; JPEG quality (presumably the intent)
# is forwarded to Pillow through pil_kwargs, where 95 is the highest
# recommended setting.
plt.savefig('001.jpg', pil_kwargs={'quality': 95})
plt.show()
叠加柱状图
# 011 - stacked bar charts of the Oct/Nov/Dec columns per user, first
# vertical, then horizontal (the horizontal one is also saved as a PNG).
import pandas as pd
import matplotlib.pyplot as plt

MONTHS = ['Oct', 'Nov', 'Dec']


def _load_users_with_total():
    """Read the users workbook and add a 'Total' column (Oct + Nov + Dec)."""
    # If a path with a single backslash cannot be read, escape it as \\.
    frame = pd.read_excel('011\\Users.xlsx')
    frame['Total'] = frame['Oct'] + frame['Nov'] + frame['Dec']
    return frame


# Vertical stacked bars, largest total first.
users = _load_users_with_total()
users = users.sort_values(by=['Total'], ascending=False)
users.plot.bar(x='Name', y=MONTHS, stacked=True, title='User Behavior')
plt.tight_layout()
plt.show()

# Horizontal stacked bars (barh), sorted by ascending total.
users = _load_users_with_total()
users = users.sort_values(by=['Total'], ascending=True)
users.plot.barh(
    x='Name',
    y=MONTHS,
    stacked=True,
    title='User Behavior',
    color=['red', 'blue', 'orange'],
)
plt.tight_layout()
plt.savefig('011\\0111.png', dpi=1200)
plt.show()
饼图
# 012 - pie charts of the '2017' column, indexed by the 'From' column:
# a basic chart, a sorted chart, and a refined clockwise chart.
import pandas as pd
import matplotlib.pyplot as plt


def _read_students():
    """Load the students workbook using the 'From' column as the index."""
    # If a path with a single backslash cannot be read, escape it as \\
    # or use a raw string (r'...').
    return pd.read_excel('012\\Students.xlsx', index_col='From')


def _label_chart():
    """Apply the shared bold title and y-axis label to the current figure."""
    plt.title('Source of International Students', fontsize=16, fontweight='bold')
    plt.ylabel('2017', fontsize=12, fontweight='bold')


# Basic pie chart, wedges in workbook order.
students = _read_students()
students['2017'].plot.pie(fontsize=8)
_label_chart()
plt.show()

# Sorted version: values ordered ascending, first wedge rotated by -270 degrees.
students = _read_students()
ascending_2017 = students['2017'].sort_values(ascending=True)
ascending_2017.plot.pie(fontsize=8, startangle=-270)
_label_chart()
plt.show()

# Refined version.
students = _read_students()
# counterclock=False lays the wedges out clockwise;
# autopct='%1.1f%%' could be added to print percentages on the wedges.
students['2017'].plot.pie(fontsize=8, counterclock=False, startangle=-270)
_label_chart()
# Equal axis scaling keeps the pie circular.
plt.axis('equal')
plt.show()
波动图
# 013 - weekly sales trend per product category: a line chart saved as PDF,
# then a stacked area chart saved as PNG.
import pandas as pd
import matplotlib.pyplot as plt

CATEGORIES = ['Accessories', 'Bikes', 'Clothing', 'Components']
TITLE_STYLE = dict(fontsize=16, fontweight='bold')


def _read_orders():
    """Load the orders workbook using the 'Week' column as the index."""
    return pd.read_excel('013\\Orders.xlsx', index_col='Week')


# Line chart: one line per category.
orders = _read_orders()
orders.plot(y=CATEGORIES)
plt.title('Sales Weekly Trend', **TITLE_STYLE)
# Put a tick on every week present in the index.
plt.xticks(orders.index)
plt.savefig('013\\013.pdf')
plt.show()

# Stacked area chart (orders.plot.bar with stacked=True would give stacked
# bars instead).
orders = _read_orders()
orders.plot.area(y=CATEGORIES, stacked=True)
plt.title('Sales Weekly Trend', **TITLE_STYLE)
plt.ylabel('Total', fontsize=12, fontweight='bold')
plt.xticks(orders.index, fontsize=8)
plt.savefig('013\\0131.png', dpi=600)
plt.show()
散点图与直方图
# 014 - distribution plots (histograms and a density curve) for the home
# data set, plus a column-to-column correlation table exported to Excel.
import pandas as pd
import matplotlib.pyplot as plt

HOMES_XLSX = '014\\home_data.xlsx'


def _tick_positions(values, step):
    """Return tick positions 0, step, 2*step, ... below the largest value.

    The maximum is converted to int so that range() also works when the
    column was read as floating point.
    """
    return range(0, int(values.max()), step)


# Read the workbook into a DataFrame once; nothing below modifies it.
homes = pd.read_excel(HOMES_XLSX)
print(homes.head(10))

pd.options.display.max_columns = 777

# Distribution of living area.
homes.sqft_living.plot.hist(bins=100)
plt.xticks(_tick_positions(homes.sqft_living, 500), fontsize=8, rotation=90)
plt.show()

# Distribution of price.
homes.price.plot.hist(bins=100)
plt.xticks(_tick_positions(homes.price, 100000), fontsize=8, rotation=90)
plt.show()

# Density (KDE) curve of living area.
homes.sqft_living.plot.kde()
plt.xticks(_tick_positions(homes.sqft_living, 500), fontsize=8, rotation=90)
plt.show()

# Pairwise correlation between columns. The numeric/boolean columns are
# selected explicitly because recent pandas versions raise on non-numeric
# columns instead of silently dropping them.
correlation = homes.select_dtypes(include=['number', 'bool']).corr()
correlation.to_excel('014\\corr.xlsx')
print('Done!')
本人的文档都是自我记录,以便日后查看。
浙公网安备 33010602011771号