表格可视化和线性回归模型预测
摘要:
本项目对三个数据文件进行可视化分析,每个数据文件绘制3个不同类型的可视化图形,并建立模型对排名进行预测。
选题背景:
对外经济贸易、旅游业和居民收入情况是衡量一个国家经济和居民生活情况的重要依据。本项目对这三类数据进行可视化分析来体现我国近几年的发展情况。
数据说明:
该项目的数据均来自国家统计局。
实施过程及代码:
导入需要用到的库
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# These two settings keep Chinese text in the figures from rendering as
# garbled characters: select the SimHei font and turn off the Unicode minus.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
读取csv数据
# Load the goods import/export table. header=2 takes row index 2 (0-based)
# of the file as the column header.
economy = pd.read_csv(
    filepath_or_buffer=r'D:/program/表格数据可视化/对外经济贸易 货物进出口总额.csv',
    header=2,
)
# Preview the first 8 rows (presumably displayed as notebook cell output).
economy.head(n=8)
对表格‘对外经济贸易 货物进出口总额’进行可视化
画出进出口总额随时间变化趋势图
# Line chart of the total import/export value by year.
# Years run from newest to oldest, matching the hard-coded order used here.
x = list(range(2020, 2010, -1))
# Row 0 of the table without its first column (presumably the indicator name).
jinchukouzonge = economy.loc[0][1:]
y = list(jinchukouzonge)

fig, axes = plt.subplots(figsize=(10, 5))
axes.plot(x, y, color='#DE6B58', linestyle='-', linewidth=1.5, marker='x')
# Grid lines are requested for minor ticks only.
# NOTE(review): presumably nothing is drawn unless minor ticks are enabled - confirm.
axes.grid(which='minor', c='lightgrey')
# Axis labels and title.
axes.set_xlabel("年份")
axes.set_ylabel("亿元(人民币)")
axes.set_title('进出口总额变化趋势')
plt.savefig(r'D:/program/表格数据可视化/进出口总额.png', dpi=100)
# Display the figure.
plt.show()
画出出口总额、进口总额和进出口差额随时间变化的柱状图
# Grouped bar chart: export total, import total and trade balance per year.
# Rows 1-3 of the table are used, each without its first column
# (presumably the indicator name - confirm against the CSV).
chukouzonge = economy.loc[1][1:]
jinkouzonge = economy.loc[2][1:]
jinchukoucha = economy.loc[3][1:]
y = list(chukouzonge)
y1 = list(jinkouzonge)
y2 = list(jinchukoucha)

# Three bars share a total width of 0.8 per year.
total_width, n = 0.8, 3
width = total_width / n
# Shift left by exactly one bar width so the three-bar group is centred on
# the year value (bar offsets become -width, 0, +width).
x = np.arange(2020, 2010, -1) - (total_width - width) / 2

plt.figure(figsize=(15, 10))
plt.bar(x, y, width=width, label='出口总额')
plt.bar(x + width, y1, width=width, label='进口总额')
plt.bar(x + 2 * width, y2, width=width, label='进出口差额')
plt.title('进出口情况', fontdict={'weight': 'normal', 'size': 20})
plt.xlabel('年份', fontdict={'weight': 'normal', 'size': 15})
plt.ylabel('亿元(人民币)', fontdict={'weight': 'normal', 'size': 15})
plt.legend()
plt.savefig(r'D:/program/表格数据可视化/进出口情况.png', dpi=100)
plt.show()
进出口总额(美元)占比饼状图
# Pie chart of each year's share of the import/export total (USD series).
# Year labels, newest first.
x = [str(year) for year in range(2020, 2010, -1)]
# Row 4 of the table without its first column (presumably the indicator name).
jinchukouzonge = economy.loc[4][1:]
y = list(jinchukouzonge)
# Pull only the first slice (labelled '2020') away from the centre.
explode = (0.1,) + (0,) * (len(x) - 1)

fig1, ax1 = plt.subplots()
ax1.pie(y, explode=explode, labels=x, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.
plt.title('进出口总额(美元)占比饼状图')
plt.savefig(r'D:/program/表格数据可视化/进出口总额(美元)占比饼状图.png', dpi=100)
plt.show()
对表格‘旅游业—国内旅游情况’进行可视化
# Load the domestic tourism table. header=2 takes row index 2 (0-based)
# of the file as the column header.
travel = pd.read_csv(
    filepath_or_buffer=r'D:/program/表格数据可视化/旅游业—国内旅游情况.csv',
    header=2,
)
# Preview the first 9 rows (presumably displayed as notebook cell output).
travel.head(n=9)
人均消费柱状图
# Grouped bar chart: per-capita domestic tourism spending for all residents,
# urban residents and rural residents.
# Rows 6-8 of the table are used, each without its first column
# (presumably the indicator name - confirm against the CSV).
guonei = travel.loc[6][1:]
chengzhen = travel.loc[7][1:]
nongcun = travel.loc[8][1:]
y = list(guonei)
y1 = list(chengzhen)
y2 = list(nongcun)

# Three bars share a total width of 0.8 per year.
total_width, n = 0.8, 3
width = total_width / n
# Shift left by exactly one bar width so the three-bar group is centred on
# the year value (bar offsets become -width, 0, +width).
x = np.arange(2020, 2010, -1) - (total_width - width) / 2

plt.figure(figsize=(15, 10))
# The urban series is drawn first, in green; the other two series take the
# default colour cycle. Draw order also sets the legend order.
plt.bar(x + width, y1, width=width, label='城镇居民国内旅游人均花费', color='g')
plt.bar(x, y, width=width, label='国内旅游人均花费')
plt.bar(x + 2 * width, y2, width=width, label='农村居民国内旅游人均花费')
plt.title('旅游人均花费情况', fontdict={'weight': 'normal', 'size': 20})
plt.xlabel('年份', fontdict={'weight': 'normal', 'size': 15})
plt.ylabel('元(人民币)', fontdict={'weight': 'normal', 'size': 15})
plt.legend()
plt.savefig(r'D:/program/表格数据可视化/旅游人均花费情况.png', dpi=100)
plt.show()
国内旅游总花费趋势图
# Line chart of total domestic tourism spending by year.
x = [2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011]
# Row 3 of the tourism table without its first column (presumably the
# indicator name - confirm against the CSV).
zonghuafei = travel.loc[3][1:]
# Plot the tourism series itself. The earlier version iterated over
# `jinchukouzonge` (import/export data from another table), so the chart
# showed trade figures under a tourism title.
y = list(zonghuafei)

fig, axes = plt.subplots(1, 1, figsize=(10, 5))
# Line plot.
axes.plot(x, y, linestyle='-', color='r', marker='+', linewidth=1.5)
# Grid lines are requested for minor ticks only.
# NOTE(review): presumably nothing is drawn unless minor ticks are enabled - confirm.
axes.grid(which='minor', c='lightgrey')
# Axis labels.
axes.set_ylabel("亿元(人民币)")
axes.set_xlabel("年份")
plt.title('国内旅游总花费趋势')
plt.savefig(r'D:/program/表格数据可视化/国内旅游总花费趋势图.png', dpi=100)
# Display the figure.
plt.show()
国内游客人次饼状图
国内游客人次饼状图
对表格‘人民生活—全国及分城镇居民收支基本情况’进行可视化
# Load the household income/expenditure table. header=2 takes row index 2
# (0-based) of the file as the column header.
life = pd.read_csv(
    filepath_or_buffer=r'D:/program/表格数据可视化/人民生活—全国及分城镇居民收支基本情况.csv',
    header=2,
)
# Preview the first 12 rows (presumably displayed as notebook cell output).
life.head(n=12)
画居民人均可支配收入变化趋势图
# Line chart of residents' per-capita disposable income by year.
# Eight years, newest first.
x = list(range(2020, 2012, -1))
# Row 3 of the table without its first column (presumably the indicator name).
jumin = life.loc[3][1:]
y = list(jumin)

fig, axes = plt.subplots(figsize=(10, 5))
axes.plot(x, y, color='b', linestyle='-', linewidth=1.5, marker='+')
# Grid lines are requested for minor ticks only.
# NOTE(review): presumably nothing is drawn unless minor ticks are enabled - confirm.
axes.grid(which='minor', c='lightgrey')
# Axis labels and title.
axes.set_xlabel("年份")
axes.set_ylabel("元(人民币)")
axes.set_title('居民人均可支配收入变化趋势图')
plt.savefig(r'D:/program/表格数据可视化/居民人均可支配收入变化趋势图.png', dpi=100)
# Display the figure.
plt.show()
人均消费支出对比柱状图
# Grouped bar chart: per-capita consumption expenditure for all residents,
# urban residents and rural residents.
# Rows 6, 8 and 10 of the table are used, each without its first column
# (presumably the indicator name - confirm against the CSV).
jm = life.loc[6][1:]
chengzhen = life.loc[8][1:]
nongcun = life.loc[10][1:]
y = list(jm)
y1 = list(chengzhen)
y2 = list(nongcun)

# Three bars share a total width of 0.8 per year.
total_width, n = 0.8, 3
width = total_width / n
# Eight years, newest first, shifted left by exactly one bar width so the
# three-bar group is centred on the year value.
x = np.arange(2020, 2012, -1) - (total_width - width) / 2

plt.figure(figsize=(15, 10))
# The urban series is drawn first, in green; the other two series take the
# default colour cycle. Draw order also sets the legend order.
plt.bar(x + width, y1, width=width, label='城镇居民人均消费支出', color='g')
plt.bar(x, y, width=width, label='居民人均消费支出')
plt.bar(x + 2 * width, y2, width=width, label='农村居民人均消费支出')
plt.title('人均消费支出对比柱状图', fontdict={'weight': 'normal', 'size': 20})
plt.xlabel('年份', fontdict={'weight': 'normal', 'size': 15})
plt.ylabel('元(人民币)', fontdict={'weight': 'normal', 'size': 15})
plt.legend()
plt.savefig(r'D:/program/表格数据可视化/人均消费支出对比柱状图.png', dpi=100)
plt.show()
居民人均可支配收入同比增长饼图
# Pie chart built from the year-on-year growth row of per-capita disposable
# income, one slice per year.
# Seven year labels, newest first.
x = [str(year) for year in range(2020, 2013, -1)]
# Row 1 of the table with its first and last columns dropped.
guonei = life.loc[1][1:-1]
print(guonei)
y = list(guonei)
# Pull only the fourth slice (labelled '2017') away from the centre.
explode = (0, 0, 0, 0.2, 0, 0, 0)

fig1, ax1 = plt.subplots()
ax1.pie(
    y,
    explode=explode,
    labels=x,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
ax1.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.
plt.title('居民人均可支配收入同比增长饼状图')
plt.savefig(r'D:/program/表格数据可视化/居民人均可支配收入同比增长饼状图.png', dpi=100)
plt.show()