大学排名爬取 + 绘制柱状图 + 绘制饼图

Python爬虫与数据图表的实现

 

1. 参考教材实例20,编写Python爬虫程序,获取江西省所有高校的大学排名数据记录,并打印输出。

2. 使用numpy和matplotlib等库分析数据,并绘制南昌大学、华东交通大学、江西理工大学三个高校的总分排名、生源质量(新生高考成绩得分)、培养结果(毕业生就业率)、顶尖成果(高被引论文·篇)等四个指标构成的多指标柱形图。

3. 对江西各高校的顶尖成果(高被引论文数量)进行分析,使用matplotlib绘制各高校顶尖成果数构成的饼状图,并突出江西理工大学所在的饼状块。

毕竟不需要登录验证,所以挺好写的。

 

 

一个好玩的爬虫:

 

  1 # Created by carryon on 18-12-24.
  2 import requests
  3 from bs4 import BeautifulSoup
  4 from prettytable import PrettyTable
  5 from test4.Wtsql import Wtsql
  6 import numpy as np
  7 import matplotlib.mlab as mlab
  8 import matplotlib.pyplot as plt
  9 import matplotlib
 10 
 11 
 12 def get():
 13     res = requests.get(
 14         url='http://zuihaodaxue.cn/zuihaodaxuepaiming2018.html',
 15         headers={
 16             'User-Agent': 'XXX'#自己填自己的
 17         }
 18     )
 19 
 20     res.encoding = res.apparent_encoding
 21 
 22     return res.text
 23 
 24 
 25 def beautiful(text, cur, db, ls):
 26     soup = BeautifulSoup(text, 'lxml')
 27     title = soup.find(name='thead').find(name='tr').find_all(name='th')
 28 
 29     lists = []
 30     for i in range(len(title)):
 31         if i <= 3:
 32             lists.append(title[i].text)
 33         else:
 34             select = title[i].find_all(name='option')
 35             for it in select:
 36                 lists.append(it.text)
 37 
 38     # table = PrettyTable(lists)
 39     rou = soup.find(name='tbody', attrs={'class': 'hidden_zhpm', 'style': 'text-align: center;'}).find_all('tr')
 40     paints = []
 41     draw_1 = []
 42     draw_2 = []
 43     for it in rou:
 44         pan = []
 45         ans = []
 46         tds = it.find_all('td')
 47         for i in range(len(tds)):
 48             # if tds[i].text
 49             pan.append(tds[i].text)
 50         if pan[1] == "江西理工大学" or pan[1] == "南昌大学" or pan[1] == "华东交通大学":
 51             ans.append(pan[1])
 52             ans.append(float(pan[3]))
 53             ans.append(float(pan[4]))
 54             # print(pan[5])
 55             pan[5] = str(pan[5]).strip('%')
 56             ans.append(float(pan[5]))
 57             ans.append(float(pan[9]))
 58             paints.append(ans)
 59             # print(ans)
 60         if pan[2] == "江西":
 61             draw_1.append(pan[1])
 62             draw_2.append(float(pan[9]))
 63         #     ls.insert(pan, cur, db)
 64             # table.add_row(pan)
 65             # print(table)
 66     # print(paints)
 67     # paint(paints)
 68     drawbing(draw_1, draw_2)
 69 def paint(line):
 70     matplotlib.rcParams['font.sans-serif'] = ['SimHei']
 71     matplotlib.rcParams['font.family'] = 'sans-serif'
 72     # 解决负号'-'显示为方块的问题
 73     matplotlib.rcParams['axes.unicode_minus'] = False
 74     # data to plot
 75     # line = [["江西理工大学",24.4,33.8,94.97,11],["南昌大学", 33.5, 52.4, 86.50, 108],["华东交通大学", 26.2, 42.8, 86.50,11]]
 76     n_groups = 4
 77     means_frank = (line[0][1], line[0][2], line[0][3], line[0][4])
 78     means_guido = (line[1][1], line[1][2], line[1][3], line[1][4])
 79     means_frank1 = (line[2][1], line[2][2], line[2][3], line[2][4])
 80 
 81     # create plot
 82     fig, ax = plt.subplots()
 83     index = np.arange(n_groups)
 84     bar_width = 0.15
 85     opacity = 0.99
 86 
 87     rects1 = plt.bar(index, means_frank, bar_width,
 88                      alpha=opacity,
 89                      color='b',
 90                      label=line[0][0])
 91 
 92     rects2 = plt.bar(index + bar_width, means_guido, bar_width,
 93                      alpha=opacity,
 94                      color='g',
 95                      label=line[1][0])
 96     rects3 = plt.bar(index + 2*bar_width, means_frank1, bar_width,
 97                      alpha=opacity,
 98                      color='r',
 99                      label=line[2][0])
100 
101 
102     plt.xlabel('江西省部分大学')
103     plt.ylabel('总评比')
104     plt.title('江西省部分大学总评比')
105     plt.xticks(index + bar_width, ("总分排名", "生源质量", "就业率", "顶尖成果"))
106     plt.legend()
107 
108     plt.tight_layout()
109     plt.show()
110 
111 
def drawbing(line, ans, highlight="江西理工大学"):
    """Show a pie chart of ``ans`` labelled by ``line``, with one wedge pulled out.

    Args:
        line: Wedge labels (university names).
        ans: Wedge sizes, one per label.
        highlight: Label of the wedge to emphasise by offsetting it further
            from the centre than the others.
    """
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    matplotlib.rcParams['font.family'] = 'sans-serif'
    # Keep the minus sign from being rendered as a box with this font.
    matplotlib.rcParams['axes.unicode_minus'] = False

    # One offset per wedge, derived from the labels: a fixed-length list breaks
    # plt.pie whenever the number of rows changes, and a fixed position
    # highlights the wrong school whenever the row order changes.
    explode = [0.5 if name == highlight else 0.2 for name in line]

    plt.axes(aspect=1)  # equal aspect so the pie is drawn as a circle
    plt.pie(x=ans, labels=line, autopct='%.2f%%', explode=explode,
            shadow=False)  # percentages shown with two decimals, no shadow
    plt.show()
126 
127 
if __name__ == '__main__':
    # Script entry point: connect to the database, fetch the page, then parse
    # and plot it.
    ls = Wtsql()
    cur, db = ls.login()
    try:
        text = get()
        beautiful(text, cur, db, ls)
    finally:
        # Release the cursor and connection even if fetching, parsing or
        # plotting fails.
        cur.close()
        db.close()

 

还有连接数据库:

 1 # Created by carryon on 18-12-24.
 2 import pymysql
 3 
 4 
 5 class Wtsql:
 6     def login(self):
 7         db = pymysql.Connect(host="localhost", user="root", password="", db="")
 8         cur = db.cursor()
 9         return cur, db
10 
11     def insert(self, lists, cur, db):
12         sql = "insert into jiangxi(`pm`,`xxmc`, `ss`, `zf`, `syzl`, `pyjg`, `shsy`, `kygm`, `kyzl`, `djcg`, `djrc`, `kjfw`, `cgzh`, `xsgjh`)values" \
13               "('{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}')".format(
14             lists[0], lists[1], lists[2], lists[3], lists[4], lists[5], lists[6], lists[7], lists[8], lists[9],
15             lists[10], lists[11], lists[12], lists[13])
16         # print(sql)
17         try:
18             # 执行sql语句
19             cur.execute(sql)
20             # 提交到数据库执行
21             db.commit()
22         except Exception as e:
23             # 如果发生错误则回滚
24             print(e)
25             db.rollback()

 

posted @ 2018-12-26 16:40  #忘乎所以#  阅读(705)  评论(0)    收藏  举报