Python采集1000多所世界大学排名数据,制作可视化图

前言

QS世界大学排名(QS World University Rankings)是由英国一家国际教育市场咨询公司Quacquarelli Symonds(简称QS)所发表的年度世界大学排名

采集全球大学排名数据(源码已分享,求点赞)

import requests     # 发送请求
import re
import csv

with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['country', 'rank', 'region', 'score_1', 'score_2', 'score_3', 'score_4', 'score_5', 'score_6', 'total_score', 'stars', 'university', 'year'])
def replace(str_):
    str_ = re.findall('<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>', str_)[0]
    return str_
url = 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2057712_indicators.txt'
# 1. 发送请求
response = requests.get(url)
# <Response [200]>: 请求成功
# 2. 获取数据
json_data = response.json()     # Python 字典
# 3. 解析数据
# 字典
data_list = json_data['data']
for i in data_list:
    country = i['location']     # 国家/地区
    rank = i['overall_rank']    # 排名
    region = i['region']        # 大洲
    score_1 = replace(i['ind_76'])       # 学术声誉
    score_2 = replace(i['ind_77'])       # 雇主声誉
    score_3 = replace(i['ind_36'])       # 师生比
    score_4 = replace(i['ind_73'])       # 教员引用率
    score_5 = replace(i['ind_18'])       # 国际教师
    score_6 = replace(i['ind_14'])       # 国际学生
    total_score = replace(i['overall'])       # 总分
    stars = i['stars']       # 星级
    uni = i['uni']       # 大学名称
    university = re.findall('<div class="td-wrap"><div class="td-wrap-in"><a href=".*?" class="uni-link">(.*?)</a></div></div>', uni)[0]
    year = "2021"       # 年份
    print(country, rank, region, score_1, score_2, score_3, score_4, score_5, score_6, total_score, stars, university, year)
    with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow([country, rank, region, score_1, score_2, score_3, score_4, score_5, score_6, total_score, stars, university, year])

 


可视化展示

导入所需模块

from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.components import Table
import re
import pandas as pd

 

导入数据

df = pd.read_csv('rank.csv')

# 香港,澳门与中国大陆地区等在榜单中是分开的记录的,这边都归为china
df['loc'] = df['country']
df['country'].replace(['China (Mainland)', 'Hong Kong SAR', 'Taiwan', 'Macau SAR'],'China',inplace=True)

 

2021年世界大学排名(QS) TOP 100

bar = (Bar()
       .add_xaxis(university)
       .add_yaxis('', score, category_gap='30%')
       .set_global_opts(title_opts=opts.TitleOpts(title="2021年世界大学排名(QS) TOP 100",
                                                  pos_left="center",
                                                  title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
                        datazoom_opts=opts.DataZoomOpts(range_start=70, range_end=100, orient='vertical'),
                        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=100, min_=60, dimension=0,
                                range_color=['#00FFFF', '#FF7F50']),
                        legend_opts=opts.LegendOpts(is_show=False),
                        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
                        yaxis_opts=opts.AxisOpts(axistick_opts=opts.AxisTickOpts(is_show=False),
                                                 axisline_opts=opts.AxisLineOpts(is_show=False),
                                                 axislabel_opts=opts.LabelOpts(font_size=12)))
       .set_series_opts(label_opts=opts.LabelOpts(is_show=True,
                                                  position='right',
                                                  font_style='italic'),
                        itemstyle_opts={"normal": {
                                                    "barBorderRadius": [30, 30, 30, 30],
                                                    'shadowBlur': 10,
                                                    'shadowColor': 'rgba(120, 36, 50, 0.5)',
                                                    'shadowOffsetY': 5,
                                                }
                                       }
).reversal_axis())

grid = (
        Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
        .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
    )
grid.render_notebook()

 

TOP 500中的中国大学

bar = (Bar()
       .add_xaxis(university)
       .add_yaxis('', score, category_gap='30%')
       .set_global_opts(title_opts=opts.TitleOpts(title="TOP 500中的中国大学",
                                                  pos_left="center",
                                                  title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
                        datazoom_opts=opts.DataZoomOpts(range_start=50, range_end=100, orient='vertical'),
                        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=90, min_=20, dimension=0,
                                range_color=['#00FFFF', '#FF7F50']),
                        legend_opts=opts.LegendOpts(is_show=False),
                        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
                        yaxis_opts=opts.AxisOpts(axistick_opts=opts.AxisTickOpts(is_show=False),
                                                 axisline_opts=opts.AxisLineOpts(is_show=False),
                                                 axislabel_opts=opts.LabelOpts(font_size=12)))
       .set_series_opts(label_opts=opts.LabelOpts(is_show=True,
                                                  position='right',
                                                  font_style='italic'),
                        itemstyle_opts={"normal": {
                                                    "barBorderRadius": [30, 30, 30, 30],
                                                    'shadowBlur': 10,
                                                    'shadowColor': 'rgba(120, 36, 50, 0.5)',
                                                    'shadowOffsetY': 5,
                                                }
                                       }
).reversal_axis())

grid = (
        Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
        .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
    )
grid.render_notebook()

 

TOP 1000高校按大洲分布

t_data = df[(df.year==2021) & (df['rank']<=1000)]
t_data = t_data.groupby(['region'])['university'].count().reset_index()
t_data.columns = ['region', 'num']
t_data = t_data.sort_values(by="num" , ascending=False) 


bar = (Bar(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='600px'))
       .add_xaxis(t_data['region'].tolist())
       .add_yaxis('出现次数', t_data['num'].tolist(), category_gap='50%')
       .set_global_opts(title_opts=opts.TitleOpts(title="TOP 1000高校按大洲分布",
                                                  pos_left="center",
                                                  title_textstyle_opts=opts.TextStyleOpts(font_size=20)),
                        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=300, min_=0, dimension=1,
                                range_color=['#00FFFF', '#FF7F50']),
                        legend_opts=opts.LegendOpts(is_show=False),
                        xaxis_opts=opts.AxisOpts(axistick_opts=opts.AxisTickOpts(is_show=False),
                                                 axisline_opts=opts.AxisLineOpts(is_show=False),
                                                 axislabel_opts=opts.LabelOpts(font_size=15)),
                        yaxis_opts=opts.AxisOpts(is_show=False))
       .set_series_opts(label_opts=opts.LabelOpts(is_show=True,
                                                  position='top',
                                                  font_size=15,
                                                  font_style='italic'),
                        itemstyle_opts={"normal": {
                                                    "barBorderRadius": [30, 30, 30, 30],
                                                    'shadowBlur': 10,
                                                    'shadowColor': 'rgba(120, 36, 50, 0.5)',
                                                    'shadowOffsetY': 5,
                                                }
                                       }
))

bar.render_notebook()

 

TOP 1000高校按国家分布

fmt_js = """function (params) {return params.name+': '+Number(params.value[2]);}"""

mp = Map()
mp.add(
        "高校数量",
        data_pair,
        "world",
        is_map_symbol_show=False,
        is_roam=False)

mp.set_series_opts(label_opts=opts.LabelOpts(is_show=False),
                          itemstyle_opts={'normal': {
                                                'areaColor': '#191970',
                                                'borderColor': '#1773c3',
                                                'shadowColor': '#1773c3',
                                                'shadowBlur': 20,
                                                'opacity': 0.8
                                                    }
                                        })
    
mp.set_global_opts(
        title_opts=opts.TitleOpts(title="TOP 1000高校按国家分布", pos_left='center',
                                  title_textstyle_opts=opts.TextStyleOpts(font_size=18)),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(is_show=False, 
                                          max_=100,
                                          is_piecewise=False,
                                          dimension=0,
                                          range_color=['rgba(255,228,225,0.6)', 'rgba(255,0,0,0.9)', 'rgba(255,0,0,1)'])
    )

data_pair = [[x, y] for x, y in data_pair if x in country_list]    
geo = Geo()
    
# 需要先将几个国家的经纬度信息加入到geo中
for k, v in loc.items():
    geo.add_coordinate(k, v[0], v[1])
# 这里将geo的地图透明度配置为0
geo.add_schema(maptype="world", is_roam=False, itemstyle_opts={'normal': {'opacity': 0}})
    
geo.add("", data_pair, symbol_size=1)
# 显示标签配置
geo.set_series_opts(
    label_opts=opts.LabelOpts(
            is_show=True,
            position='right',
            color='white',
            font_size=12,
            font_weight='bold',
            formatter=JsCode(fmt_js)),
    )
    
grid = (
        Grid(init_opts=opts.InitOpts(theme='chalk', width='1000px', height='600px'))
        .add(mp, grid_opts=opts.GridOpts(pos_top="12%"))
        .add(geo, grid_opts=opts.GridOpts(pos_bottom="12%"))
    )

grid.render_notebook()

 

大洲-国家分布

c = (Sunburst(
        init_opts=opts.InitOpts(
            theme='purple-passion',
            width="1000px",
            height="1000px"))
    .add(
        "",
        data_pair=data_pair,
        highlight_policy="ancestor",
        radius=[0, "100%"],
        sort_='null',
        levels=[
            {},
            {
                "r0": "20%",
                "r": "48%",
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 2}
            },
            {"r0": "50%", "r": "80%", "label": {"align": "right"},
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 1}}
        ],
    )
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(is_show=False, max_=300, min_=0, is_piecewise=False,
                                range_color=['#4285f4', '#34a853', '#fbbc05', '#ea4335', '#ea4335']),
        title_opts=opts.TitleOpts(title="TOP 1000\n\n大学地理分布",
                                               pos_left="center",
                                               pos_top="center",
                                               title_textstyle_opts=opts.TextStyleOpts(font_style='oblique', font_size=20),))
    .set_series_opts(label_opts=opts.LabelOpts(font_size=14, formatter="{b}: {c}"))
)

c.render_notebook()

 

posted @ 2022-07-12 14:31  松鼠爱吃饼干  阅读(402)  评论(0编辑  收藏  举报
Title