5.13

Python爬虫
• 所花时间:3
• 代码行数:489
• 博客容量:1
• 代码如下:

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

# Request headers sent with every page fetch; the User-Agent imitates a
# desktop Chrome browser.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
headers = {'User-Agent': _USER_AGENT}


# Scrape one year's ranking page
def fetch_university_rankings(year):
    """Fetch the top-10 rows of the ShanghaiRanking BCUR table for *year*.

    Args:
        year: Ranking year; interpolated into the page URL.

    Returns:
        A list of ``(rank, name)`` tuples, in page order, for every table
        row whose integer rank is 10 or lower.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the page does not contain the expected
            ``table.rk-table`` with a ``tbody``.
    """
    url = f'https://www.shanghairanking.cn/rankings/bcur/{year}'
    # Side effect preserved from the original implementation: selects a
    # CJK-capable font so Chinese text in later matplotlib figures renders.
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

    # A timeout keeps the script from hanging forever on a stalled server;
    # raise_for_status surfaces 4xx/5xx instead of parsing an error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'  # force UTF-8 decoding of the body

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'rk-table'})
    body = table.find('tbody') if table is not None else None
    if body is None:
        raise ValueError(f'ranking table not found at {url}')

    rankings = []
    for row in body.find_all('tr'):
        columns = row.find_all('td')
        if len(columns) < 2:
            continue  # not a data row (needs a rank cell and a name cell)
        try:
            rank = int(columns[0].text.strip())
        except ValueError:
            continue  # non-numeric rank cell cannot be a top-10 entry
        if rank <= 10:
            # The first line of the name cell is taken as the university name.
            name = columns[1].text.strip().split('\n')[0].strip()
            rankings.append((rank, name))
    return rankings


# Print the collected top-10 lists
def print_top_10(rankings):
    """Write every year's top-10 list to standard output.

    Args:
        rankings: Mapping of year -> iterable of ``(rank, name)`` pairs.

    For each year a heading line is printed, then one line per pair, then
    a ``print("\\n")`` that leaves two blank lines before the next year.
    """
    for year in rankings:
        print(f"{year}年排名前10的大学:")
        entries = rankings[year]
        for rank, name in entries:
            print(f"排名 {rank}: {name}")
        print("\n")


# Collect the top-10 list of every year from 2015 through 2019, keyed by
# year, then print them all.
rankings = {
    year: fetch_university_rankings(year)
    for year in range(2015, 2019 + 1)
}

print_top_10(rankings)


# Data visualisation
def plot_rankings(rankings):
    """Plot how each listed university's rank changes from year to year.

    Args:
        rankings: Mapping of year -> list of ``(rank, name)`` tuples.

    One line is drawn per university that appears in any year. A year in
    which the university is absent contributes ``None``, which leaves a gap
    in its line. The x axis covers exactly the years present in
    ``rankings``; nothing is drawn when the mapping is empty.
    """
    years = sorted(rankings)
    if not years:
        return  # no data, so there is nothing to draw

    # Select a CJK-capable font here so the Chinese labels render no matter
    # where the data came from.
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
    plt.figure(figsize=(14, 7))

    # Per-year name -> rank lookup. setdefault keeps the first occurrence of
    # a name, matching a first-match linear search.
    rank_by_year = {}
    for year in years:
        lookup = {}
        for rank, name in rankings[year]:
            lookup.setdefault(name, rank)
        rank_by_year[year] = lookup

    # Sorted so legend order and line colours are stable between runs.
    all_universities = sorted(
        {name for lookup in rank_by_year.values() for name in lookup}
    )

    for university in all_universities:
        y = [rank_by_year[year].get(university) for year in years]
        plt.plot(years, y, marker='o', label=university)

    plt.gca().invert_yaxis()  # rank 1 at the top
    plt.xticks(years)
    plt.xlabel('年份')
    plt.ylabel('排名')
    plt.title(f'{years[0]}-{years[-1]}年前10位大学排名波动')
    plt.legend()
    plt.show()


# Draw the rank-fluctuation chart for the rankings collected above.
plot_rankings(rankings)


# Interactive lookup of one university's rank in one year
def query_ranking(rankings):
    """Prompt repeatedly for a university and a year and print its rank.

    Args:
        rankings: Mapping of year -> list of ``(rank, name)`` tuples.

    The loop ends when the user enters ``退出`` as the university name.
    A year that is not a valid integer is reported and the prompt starts
    over instead of aborting the program.
    """
    while True:
        # Strip so stray leading/trailing whitespace does not defeat the
        # exact-name match below.
        university = input("请输入大学名称(或输入'退出'结束查询):").strip()
        if university == '退出':
            break

        try:
            year = int(input("请输入查询年份(2015-2019):"))
        except ValueError:
            print("年份输入无效,请输入数字年份。")
            continue

        if year not in rankings:
            print(f"未找到 {year} 年的排名信息。")
            continue

        # First matching entry, or None when the university is not listed.
        rank = next(
            (r for r, name in rankings[year] if name == university),
            None,
        )
        if rank is None:
            print(f"未找到 {university} 在 {year} 年的排名信息。")
        else:
            print(f"{year}年 {university} 的排名是: {rank}")


# Start the interactive lookup loop; it runs until the user enters '退出'.
query_ranking(rankings)


posted @ 2024-05-13 14:57  aallofitisst  阅读(8)  评论(0)    收藏  举报