爬虫作业

202231014043雷凯化

import requests

url = "https://www.sogou.com"

# Seconds to wait for the server on each attempt. Without a timeout,
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

# Fetch the page 20 times and report the status code and body size of each
# response.
for i in range(20):
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # A single failed attempt should not abort the remaining ones.
        print(f"第{i+1}次请求失败：{exc}")
        print()
        continue

    print(f"第{i+1}次请求返回状态：{response.status_code}")

    # response.text is the decoded str (length counted in characters);
    # response.content is the raw body (length counted in bytes).
    text_length = len(response.text)
    content_length = len(response.content)

    print(f"第{i+1}次请求text()属性返回内容长度：{text_length}")
    print(f"第{i+1}次请求content属性返回内容长度：{content_length}")
    print()

import requests
from bs4 import BeautifulSoup
import csv

url = "https://www.shanghairanking.cn/rankings/bcur/202111"

# Request the ranking page. The timeout keeps the script from hanging on an
# unresponsive server, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the ranking.
response = requests.get(url, timeout=10)
response.raise_for_status()

# Parse the raw bytes rather than response.text: requests falls back to
# ISO-8859-1 when the server sends no charset, which would garble the Chinese
# text. Given bytes, BeautifulSoup detects the encoding from the document.
soup = BeautifulSoup(response.content, "html.parser")

# Locate the ranking table before touching the output file, so a changed page
# layout fails with a clear message and does not truncate an existing CSV.
table = soup.find("table", class_="rk-table")
if table is None:
    raise RuntimeError(f"ranking table (class 'rk-table') not found at {url}")

# Create the CSV file and write the header row.
with open("university_ranking.csv", mode="w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "学校名称", "总分"])

    # Write one CSV row per table row; the first <tr> is skipped as the
    # table's header row.
    rows = table.find_all("tr")
    for row in rows[1:]:
        cols = row.find_all("td")
        if len(cols) < 3:
            # Rows without enough <td> cells carry no ranking data.
            continue

        # NOTE(review): the third cell is written under the "总分" header;
        # confirm against the live table that this column really is the
        # total score and not another field.
        rank, name, score = (col.text.strip() for col in cols[:3])

        writer.writerow([rank, name, score])

print("数据已成功爬取并保存为 university_ranking.csv 文件")

posted @ 2023-12-29 10:28 laidisi 阅读(11) 评论(0) 收藏举报

刷新页面返回顶部

爬虫作业

公告