爬虫作业

202231014043雷凯化
import requests

url = "https://www.sogou.com"

# Number of GET requests to issue and the per-request timeout in seconds.
request_count = 20
timeout_seconds = 10

# Request the same page repeatedly and report, for every attempt, the HTTP
# status code plus the size of the body as decoded text and as raw bytes.
for i in range(request_count):
    try:
        # Without a timeout, requests.get can block forever on a stalled
        # connection and the loop would never finish.
        response = requests.get(url, timeout=timeout_seconds)
    except requests.RequestException as exc:
        # One failed attempt should not abort the remaining attempts.
        print(f"第{i+1}次请求失败:{exc}")
        print()
        continue

    print(f"第{i+1}次请求返回状态:{response.status_code}")

    # len(text) counts decoded characters, len(content) counts raw bytes,
    # so the two numbers differ whenever the page has multi-byte characters.
    text_length = len(response.text)
    content_length = len(response.content)

    print(f"第{i+1}次请求text()属性返回内容长度:{text_length}")
    print(f"第{i+1}次请求content属性返回内容长度:{content_length}")
    print()

 

import requests
from bs4 import BeautifulSoup
import csv

url = "https://www.shanghairanking.cn/rankings/bcur/202111"

# Send the request. A timeout keeps the script from hanging on a stalled
# connection, and raise_for_status() stops us from parsing an error page
# as if it were the ranking.
response = requests.get(url, timeout=10)
response.raise_for_status()

# When the server does not declare a charset, Requests falls back to
# ISO-8859-1 for response.text, which garbles Chinese text. In that case
# use the encoding detected from the body instead.
if response.encoding is None or response.encoding.lower() == "iso-8859-1":
    response.encoding = response.apparent_encoding

# Parse the HTML.
soup = BeautifulSoup(response.text, "html.parser")

# Locate the ranking table; fail with a clear message if the page layout
# is not what this script expects (find() returns None in that case).
table = soup.find("table", class_="rk-table")
if table is None:
    raise RuntimeError(f"no <table class='rk-table'> found at {url}")

# Create the CSV file and write the header row.
with open("university_ranking.csv", mode="w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "学校名称", "总分"])

    # Extract each data row and write it to the CSV file. The first <tr> is
    # skipped as the header row.
    for row in table.find_all("tr")[1:]:
        cols = row.find_all("td")
        if len(cols) < 3:
            # Skip rows that lack the cells read below (e.g. spacer rows).
            continue

        rank = cols[0].text.strip()
        name = cols[1].text.strip()
        # NOTE(review): the third cell is written under the "总分" header;
        # confirm against the live table that index 2 really is the total
        # score and not another column.
        score = cols[2].text.strip()

        writer.writerow([rank, name, score])

print("数据已成功爬取并保存为 university_ranking.csv 文件")

 

posted @ 2023-12-29 10:28  laidisi  阅读(11)  评论(0)    收藏  举报