爬虫作业
202231014043雷凯化
# Exercise 1: request the Sogou home page 20 times and print, for each
# request, the HTTP status code and the lengths of the decoded and raw bodies.
import requests

url = "https://www.sogou.com"

for i in range(20):
    # Without a timeout requests waits indefinitely, so a single stalled
    # connection would hang the whole run.
    response = requests.get(url, timeout=10)
    print(f"第{i+1}次请求返回状态:{response.status_code}")

    # ``text`` is the body decoded to str and ``content`` is the raw bytes,
    # so the two lengths can differ when the page has multi-byte characters.
    text_length = len(response.text)
    content_length = len(response.content)
    print(f"第{i+1}次请求text()属性返回内容长度:{text_length}")
    print(f"第{i+1}次请求content属性返回内容长度:{content_length}")
    print()
# Exercise 2: download the ranking page below, locate its ``rk-table`` table
# and write the first three cells of every data row to university_ranking.csv.
import csv

import requests
from bs4 import BeautifulSoup

url = "https://www.shanghairanking.cn/rankings/bcur/202111"

# Send the request. The timeout stops a stalled connection from hanging the
# script, and raise_for_status() turns an HTTP error page into a clear failure
# instead of a confusing "table not found" later on.
response = requests.get(url, timeout=10)
response.raise_for_status()

# requests derives the text encoding from the HTTP headers only and falls back
# to ISO-8859-1 for text/* responses that declare no charset, which would
# garble the Chinese school names. Sniff the body in that case.
if "charset" not in response.headers.get("Content-Type", "").lower():
    response.encoding = response.apparent_encoding

# Parse the HTML
soup = BeautifulSoup(response.text, "html.parser")

# Locate the table; find() returns None when nothing matches, so check before
# the output file is created or truncated.
table = soup.find("table", class_="rk-table")
if table is None:
    raise SystemExit("未找到 class 为 rk-table 的表格,页面结构可能已变化")

# Create the CSV file and write the header row
with open("university_ranking.csv", mode="w", encoding="utf-8", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "学校名称", "总分"])

    # Extract each row and write it to the CSV file; the first <tr> is
    # skipped as the table's header row.
    rows = table.find_all("tr")
    for row in rows[1:]:
        cols = row.find_all("td")
        # Skip separator/malformed rows rather than crashing on cols[2].
        if len(cols) < 3:
            continue
        # NOTE(review): the third cell is written under the "总分" header;
        # confirm against the live table that index 2 really is the score
        # column and not another attribute of the school.
        rank, name, score = (col.text.strip() for col in cols[:3])
        writer.writerow([rank, name, score])

print("数据已成功爬取并保存为 university_ranking.csv 文件")
浙公网安备 33010602011771号