Python 爬虫:抓取并解析 HTML 网页

import requests
from bs4 import BeautifulSoup

def function(url: str = "https://movie.douban.com/top250", timeout: float = 10) -> None:
    """Fetch a web page and print the text of each ``<span class="title">``.

    The page is requested with a browser-like ``User-Agent`` header. On an
    HTTP 200 response the HTML is parsed and every matching span is printed
    as a numbered line (numbering starts at 1). On any other status code a
    failure message containing the status code is printed instead.

    Args:
        url: Page to fetch. Defaults to the Douban Top 250 movie list.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script forever.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            (connection failure, timeout, invalid URL, ...).
    """
    # Send a browser-like User-Agent instead of the default requests one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    response = requests.get(url, headers=headers, timeout=timeout)

    # Guard clause: report any non-200 response and stop.
    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return

    soup = BeautifulSoup(response.text, "html.parser")

    # Every <span class="title"> element is treated as one title entry.
    movie_titles = soup.find_all("span", class_="title")
    for idx, title in enumerate(movie_titles, 1):
        print(f"{idx}. {title.get_text()}")

# Run the scraper with its default arguments.
function()

posted @ 2025-06-17 20:34  呆呆酱  阅读(10)  评论(0)    收藏  举报