Python 下载 HTML 中的图片

安装 requests、beautifulsoup4 库

# 安装 requests、beautifulsoup4 库
pip install requests beautifulsoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple

完整代码

# pip install requests beautifulsoup4 -i https://pypi.tuna.tsinghua.edu.cn/simple

import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin


def _collect_image_urls(soup, base_url):
    """Return absolute http(s) URLs of all ``<img src>`` values in *soup*.

    Order follows the document; a URL that appears several times is kept once.
    Sources that do not resolve to http/https (e.g. ``data:`` URIs) are
    skipped because they cannot be fetched over HTTP.
    """
    seen = set()
    links = []
    for img_tag in soup.find_all('img'):
        src = img_tag.get('src')
        if not src:
            continue
        # urljoin returns absolute URLs unchanged and resolves relative ones,
        # so no separate "starts with http" check is needed.
        absolute = urljoin(base_url, src.strip())
        if urlparse(absolute).scheme not in ("http", "https"):
            continue
        if absolute not in seen:
            seen.add(absolute)
            links.append(absolute)
    return links


def _unique_filename(image_link, index, used_names):
    """Derive a non-empty file name for *image_link* not yet in *used_names*.

    The chosen name is added to *used_names* before it is returned.
    """
    name = os.path.basename(urlparse(image_link).path)
    if not name:
        # URL path ends with "/" (or is empty): there is no basename to use.
        name = f"image_{index}"
    stem, ext = os.path.splitext(name)
    candidate = name
    suffix = index
    while candidate in used_names:
        # Two different URLs share a basename; keep both files.
        candidate = f"{stem}_{suffix}{ext}"
        suffix += 1
    used_names.add(candidate)
    return candidate


def download_images(url, timeout=10):
    """Download every image referenced by ``<img>`` tags on the page at *url*.

    Files are written to a ``download_images`` directory under the current
    working directory, which is created if it does not exist. Progress and
    failures are reported with ``print``.

    Args:
        url: Address of the HTML page to scan.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
            Failures on individual images are reported and skipped instead.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36"
    }

    # Fetch the page source.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch page: {url} (HTTP {response.status_code})")
        return

    # Parse the HTML content.
    soup = BeautifulSoup(response.text, "html.parser")

    # Create the directory that will hold the downloaded images.
    download_dir = os.path.join(os.getcwd(), 'download_images')
    os.makedirs(download_dir, exist_ok=True)

    # Resolve relative links against the final URL so redirects are honoured.
    image_links = _collect_image_urls(soup, response.url or url)

    # Download each image and save it into the directory.
    used_names = set()
    for index, image_link in enumerate(image_links):
        try:
            img_response = requests.get(image_link, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable image must not abort the rest of the batch.
            print(f"Failed to download image from: {image_link} ({exc})")
            continue

        if img_response.status_code != 200:
            print(f"Failed to download image from: {image_link}")
            continue

        img_filename = _unique_filename(image_link, index, used_names)
        with open(os.path.join(download_dir, img_filename), 'wb') as img_file:
            img_file.write(img_response.content)
        print(f"Downloaded: {img_filename}")


if __name__ == "__main__":
    # Script entry point: scrape a fixed demo page. To prompt for the address
    # instead, replace the literal with:
    #     input("Enter the URL to scrape images from: ")
    target_url = "http://www.vipsoft.com.cn"
    download_images(target_url)
posted @ 2024-08-02 09:36  VipSoft  阅读(49)  评论(0)    收藏  举报