压缩 Word 文档中的图片,使文档大小不超过 10MB

方舟 Coding Plan 支持 Doubao、GLM、DeepSeek、Kimi 等模型,工具不限,现在订阅折上9折,低至8.9元,订阅越多越划算!立即订阅:https://volcengine.com/L/s3lNTNYxaEc/ 邀请码:KYNGTDZA

import os
from docx import Document
from PIL import Image
from io import BytesIO
import tempfile

def compress_image(image_data, target_max_kb=50, max_dimension=1920, initial_quality=75):
    """Re-encode an image as JPEG, trying to bring it under a size budget.

    The image is flattened onto a white background when it carries
    transparency, scaled down (aspect ratio preserved) so that neither side
    exceeds ``max_dimension``, and then saved as JPEG at decreasing quality
    until the result fits ``target_max_kb`` or the quality floor is reached.

    Args:
        image_data: Raw bytes of the source image.
        target_max_kb: Desired upper bound for the encoded size, in KB. This
            is a goal, not a guarantee: the search stops once the quality has
            dropped to 25 or below.
        max_dimension: Maximum allowed width and height, in pixels.
        initial_quality: JPEG quality of the first attempt; it is lowered in
            steps of 5 while the result is still too large.

    Returns:
        The JPEG bytes, or ``image_data`` unchanged when the image cannot be
        processed or when the JPEG would not be smaller than the input.

    NOTE(review): an earlier version of this docstring advertised EMF/WDP
    support. Nothing here decodes those formats explicitly; if Pillow cannot
    open an image, the original bytes are returned untouched.
    """
    min_quality = 25   # the quality search never continues below this value
    quality_step = 5

    try:
        img = Image.open(BytesIO(image_data))

        original_format = img.format
        original_size = len(image_data) / 1024  # KB

        # JPEG only stores RGB or greyscale, so every other mode is converted.
        if img.mode not in ('RGB', 'L'):
            has_alpha = (
                img.mode in ('RGBA', 'LA', 'La', 'PA')
                or (img.mode == 'P' and 'transparency' in img.info)
            )
            if has_alpha:
                # Pasting an RGBA image without a mask ignores its alpha band,
                # so the alpha channel is always passed explicitly. This makes
                # transparent areas white instead of leaking whatever colour
                # happens to sit underneath them.
                rgba = img.convert('RGBA')
                background = Image.new('RGB', rgba.size, (255, 255, 255))
                background.paste(rgba, mask=rgba.split()[3])
                img = background
            else:
                img = img.convert('RGB')

        # Downscale oversized images while keeping the aspect ratio.
        width, height = img.size
        if width > max_dimension or height > max_dimension:
            ratio = min(max_dimension / width, max_dimension / height)
            # Clamp to 1 px so extreme aspect ratios cannot yield a 0-sized image.
            new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
            img = img.resize(new_size, Image.Resampling.LANCZOS)
            print(f"  - 尺寸: {width}x{height} → {new_size[0]}x{new_size[1]}")

        # Lower the quality step by step until the size budget is met.
        # The image is always encoded at least once, so a low
        # ``initial_quality`` can no longer produce an empty result.
        quality = initial_quality
        while True:
            compressed_buffer = BytesIO()
            img.save(compressed_buffer, format='JPEG', quality=quality, optimize=True)
            compressed_data = compressed_buffer.getvalue()
            if len(compressed_data) / 1024 <= target_max_kb or quality <= min_quality:
                break
            quality -= quality_step

        final_size = len(compressed_data) / 1024

        # Re-encoding an already small image can make it bigger; in that case
        # the original is the better choice.
        if len(compressed_data) >= len(image_data):
            print(f"  - 压缩后未变小 ({final_size:.1f}KB ≥ {original_size:.1f}KB),保留原图")
            return image_data

        print(f"  - 格式: {original_format} → JPEG, 大小: {original_size:.1f}KB → {final_size:.1f}KB, 质量: {quality}")

        return compressed_data

    except Exception as e:
        # Deliberate best effort: any decode/encode failure keeps the original
        # image rather than aborting the whole document.
        print(f"  - 警告: 图片处理失败 ({e}),保留原图")
        return image_data


def _unique_image_parts(doc):
    """Return the distinct embedded image parts related to the main document part."""
    parts = []
    seen = set()
    for rel in doc.part.rels.values():
        # Linked (external) images have no part inside the package, and
        # python-docx raises from ``target_part`` for them, so they are skipped.
        if rel.is_external or not rel.reltype.endswith("/image"):
            continue
        part = rel.target_part
        # Several relationships may point at one part; handle it only once.
        if id(part) not in seen:
            seen.add(id(part))
            parts.append(part)
    return parts


def compress_docx_aggressive(input_path, output_path, target_size_mb=10, target_img_kb=30, max_dimension=1600, quality=70):
    """Shrink a DOCX file by re-encoding its embedded images.

    Every embedded image related to the main document part is passed through
    ``compress_image`` and its bytes are replaced in place; the document is
    then saved to ``output_path`` and a summary is printed.

    If the images already total less than ``target_size_mb``, the document is
    saved unchanged and no compression takes place.

    Args:
        input_path: Path of the DOCX file to read.
        output_path: Path the resulting DOCX file is written to.
        target_size_mb: Desired size of the output file, in MB. Used for the
            early-exit check and the final warning only; it is not enforced.
        target_img_kb: Size budget per image, in KB.
        max_dimension: Maximum image width/height, in pixels.
        quality: Initial JPEG quality (1-100).

    Returns:
        None. Progress and results are reported via ``print``.
    """

    print(f"\n开始处理文档: {input_path}")
    print("=" * 60)

    doc = Document(input_path)

    # Collect the image parts once; the list feeds both the statistics and the
    # compression pass.
    image_parts = _unique_image_parts(doc)
    image_count = len(image_parts)
    total_original_size = sum(len(part.blob) for part in image_parts)
    total_compressed_size = 0

    print(f"文档中共有 {image_count} 张图片")
    print(f"图片总大小: {total_original_size / 1024 / 1024:.2f} MB")
    print(f"目标文件大小: {target_size_mb} MB")
    print("-" * 60)

    # Nothing to do when the images alone are already below the target.
    if total_original_size / 1024 / 1024 < target_size_mb:
        print("文件大小已符合要求,无需压缩")
        doc.save(output_path)
        return

    print(f"\n开始压缩(目标: 每张图≤{target_img_kb}KB, 最大尺寸≤{max_dimension}px)...\n")

    for processed, image_part in enumerate(image_parts, start=1):
        print(f"[{processed}/{image_count}] 处理图片...")

        compressed_data = compress_image(
            image_part.blob,
            target_max_kb=target_img_kb,
            max_dimension=max_dimension,
            initial_quality=quality
        )

        # The bytes are swapped through the private ``_blob`` attribute.
        # NOTE(review): the part name and content type are left as they were,
        # so a former PNG part may now hold JPEG data — confirm the target
        # consumers tolerate that.
        image_part._blob = compressed_data
        total_compressed_size += len(compressed_data)

    print("\n" + "-" * 60)
    print("保存压缩后的文档...")
    doc.save(output_path)

    output_size = os.path.getsize(output_path) / 1024 / 1024  # MB
    # Guard against a document without images (reachable when target_size_mb <= 0).
    if total_original_size:
        compression_ratio = (1 - total_compressed_size / total_original_size) * 100
    else:
        compression_ratio = 0.0

    print("\n" + "=" * 60)
    print("压缩完成!")
    print(f"原始图片总大小: {total_original_size / 1024 / 1024:.2f} MB")
    print(f"压缩后图片大小: {total_compressed_size / 1024 / 1024:.2f} MB")
    print(f"压缩率: {compression_ratio:.1f}%")
    print(f"输出文件大小: {output_size:.2f} MB")
    print(f"输出文件: {output_path}")

    if output_size > target_size_mb:
        print(f"\n警告: 文件仍超过 {target_size_mb}MB,建议进一步降低参数:")
        print(f"   - 降低 target_img_kb (当前: {target_img_kb}KB)")
        print(f"   - 降低 max_dimension (当前: {max_dimension}px)")
        print(f"   - 降低 quality (当前: {quality})")
    print("=" * 60)


if __name__ == "__main__":
    # Source document and the file the compressed copy is written to.
    input_file = "./投标文件.docx"
    output_file = "compressed_ultra.docx"

    # Milder presets that can be swapped in (both aim at a 10 MB document):
    #   gentle: target_img_kb=50, max_dimension=1920, quality=75
    #   medium: target_img_kb=30, max_dimension=1600, quality=70
    #
    # Active preset: the most aggressive one, for documents that are still too
    # large with the medium preset. Images are limited to 512 px per side with
    # a 90 KB budget each.
    settings = {
        "target_size_mb": 10,
        "target_img_kb": 90,
        "max_dimension": 512,
        "quality": 65,
    }
    compress_docx_aggressive(input_file, output_file, **settings)

posted @ 2026-01-26 20:33  geyee  阅读(1)  评论(0)    收藏  举报