压缩 Word 文档中的图片大小,使文档不超过 10MB
方舟 Coding Plan 支持 Doubao、GLM、DeepSeek、Kimi 等模型,工具不限,现在订阅折上9折,低至8.9元,订阅越多越划算!立即订阅:https://volcengine.com/L/s3lNTNYxaEc/ 邀请码:KYNGTDZA
import os
from docx import Document
from PIL import Image
from io import BytesIO
import tempfile
def _flatten_for_jpeg(img):
    """Return *img* in a mode JPEG can store ('RGB' or 'L').

    Images that carry transparency are composited onto a white background,
    using their alpha channel as the paste mask, so transparent areas become
    white rather than whatever colour happens to sit underneath the alpha.
    """
    if img.mode in ('RGB', 'L'):
        return img

    has_alpha = (
        img.mode in ('RGBA', 'LA', 'La', 'PA')
        or (img.mode == 'P' and 'transparency' in img.info)
    )
    if has_alpha:
        rgba = img.convert('RGBA')
        background = Image.new('RGB', rgba.size, (255, 255, 255))
        # The alpha band is only honoured by paste() when passed as the mask.
        background.paste(rgba, mask=rgba.split()[3])
        return background

    # Palette, CMYK, integer, float and any other modes.
    return img.convert('RGB')


def compress_image(image_data, target_max_kb=50, max_dimension=1920, initial_quality=75, min_quality=25):
    """Re-encode an image as JPEG, shrinking it towards a target size.

    The image is flattened to a JPEG-compatible mode, scaled down (keeping
    its aspect ratio) when either side exceeds ``max_dimension``, and then
    saved repeatedly with the JPEG quality lowered in steps of 5 until the
    result fits in ``target_max_kb`` or the quality reaches ``min_quality``.

    Args:
        image_data: Raw bytes of the source image.
        target_max_kb: Desired upper bound for the result, in KB.
        max_dimension: Maximum width/height in pixels.
        initial_quality: JPEG quality used for the first attempt.
        min_quality: Quality at or below which no further reduction is tried.

    Returns:
        The JPEG bytes, or ``image_data`` unchanged when the image cannot be
        processed (for example a format the imaging library cannot decode) or
        when re-encoding would not make it smaller.
    """
    try:
        img = Image.open(BytesIO(image_data))
        original_format = img.format
        original_size = len(image_data) / 1024  # KB

        img = _flatten_for_jpeg(img)

        # Scale down oversized images, preserving the aspect ratio.
        width, height = img.size
        if width > max_dimension or height > max_dimension:
            ratio = min(max_dimension / width, max_dimension / height)
            # Clamp to 1px so extreme aspect ratios never yield a 0-sized side.
            new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
            img = img.resize(new_size, Image.Resampling.LANCZOS)
            print(f" - 尺寸: {width}x{height} → {new_size[0]}x{new_size[1]}")

        # Always encode at least once, so a low initial_quality can never
        # produce an empty result.
        quality_floor = max(1, min_quality)
        quality = max(1, initial_quality)
        while True:
            compressed_buffer = BytesIO()
            img.save(compressed_buffer, format='JPEG', quality=quality, optimize=True)
            compressed_data = compressed_buffer.getvalue()
            if len(compressed_data) / 1024 <= target_max_kb or quality <= quality_floor:
                break
            quality = max(1, quality - 5)

        # Re-encoding small or already well-compressed images can grow them.
        if len(compressed_data) >= len(image_data):
            print(" - 压缩后未变小,保留原图")
            return image_data

        final_size = len(compressed_data) / 1024
        print(f" - 格式: {original_format} → JPEG, 大小: {original_size:.1f}KB → {final_size:.1f}KB, 质量: {quality}")
        return compressed_data
    except Exception as e:
        # Deliberate best effort: any failure keeps the original image bytes.
        print(f" - 警告: 图片处理失败 ({str(e)}),保留原图")
        return image_data
def _collect_image_parts(doc):
    """Return the distinct embedded image parts related to the main document part.

    External relationships (linked images, hyperlinks) are skipped because
    they have no target part. Only ``doc.part.rels`` is inspected.
    NOTE(review): images referenced solely from other parts (presumably
    headers/footers) would not be found here - confirm whether that matters.
    """
    parts = []
    seen = set()
    for rel in doc.part.rels.values():
        if rel.is_external or not rel.reltype.endswith("/image"):
            continue
        part = rel.target_part
        if id(part) not in seen:
            seen.add(id(part))
            parts.append(part)
    return parts


def compress_docx_aggressive(input_path, output_path, target_size_mb=10, target_img_kb=30, max_dimension=1600, quality=70):
    """Compress the embedded images of a DOCX file and save the result.

    Args:
        input_path: Path of the document to read.
        output_path: Path the (possibly compressed) document is written to.
        target_size_mb: Desired maximum size of the output file, in MB.
        target_img_kb: Target size of each image, in KB.
        max_dimension: Maximum image width/height, in pixels.
        quality: Initial JPEG quality (1-100).

    Progress and a final summary are printed. If the input already fits in
    ``target_size_mb`` it is saved to ``output_path`` without recompression.
    """
    print(f"\n开始处理文档: {input_path}")
    print("=" * 60)

    doc = Document(input_path)

    # Gather image statistics.
    image_parts = _collect_image_parts(doc)
    image_count = len(image_parts)
    total_original_size = sum(len(part.blob) for part in image_parts)
    total_compressed_size = 0

    print(f"文档中共有 {image_count} 张图片")
    print(f"图片总大小: {total_original_size / 1024 / 1024:.2f} MB")
    print(f"目标文件大小: {target_size_mb} MB")
    print("-" * 60)

    # Decide on the real file size; fall back to the image total when the
    # input is not a filesystem path.
    try:
        input_size = os.path.getsize(input_path)
    except (TypeError, OSError):
        input_size = total_original_size

    if input_size / 1024 / 1024 <= target_size_mb:
        print("文件大小已符合要求,无需压缩")
        doc.save(output_path)
        return

    print(f"\n开始压缩(目标: 每张图≤{target_img_kb}KB, 最大尺寸≤{max_dimension}px)...\n")

    for index, image_part in enumerate(image_parts, start=1):
        print(f"[{index}/{image_count}] 处理图片...")
        original_data = image_part.blob

        compressed_data = compress_image(
            original_data,
            target_max_kb=target_img_kb,
            max_dimension=max_dimension,
            initial_quality=quality
        )

        # Only swap the data in when it is non-empty and actually smaller.
        if compressed_data and len(compressed_data) < len(original_data):
            # NOTE(review): writes the part's private ``_blob`` attribute and
            # leaves the part name / content type untouched - confirm the
            # consumers of the file accept JPEG data under the original name.
            image_part._blob = compressed_data
            total_compressed_size += len(compressed_data)
        else:
            total_compressed_size += len(original_data)

    print("\n" + "-" * 60)
    print("保存压缩后的文档...")
    doc.save(output_path)

    # Report the outcome.
    output_size = os.path.getsize(output_path) / 1024 / 1024  # MB
    if total_original_size:
        compression_ratio = (1 - total_compressed_size / total_original_size) * 100
    else:
        # No image bytes at all: avoid dividing by zero.
        compression_ratio = 0.0

    print("\n" + "=" * 60)
    print("压缩完成!")
    print(f"原始图片总大小: {total_original_size / 1024 / 1024:.2f} MB")
    print(f"压缩后图片大小: {total_compressed_size / 1024 / 1024:.2f} MB")
    print(f"压缩率: {compression_ratio:.1f}%")
    print(f"输出文件大小: {output_size:.2f} MB")
    print(f"输出文件: {output_path}")

    if output_size > target_size_mb:
        print(f"\n警告: 文件仍超过 {target_size_mb}MB,建议进一步降低参数:")
        print(f" - 降低 target_img_kb (当前: {target_img_kb}KB)")
        print(f" - 降低 max_dimension (当前: {max_dimension}px)")
        print(f" - 降低 quality (当前: {quality})")
    print("=" * 60)
if __name__ == "__main__":
    # Source document and destination of the compressed copy.
    input_file = "./投标文件.docx"
    output_file = "compressed_ultra.docx"

    # Preset 3 (small dimensions, lower quality). Gentler alternatives, all
    # with target_size_mb=10:
    #   preset 1: target_img_kb=50, max_dimension=1920, quality=75
    #   preset 2: target_img_kb=30, max_dimension=1600, quality=70
    compress_docx_aggressive(
        input_file,
        output_file,
        target_size_mb=10,
        target_img_kb=90,    # per-image target size, in KB
        max_dimension=512,   # maximum image side, in pixels
        quality=65           # initial JPEG quality
    )
浙公网安备 33010602011771号