Python的ZIP压缩工具
为什么需要zipfile?
ZIP是最常见的压缩格式,用于:
- 减小文件大小
- 打包多个文件
- 加密保护文件
- 网络传输
传统方式的问题:
# Without zipfile, unpacking an archive means shelling out to an external
# program (here the `unzip` command-line tool).
import subprocess

unzip_command = ['unzip', 'file.zip']
subprocess.run(unzip_command)
zipfile的解决方案:
import zipfile
# Extract every member of file.zip; with no argument, extractall() targets
# the current working directory.
with zipfile.ZipFile('file.zip', 'r') as zf:
    zf.extractall()
创建ZIP文件
基本创建
import zipfile
import os
# Create a few sample files to archive.
os.makedirs('test_data', exist_ok=True)
for i in range(3):
    # Write UTF-8 explicitly so the bytes stored in the archive do not depend
    # on the platform's default text encoding.
    with open(f'test_data/file{i}.txt', 'w', encoding='utf-8') as f:
        f.write(f"这是文件{i}的内容")

# Build the archive.
with zipfile.ZipFile('my_archive.zip', 'w') as zf:
    # Add a single file, stored at the archive root.
    zf.write('test_data/file0.txt', 'file0.txt')
    # Add the remaining files under an "archive/" folder inside the ZIP.
    for i in range(1, 3):
        filename = f'test_data/file{i}.txt'
        arcname = f'archive/file{i}.txt'  # member path inside the ZIP
        zf.write(filename, arcname)

# Report the size only after the archive has been closed and flushed.
print("ZIP文件创建完成")
print(f"文件大小: {os.path.getsize('my_archive.zip')} 字节")
添加整个目录
import zipfile
import os
def zip_directory(directory, zip_path):
    """Compress an entire directory tree into a ZIP archive.

    Member names are stored relative to the parent of ``directory``, so the
    directory's own name is kept as the top-level folder inside the archive.

    Args:
        directory: Path of the directory to archive.
        zip_path: Path of the ZIP file to create (overwritten if it exists).
    """
    # Normalise first: without this a trailing separator ("test_data/") makes
    # dirname() return the directory itself and the top-level folder would be
    # dropped from every member name.
    base_dir = os.path.dirname(os.path.normpath(directory))
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                # Path of the member inside the ZIP.
                arcname = os.path.relpath(file_path, base_dir)
                zf.write(file_path, arcname)
                print(f"添加: {arcname}")
# Example: archive the test_data directory into full_directory.zip.
zip_directory('test_data', 'full_directory.zip')
读取ZIP文件
查看ZIP内容
import zipfile
with zipfile.ZipFile('my_archive.zip', 'r') as zf:
    # List every member name.
    print("ZIP文件内容:")
    for filename in zf.namelist():
        print(f" {filename}")

    # Print per-member metadata.
    print("\n详细信息:")
    for info in zf.infolist():
        # Empty members (and directory entries) have file_size == 0; report a
        # 0% ratio for them instead of dividing by zero.
        if info.file_size > 0:
            ratio = (1 - info.compress_size/info.file_size)*100
        else:
            ratio = 0
        print(f"文件: {info.filename}")
        print(f" 原始大小: {info.file_size} 字节")
        print(f" 压缩大小: {info.compress_size} 字节")
        print(f" 压缩率: {ratio:.1f}%")
        print(f" 修改时间: {info.date_time}")
        print("-" * 30)
读取特定文件
import zipfile
with zipfile.ZipFile('my_archive.zip', 'r') as zf:
    # ZipFile.read() returns the whole member as bytes.
    content = zf.read('file0.txt')
    print("文件内容:")
    print(content.decode('utf-8'))

    # ZipFile.open() returns a binary file-like object for the member; it must
    # be used while the archive is still open.
    with zf.open('archive/file1.txt') as f:
        content2 = f.read().decode('utf-8')
    print("\n第二个文件内容:")
    print(content2)
解压ZIP文件
解压所有文件
import zipfile
import os
with zipfile.ZipFile('my_archive.zip', 'r') as zf:
    # Extract every member into the extracted_files directory.
    zf.extractall('extracted_files')
    print("解压完成")

# Walk the output directory and list what was extracted.
print("\n解压的文件:")
for root, dirs, files in os.walk('extracted_files'):
    for file in files:
        print(f" {os.path.join(root, file)}")
解压单个文件
import zipfile
with zipfile.ZipFile('my_archive.zip', 'r') as zf:
    # Extract one specific member into single_extracted/.
    zf.extract('file0.txt', 'single_extracted')
    print("单个文件解压完成")

    # Extract a chosen subset of members; this must stay inside the `with`
    # block because it uses the open archive.
    files_to_extract = ['archive/file1.txt', 'archive/file2.txt']
    for file in files_to_extract:
        zf.extract(file, 'multiple_extracted')
    print("多个文件解压完成")
ZIP文件加密
创建加密ZIP
import zipfile
import os
import subprocess

# Create the sample file that will be protected.
with open('secret.txt', 'w', encoding='utf-8') as f:
    f.write("这是一个机密文件")

# NOTE: the standard-library zipfile module cannot *write* encrypted
# archives. ZipFile.setpassword() only sets the default password used when
# reading encrypted members, so calling it before write() leaves the archive
# unprotected. Encryption is therefore delegated to an external tool
# (7-Zip must be installed and available on PATH).
# NOTE(review): for .zip output 7-Zip is expected to use legacy ZipCrypto by
# default, which zipfile can decrypt but which is cryptographically weak —
# confirm against the installed 7-Zip version.
subprocess.run(
    ['7z', 'a', '-tzip', '-pmysecretpassword', 'encrypted.zip', 'secret.txt'],
    check=True,  # fail loudly if 7z is missing or reports an error
)
print("加密ZIP创建完成")
读取加密ZIP
import zipfile
# First attempt: deliberately use the wrong password.
try:
    with zipfile.ZipFile('encrypted.zip', 'r') as zf:
        zf.setpassword(b'wrongpassword')  # wrong password
        zf.extractall('decrypted')
        print("解压成功")
except (RuntimeError, zipfile.BadZipFile) as e:
    # zipfile raises RuntimeError when the password check fails. The legacy
    # ZIP password check is a single byte, so a wrong password occasionally
    # passes it and is only detected later as a CRC mismatch (BadZipFile).
    print(f"密码错误: {e}")

# Second attempt: use the correct password.
try:
    with zipfile.ZipFile('encrypted.zip', 'r') as zf:
        zf.setpassword(b'mysecretpassword')  # correct password
        zf.extractall('decrypted')
        print("使用正确密码解压成功")
except Exception as e:
    # Top-level boundary of the example: report any failure instead of crashing.
    print(f"解压失败: {e}")
高级功能
追加文件到ZIP
import zipfile
import os
# Create the initial archive.
# NOTE: file1.txt and file2.txt must already exist in the working directory.
with zipfile.ZipFile('growing.zip', 'w') as zf:
    zf.write('file1.txt')
print("初始ZIP大小:", os.path.getsize('growing.zip'))

# Re-open in 'a' (append) mode to add a member without rewriting the archive.
with zipfile.ZipFile('growing.zip', 'a') as zf:
    zf.write('file2.txt')
print("追加后ZIP大小:", os.path.getsize('growing.zip'))
ZIP文件注释
import zipfile
# Create an archive that carries a comment.
# NOTE: file1.txt must already exist in the working directory.
with zipfile.ZipFile('with_comment.zip', 'w') as zf:
    # ZipFile.comment must be bytes. A bytes literal may only contain ASCII
    # characters, so the Chinese text is encoded explicitly instead of being
    # written as b'...'.
    zf.comment = '这是一个重要的ZIP文件,创建于2024年'.encode('utf-8')
    zf.write('file1.txt')

# Read the comment back and decode it with the same encoding.
with zipfile.ZipFile('with_comment.zip', 'r') as zf:
    print(f"ZIP注释: {zf.comment.decode('utf-8')}")
处理中文文件名
import zipfile
import os
def decode_member_name(info):
    """Return the best-effort real name of a ZIP member.

    Args:
        info: A ``zipfile.ZipInfo`` describing the member.

    Returns:
        ``info.filename`` unchanged when the member is flagged as UTF-8 or
        when no repair applies; otherwise the name re-decoded as GBK or UTF-8.
    """
    # Bit 11 (0x800) of the general-purpose flags marks a UTF-8 name, which
    # zipfile has already decoded correctly; re-decoding could corrupt it.
    if info.flag_bits & 0x800:
        return info.filename
    # Unflagged names were decoded by zipfile as cp437. Recover the raw bytes
    # and retry with the encodings such archives commonly use.
    try:
        raw = info.filename.encode('cp437')
    except UnicodeEncodeError:
        return info.filename
    for encoding in ('gbk', 'utf-8'):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return info.filename


# Create a file with a Chinese name.
with open('中文文件.txt', 'w', encoding='utf-8') as f:
    f.write("中文内容")

# Pack it into an archive.
with zipfile.ZipFile('chinese.zip', 'w') as zf:
    zf.write('中文文件.txt')

# Read it back, repairing the member name if necessary.
with zipfile.ZipFile('chinese.zip', 'r') as zf:
    for info in zf.infolist():
        filename = decode_member_name(info)
        print(f"文件名: {filename}")
        print(f"内容: {zf.read(info).decode('utf-8')}")
实战应用
应用1:批量压缩图片
import zipfile
import os
from PIL import Image
import io
def compress_images_to_zip(image_dir, output_zip, max_size=(1024, 1024), quality=85):
    """Downscale images and pack the re-encoded copies into a ZIP archive.

    Walks ``image_dir`` recursively. Files whose extension is not a supported
    image type are skipped; files that fail to process are reported and
    skipped.

    Args:
        image_dir: Directory containing the images.
        output_zip: Path of the ZIP file to create.
        max_size: Maximum (width, height) passed to ``Image.thumbnail``.
        quality: JPEG quality (1-100), used for .jpg/.jpeg files only.
    """
    supported_formats = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(image_dir):
            for file in files:
                lower_name = file.lower()
                # str.endswith accepts a tuple of suffixes.
                if not lower_name.endswith(supported_formats):
                    continue
                file_path = os.path.join(root, file)
                try:
                    with Image.open(file_path) as img:
                        # Shrink in place, preserving aspect ratio.
                        img.thumbnail(max_size, Image.Resampling.LANCZOS)
                        # Re-encode into memory rather than a temporary file.
                        img_bytes = io.BytesIO()
                        if lower_name.endswith(('.jpg', '.jpeg')):
                            img.save(img_bytes, 'JPEG', quality=quality, optimize=True)
                        elif lower_name.endswith('.png'):
                            img.save(img_bytes, 'PNG', optimize=True)
                        else:
                            img.save(img_bytes, img.format)
                    data = img_bytes.getvalue()
                    # Store the member relative to image_dir.
                    arcname = os.path.relpath(file_path, image_dir)
                    zf.writestr(arcname, data)
                    original_size = os.path.getsize(file_path)
                    compressed_size = len(data)
                    ratio = (1 - compressed_size/original_size) * 100
                    print(f"压缩: {arcname} "
                          f"({original_size/1024:.0f}KB -> {compressed_size/1024:.0f}KB, "
                          f"节省{ratio:.1f}%)")
                except Exception as e:
                    # Best effort: one unreadable image must not abort the run.
                    print(f"处理失败 {file}: {e}")
    # The archive is closed here, so the size on disk is final.
    print(f"\n压缩完成: {output_zip}")
    print(f"总大小: {os.path.getsize(output_zip)/1024:.0f}KB")
# Example: pack the images under photos/ into compressed_photos.zip, with
# max_size set to 1920x1080.
compress_images_to_zip('photos', 'compressed_photos.zip', max_size=(1920, 1080))
应用2:ZIP文件分析器
import zipfile
import os
from datetime import datetime
def analyze_zip(zip_path):
    """Print a report about a ZIP archive.

    The report contains general information, a per-member table, totals and a
    count of members per file extension. If ``zip_path`` does not exist a
    message is printed and nothing else happens.

    Args:
        zip_path: Path of the archive to inspect.
    """
    if not os.path.exists(zip_path):
        print(f"文件不存在: {zip_path}")
        return

    with zipfile.ZipFile(zip_path, 'r') as zf:
        infos = zf.infolist()
        # Archive comments are raw bytes with no declared encoding; replace
        # undecodable bytes rather than failing the whole report.
        comment = zf.comment.decode('utf-8', errors='replace') if zf.comment else '无'

        # General information.
        print("=" * 60)
        print(f"ZIP文件: {os.path.basename(zip_path)}")
        print(f"文件大小: {os.path.getsize(zip_path):,} 字节")
        print(f"文件数量: {len(infos)}")
        print(f"注释: {comment}")
        print("=" * 60)

        # Per-member table.
        print("\n文件列表:")
        print("-" * 60)
        print(f"{'文件名':<30} {'大小':>10} {'压缩后':>10} {'比例':>8} {'修改时间':>20}")
        print("-" * 60)
        total_original = 0
        total_compressed = 0
        for info in infos:
            # Truncate long names so the columns stay aligned.
            filename = info.filename[:28] + ".." if len(info.filename) > 30 else info.filename
            # Empty members would otherwise divide by zero.
            ratio = (1 - info.compress_size/info.file_size) * 100 if info.file_size > 0 else 0
            mod_time = datetime(*info.date_time).strftime("%Y-%m-%d %H:%M:%S")
            print(f"{filename:<30} {info.file_size:>10,} {info.compress_size:>10,} "
                  f"{ratio:>7.1f}% {mod_time:>20}")
            total_original += info.file_size
            total_compressed += info.compress_size
        print("-" * 60)

        # Totals.
        total_ratio = (1 - total_compressed/total_original) * 100 if total_original > 0 else 0
        print(f"总计: {len(infos):>2}个文件")
        print(f"原始大小: {total_original:>12,} 字节")
        print(f"压缩大小: {total_compressed:>12,} 字节")
        print(f"总体压缩率: {total_ratio:>8.1f}%")

        # Member count per extension, most common first.
        print("\n文件类型统计:")
        print("-" * 30)
        ext_stats = {}
        for info in infos:
            ext = os.path.splitext(info.filename)[1].lower()
            ext_stats[ext] = ext_stats.get(ext, 0) + 1
        for ext, count in sorted(ext_stats.items(), key=lambda x: x[1], reverse=True):
            print(f"{ext or '无扩展名':<10} {count:>4} 个")
# Example: print a report for my_archive.zip.
analyze_zip('my_archive.zip')
应用3:自动备份工具
import zipfile
import os
import shutil
from datetime import datetime, timedelta
import schedule
import time
class AutoBackup:
    """Create timestamped ZIP backups of a directory and prune old ones."""

    def __init__(self, source_dir, backup_dir, keep_days=7):
        """Store the settings and make sure the backup directory exists.

        Args:
            source_dir: Directory to back up.
            backup_dir: Directory where backup archives are written.
            keep_days: Backups older than this many days are deleted.
        """
        self.source_dir = source_dir
        self.backup_dir = backup_dir
        self.keep_days = keep_days
        os.makedirs(backup_dir, exist_ok=True)

    def create_backup(self):
        """Archive ``source_dir`` into a new timestamped ZIP, then prune old backups.

        Failures are reported on stdout rather than raised.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_name = f"backup_{timestamp}.zip"
        zip_path = os.path.join(self.backup_dir, zip_name)
        backup_root = os.path.abspath(self.backup_dir)
        archive_abs = os.path.abspath(zip_path)
        print(f"[{datetime.now()}] 开始备份: {self.source_dir}")
        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                for root, dirs, files in os.walk(self.source_dir):
                    # If the backup directory lives inside the source tree, do
                    # not descend into it: otherwise every backup would contain
                    # all earlier backups and the archive being written.
                    dirs[:] = [
                        d for d in dirs
                        if os.path.abspath(os.path.join(root, d)) != backup_root
                    ]
                    for file in files:
                        file_path = os.path.join(root, file)
                        # Covers backup_dir == source_dir: never add the
                        # archive that is currently being written to itself.
                        if os.path.abspath(file_path) == archive_abs:
                            continue
                        arcname = os.path.relpath(file_path, self.source_dir)
                        zf.write(file_path, arcname)
            size_mb = os.path.getsize(zip_path) / (1024 * 1024)
            print(f"[{datetime.now()}] 备份完成: {zip_name} ({size_mb:.1f}MB)")
            # Prune only after a successful backup.
            self.clean_old_backups()
        except Exception as e:
            print(f"[{datetime.now()}] 备份失败: {e}")

    def clean_old_backups(self):
        """Delete ``backup_*.zip`` files in ``backup_dir`` older than ``keep_days`` days."""
        cutoff_date = datetime.now() - timedelta(days=self.keep_days)
        for file in os.listdir(self.backup_dir):
            if not (file.startswith('backup_') and file.endswith('.zip')):
                continue
            file_path = os.path.join(self.backup_dir, file)
            # Age is judged by the file's modification time, not its name.
            file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
            if file_mtime < cutoff_date:
                try:
                    os.remove(file_path)
                    print(f"[{datetime.now()}] 删除旧备份: {file}")
                except Exception as e:
                    print(f"[{datetime.now()}] 删除失败 {file}: {e}")

    def run_daily(self, hour=2, minute=0):
        """Run ``create_backup`` every day at the given time; never returns.

        Args:
            hour: Hour of day (0-23).
            minute: Minute of the hour (0-59).
        """
        schedule.every().day.at(f"{hour:02d}:{minute:02d}").do(self.create_backup)
        print(f"自动备份已启动,每天 {hour:02d}:{minute:02d} 运行")
        print(f"源目录: {self.source_dir}")
        print(f"备份目录: {self.backup_dir}")
        print(f"保留天数: {self.keep_days}")
        while True:
            schedule.run_pending()
            time.sleep(60)  # poll the scheduler once a minute
# Example usage
if __name__ == "__main__":
    backup = AutoBackup(
        source_dir='/path/to/important/data',
        backup_dir='/path/to/backups',
        keep_days=30,
    )
    # Run one backup immediately.
    backup.create_backup()
    # Alternatively, run as a daily job at 02:00:
    # backup.run_daily(hour=2, minute=0)
常见问题解答
Q1:ZIP_DEFLATED 和 ZIP_STORED 有什么区别?
A:
- ZIP_STORED:不压缩,只打包
- ZIP_DEFLATED:使用DEFLATE算法压缩(最常用,需要zlib模块)
- ZIP_BZIP2:使用BZIP2算法压缩(需要bz2模块)
- ZIP_LZMA:使用LZMA算法压缩(需要lzma模块)
Q2:如何处理大型ZIP文件?
import os
import zipfile
def process_large_zip(zip_path, output_dir,
                      large_file_threshold=100 * 1024 * 1024,
                      chunk_size=1024 * 1024):
    """Extract a ZIP archive, streaming large members to disk in chunks.

    Members at or below ``large_file_threshold`` are extracted with
    ``ZipFile.extract``. Larger members are copied ``chunk_size`` bytes at a
    time so they are never held in memory as a whole.

    Args:
        zip_path: Path of the archive to extract.
        output_dir: Destination directory (created if missing).
        large_file_threshold: Uncompressed size in bytes above which a member
            is streamed. Defaults to 100 MB.
        chunk_size: Number of bytes read per chunk. Defaults to 1 MB.

    Raises:
        ValueError: If a large member's name would place it outside
            ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_root = os.path.abspath(output_dir)
    with zipfile.ZipFile(zip_path, 'r') as zf:
        for info in zf.infolist():
            if info.file_size <= large_file_threshold:
                zf.extract(info, output_dir)
                continue

            print(f"处理大文件: {info.filename} ({info.file_size/1024/1024:.1f}MB)")
            # This path is built by hand, so member names such as
            # "../x" must be rejected explicitly.
            target = os.path.abspath(os.path.join(output_root, info.filename))
            if os.path.commonpath([output_root, target]) != output_root:
                raise ValueError(f"不安全的文件路径: {info.filename}")
            # Members may live in sub-folders that do not exist yet.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            with zf.open(info) as src, open(target, 'wb') as dst:
                # read() returns b'' at end of stream, which ends the loop.
                for chunk in iter(lambda: src.read(chunk_size), b''):
                    dst.write(chunk)
Q3:如何创建自解压ZIP?
A:Python标准库不支持创建自解压ZIP,但可以结合其他工具:
import zipfile
import subprocess
# Create an ordinary ZIP archive first.
# NOTE: file1.txt must already exist in the working directory.
with zipfile.ZipFile('data.zip', 'w') as zf:
    zf.write('file1.txt')

# Build a self-extracting executable with an external tool (7-Zip must be
# installed and on PATH).
# NOTE(review): this adds data.zip as a file inside a new self-extracting
# archive rather than converting the ZIP itself — confirm that is intended.
subprocess.run(['7z', 'a', '-sfx', 'self_extracting.exe', 'data.zip'])
Q4:zipfile支持RAR格式吗?
A:不支持。zipfile只支持ZIP格式。处理RAR需要使用第三方库,如rarfile。
性能优化技巧
批量操作提高性能
import zipfile
import os
def fast_zip_create(files, zip_path):
    """Pack a list of files into one ZIP archive in a single pass.

    Each file is stored at the archive root under its base name; paths that
    do not exist are skipped silently.

    Args:
        files: Iterable of file paths to add.
        zip_path: Path of the ZIP file to create.
    """
    # Open the archive once and add every file, instead of re-opening it
    # per file.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            if os.path.exists(file):
                zf.write(file, os.path.basename(file))
使用内存优化
import zipfile
import io
def create_zip_in_memory():
    """Build a small ZIP archive entirely in memory.

    Returns:
        The bytes of the finished archive, containing a text member
        ``file1.txt`` and a binary member ``data.bin``.
    """
    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w') as zf:
        # writestr() accepts str (encoded as UTF-8) or bytes.
        zf.writestr('file1.txt', '内存中的文件内容')
        zf.writestr('data.bin', b'\x00\x01\x02\x03')
    # Read the buffer only after the archive is closed; the central
    # directory is written on close.
    zip_data = zip_buffer.getvalue()
    return zip_data
# Example: build the archive in memory and report its size.
zip_data = create_zip_in_memory()
print(f"生成的ZIP大小: {len(zip_data)} 字节")

# Write the in-memory archive to disk.
with open('memory_zip.zip', 'wb') as f:
    f.write(zip_data)

浙公网安备 33010602011771号