Python的ZIP压缩工具

为什么需要zipfile?

ZIP是最常见的压缩格式,用于:

  • 减小文件大小
  • 打包多个文件
  • 加密保护文件(注意:标准库zipfile只能解压已加密的ZIP,不能创建加密的ZIP)
  • 网络传输

传统方式的问题:

# Without zipfile, unpacking a ZIP means shelling out
# to an external program
import subprocess
unzip_command = ['unzip', 'file.zip']
subprocess.run(unzip_command)

zipfile的解决方案:

import zipfile
# Open the archive read-only and unpack every member into the working directory
with zipfile.ZipFile('file.zip', mode='r') as archive:
    archive.extractall()

创建ZIP文件

基本创建

import zipfile
import os

# Create the sample files. The encoding is explicit so the bytes on disk are
# UTF-8 on every platform; the reading example decodes them with
# .decode('utf-8'), which would fail if the locale default (e.g. GBK) was used.
os.makedirs('test_data', exist_ok=True)
for i in range(3):
    with open(f'test_data/file{i}.txt', 'w', encoding='utf-8') as f:
        f.write(f"这是文件{i}的内容")

# Build the archive
with zipfile.ZipFile('my_archive.zip', 'w') as zf:
    # Add a single file, stored at the archive root
    zf.write('test_data/file0.txt', 'file0.txt')

    # Add several files, stored under archive/
    for i in range(1, 3):
        filename = f'test_data/file{i}.txt'
        arcname = f'archive/file{i}.txt'  # path inside the ZIP
        zf.write(filename, arcname)

print("ZIP文件创建完成")
print(f"文件大小: {os.path.getsize('my_archive.zip')} 字节")

添加整个目录

import zipfile
import os

def zip_directory(directory, zip_path):
    """Compress a whole directory tree into a ZIP archive.

    Every regular file below ``directory`` is stored under a path that starts
    with the directory's own name (``data/sub/a.txt`` for ``directory='data'``).

    Args:
        directory: Directory to compress. A trailing path separator is ignored.
        zip_path: Path of the archive to create. If it lies inside
            ``directory`` the archive itself is skipped.
    """
    # normpath drops a trailing separator; without it dirname('data/') is
    # 'data' and the top-level folder name would vanish from the archive paths.
    directory = os.path.normpath(directory)
    base_dir = os.path.dirname(directory) or os.curdir
    archive_abs = os.path.abspath(zip_path)

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, dirs, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)
                # Never pack the archive that is currently being written
                if os.path.abspath(file_path) == archive_abs:
                    continue
                # Path of the member inside the ZIP, relative to the parent folder
                arcname = os.path.relpath(file_path, base_dir)
                zf.write(file_path, arcname)
                print(f"添加: {arcname}")

# Usage
zip_directory('test_data', 'full_directory.zip')

读取ZIP文件

查看ZIP内容

import zipfile

with zipfile.ZipFile('my_archive.zip', 'r') as zf:
    # List every member name
    print("ZIP文件内容:")
    for filename in zf.namelist():
        print(f"  {filename}")

    # Per-member metadata
    print("\n详细信息:")
    for info in zf.infolist():
        # Empty files and directory entries have file_size == 0; report 0%
        # for them instead of dividing by zero.
        if info.file_size > 0:
            ratio = (1 - info.compress_size / info.file_size) * 100
        else:
            ratio = 0
        print(f"文件: {info.filename}")
        print(f"  原始大小: {info.file_size} 字节")
        print(f"  压缩大小: {info.compress_size} 字节")
        print(f"  压缩率: {ratio:.1f}%")
        print(f"  修改时间: {info.date_time}")
        print("-" * 30)

读取特定文件

import zipfile

with zipfile.ZipFile('my_archive.zip', 'r') as archive:
    # ZipFile.read() returns the member's bytes in a single call
    raw = archive.read('file0.txt')
    print("文件内容:")
    print(raw.decode('utf-8'))

    # ZipFile.open() hands back a file-like object instead
    with archive.open('archive/file1.txt') as member:
        second_text = member.read().decode('utf-8')
    print("\n第二个文件内容:")
    print(second_text)

解压ZIP文件

解压所有文件

import zipfile
import os

target_dir = 'extracted_files'

with zipfile.ZipFile('my_archive.zip', 'r') as archive:
    # Unpack every member below target_dir
    archive.extractall(target_dir)
    print("解压完成")

    # Walk the output tree and list what was written
    print("\n解压的文件:")
    for folder, _subdirs, names in os.walk(target_dir):
        for name in names:
            print(f"  {os.path.join(folder, name)}")

解压单个文件

import zipfile

with zipfile.ZipFile('my_archive.zip', 'r') as archive:
    # Extract one member by name
    archive.extract('file0.txt', 'single_extracted')
    print("单个文件解压完成")

    # Extract a chosen set of members, one call per member
    for member in ('archive/file1.txt', 'archive/file2.txt'):
        archive.extract(member, 'multiple_extracted')
    print("多个文件解压完成")

ZIP文件加密

创建加密ZIP

import zipfile
import os

# Create the test file (explicit UTF-8 so the bytes do not depend on the locale)
with open('secret.txt', 'w', encoding='utf-8') as f:
    f.write("这是一个机密文件")

# NOTE: the standard-library zipfile module cannot create encrypted archives.
# ZipFile.setpassword() only sets the default password used when *reading*
# encrypted members; calling it on an archive opened for writing has no effect,
# so it is not called here. To really encrypt, use an external tool or a
# third-party library.
with zipfile.ZipFile('encrypted.zip', 'w', zipfile.ZIP_DEFLATED) as zf:
    zf.write('secret.txt')

# Bit 0 of a member's general-purpose flags is the ZIP "encrypted" marker.
# Check it so the message below reports what was actually produced.
with zipfile.ZipFile('encrypted.zip', 'r') as zf:
    is_encrypted = any(info.flag_bits & 0x1 for info in zf.infolist())

if is_encrypted:
    print("加密ZIP创建完成")
else:
    print("警告: 标准库zipfile不支持写入加密,生成的ZIP未加密")

读取加密ZIP

import zipfile

# First attempt: deliberately supply the wrong password
try:
    with zipfile.ZipFile('encrypted.zip', 'r') as archive:
        archive.extractall(path='decrypted', pwd=b'wrongpassword')
        print("解压成功")
except RuntimeError as err:
    print(f"密码错误: {err}")

# Second attempt: supply the correct password
try:
    with zipfile.ZipFile('encrypted.zip', 'r') as archive:
        archive.extractall(path='decrypted', pwd=b'mysecretpassword')
        print("使用正确密码解压成功")
except Exception as err:
    print(f"解压失败: {err}")

高级功能

追加文件到ZIP

import zipfile
import os

archive_path = 'growing.zip'

# Start with a fresh archive that holds one file
with zipfile.ZipFile(archive_path, mode='w') as archive:
    archive.write('file1.txt')

print("初始ZIP大小:", os.path.getsize(archive_path))

# Mode 'a' reopens the existing archive and appends to it
with zipfile.ZipFile(archive_path, mode='a') as archive:
    archive.write('file2.txt')

print("追加后ZIP大小:", os.path.getsize(archive_path))

ZIP文件注释

import zipfile

# Create a ZIP with an archive comment.
# ZipFile.comment must be bytes, and a bytes literal may only contain ASCII
# characters (b'中文' is a SyntaxError), so the text is encoded explicitly.
with zipfile.ZipFile('with_comment.zip', 'w') as zf:
    zf.comment = '这是一个重要的ZIP文件,创建于2024年'.encode('utf-8')
    zf.write('file1.txt')

# Read the comment back
with zipfile.ZipFile('with_comment.zip', 'r') as zf:
    print(f"ZIP注释: {zf.comment.decode('utf-8')}")

处理中文文件名

import zipfile
import os

def decode_member_name(info, fallback_encodings=('utf-8', 'gbk')):
    """Return a readable name for a ZIP member.

    zipfile decodes a member name as UTF-8 when bit 11 (0x800) of the member's
    general-purpose flags is set, and as cp437 otherwise. Archives written by
    tools that stored the name in a local code page without setting the flag
    therefore come out garbled; this helper undoes the cp437 decoding and
    retries with the given encodings.

    Args:
        info: The ``zipfile.ZipInfo`` of the member.
        fallback_encodings: Encodings to try, in order, for names stored
            without the UTF-8 flag. UTF-8 is tried first because it is strict
            and rarely accepts bytes that were written in another encoding.

    Returns:
        The decoded name, or ``info.filename`` unchanged when no fix applies.
    """
    # Flag set: the name is already correct, re-decoding would corrupt it
    if info.flag_bits & 0x800:
        return info.filename

    try:
        raw_name = info.filename.encode('cp437')
    except UnicodeEncodeError:
        # The name holds characters cp437 cannot represent, so it did not
        # come from a cp437 decode in the first place.
        return info.filename

    for encoding in fallback_encodings:
        try:
            return raw_name.decode(encoding)
        except UnicodeDecodeError:
            continue
    return info.filename


# Create a file with a Chinese name
with open('中文文件.txt', 'w', encoding='utf-8') as f:
    f.write("中文内容")

# Create the ZIP
with zipfile.ZipFile('chinese.zip', 'w') as zf:
    zf.write('中文文件.txt')

# Read it back, repairing the member name only when it needs repair
with zipfile.ZipFile('chinese.zip', 'r') as zf:
    for info in zf.infolist():
        filename = decode_member_name(info)

        print(f"文件名: {filename}")
        print(f"内容: {zf.read(info).decode('utf-8')}")

实战应用

应用1:批量压缩图片

import zipfile
import os
from PIL import Image
import io

def compress_images_to_zip(image_dir, output_zip, max_size=(1024, 1024), quality=85):
    """
    Shrink every image below a directory and pack the results into a ZIP.

    Args:
        image_dir: Directory that is walked recursively for images.
        output_zip: Path of the ZIP file to write.
        max_size: Bounding box passed to ``thumbnail``.
        quality: JPEG quality (1-100), used for .jpg/.jpeg files only.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

    with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as archive:
        for folder, _subdirs, names in os.walk(image_dir):
            for name in names:
                lowered = name.lower()
                # Only files with a known image extension are processed
                if not lowered.endswith(image_suffixes):
                    continue

                source_path = os.path.join(folder, name)

                try:
                    with Image.open(source_path) as picture:
                        # Resize in place to fit inside max_size
                        picture.thumbnail(max_size, Image.Resampling.LANCZOS)

                        # Re-encode into an in-memory buffer
                        buffer = io.BytesIO()
                        if lowered.endswith(('.jpg', '.jpeg')):
                            picture.save(buffer, 'JPEG', quality=quality, optimize=True)
                        elif lowered.endswith('.png'):
                            picture.save(buffer, 'PNG', optimize=True)
                        else:
                            picture.save(buffer, picture.format)

                        # Store under the path relative to image_dir
                        arcname = os.path.relpath(source_path, image_dir)
                        archive.writestr(arcname, buffer.getvalue())

                        original_size = os.path.getsize(source_path)
                        compressed_size = len(buffer.getvalue())
                        ratio = (1 - compressed_size/original_size) * 100

                        print(f"压缩: {arcname} "
                              f"({original_size/1024:.0f}KB -> {compressed_size/1024:.0f}KB, "
                              f"节省{ratio:.1f}%)")

                except Exception as e:
                    # One unreadable image must not abort the whole run
                    print(f"处理失败 {name}: {e}")

    print(f"\n压缩完成: {output_zip}")
    print(f"总大小: {os.path.getsize(output_zip)/1024:.0f}KB")

# Usage
compress_images_to_zip('photos', 'compressed_photos.zip', max_size=(1920, 1080))

应用2:ZIP文件分析器

import zipfile
import os
from datetime import datetime

def analyze_zip(zip_path):
    """Print a report about a ZIP archive.

    The report has four parts: a summary (name, size on disk, member count,
    comment), a table with one row per member, overall totals, and a count of
    members per file extension.

    Args:
        zip_path: Path of the archive to inspect. If the path does not exist
            a message is printed and the function returns without raising.
    """
    if not os.path.exists(zip_path):
        print(f"文件不存在: {zip_path}")
        return

    with zipfile.ZipFile(zip_path, 'r') as zf:
        members = zf.infolist()
        # The archive comment is arbitrary bytes and not guaranteed to be
        # UTF-8; replace undecodable bytes rather than abort the report.
        comment = zf.comment.decode('utf-8', errors='replace') if zf.comment else '无'

        # Summary
        print("=" * 60)
        print(f"ZIP文件: {os.path.basename(zip_path)}")
        print(f"文件大小: {os.path.getsize(zip_path):,} 字节")
        print(f"文件数量: {len(members)}")
        print(f"注释: {comment}")
        print("=" * 60)

        # Member table
        print("\n文件列表:")
        print("-" * 60)
        print(f"{'文件名':<30} {'大小':>10} {'压缩后':>10} {'比例':>8} {'修改时间':>20}")
        print("-" * 60)

        total_original = 0
        total_compressed = 0

        for info in members:
            # Keep the name column at 30 characters
            filename = info.filename[:28] + ".." if len(info.filename) > 30 else info.filename
            ratio = (1 - info.compress_size/info.file_size) * 100 if info.file_size > 0 else 0
            try:
                mod_time = datetime(*info.date_time).strftime("%Y-%m-%d %H:%M:%S")
            except ValueError:
                # Some archives store a zeroed timestamp (month/day 0), which
                # is not a valid datetime; show the raw fields instead.
                mod_time = "{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}".format(*info.date_time)

            print(f"{filename:<30} {info.file_size:>10,} {info.compress_size:>10,} "
                  f"{ratio:>7.1f}% {mod_time:>20}")

            total_original += info.file_size
            total_compressed += info.compress_size

        print("-" * 60)

        # Totals
        total_ratio = (1 - total_compressed/total_original) * 100 if total_original > 0 else 0
        print(f"总计: {len(members):>2}个文件")
        print(f"原始大小: {total_original:>12,} 字节")
        print(f"压缩大小: {total_compressed:>12,} 字节")
        print(f"总体压缩率: {total_ratio:>8.1f}%")

        # Members per extension, most frequent first
        print("\n文件类型统计:")
        print("-" * 30)
        ext_stats = {}
        for info in members:
            ext = os.path.splitext(info.filename)[1].lower()
            ext_stats[ext] = ext_stats.get(ext, 0) + 1

        for ext, count in sorted(ext_stats.items(), key=lambda x: x[1], reverse=True):
            print(f"{ext or '无扩展名':<10} {count:>4} 个")

# Usage
analyze_zip('my_archive.zip')

应用3:自动备份工具

import zipfile
import os
import shutil
from datetime import datetime, timedelta
import schedule
import time

class AutoBackup:
    """Create timestamped ZIP backups of a directory and prune old ones."""

    def __init__(self, source_dir, backup_dir, keep_days=7):
        """
        Set up the backup tool and make sure the backup directory exists.

        Args:
            source_dir: Directory to back up.
            backup_dir: Directory that receives the backup archives.
            keep_days: Archives older than this many days are deleted.
        """
        self.source_dir = source_dir
        self.backup_dir = backup_dir
        self.keep_days = keep_days

        # Create the backup directory up front
        os.makedirs(backup_dir, exist_ok=True)

    def create_backup(self):
        """Write ``backup_<timestamp>.zip`` and then prune old backups.

        Errors are reported on stdout instead of being raised.

        Returns:
            The path of the archive that was written, or None when writing
            the archive failed.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_name = f"backup_{timestamp}.zip"
        zip_path = os.path.join(self.backup_dir, zip_name)
        zip_abs = os.path.abspath(zip_path)
        backup_abs = os.path.abspath(self.backup_dir)

        print(f"[{datetime.now()}] 开始备份: {self.source_dir}")

        archive_written = False
        try:
            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                for root, dirs, files in os.walk(self.source_dir):
                    # If the backup directory lives inside the source tree,
                    # do not descend into it: otherwise every backup would
                    # contain all previous backups and the file being written.
                    dirs[:] = [
                        d for d in dirs
                        if os.path.abspath(os.path.join(root, d)) != backup_abs
                    ]
                    for file in files:
                        file_path = os.path.join(root, file)
                        # Covers backup_dir == source_dir
                        if os.path.abspath(file_path) == zip_abs:
                            continue
                        arcname = os.path.relpath(file_path, self.source_dir)
                        zf.write(file_path, arcname)
            archive_written = True

            size_mb = os.path.getsize(zip_path) / (1024 * 1024)
            print(f"[{datetime.now()}] 备份完成: {zip_name} ({size_mb:.1f}MB)")

            # Prune old backups
            self.clean_old_backups()

        except Exception as e:
            print(f"[{datetime.now()}] 备份失败: {e}")
            if not archive_written:
                # Do not leave a truncated archive that looks like a backup
                try:
                    os.remove(zip_path)
                except OSError:
                    # Nothing was created, or it cannot be removed; the
                    # failure itself has already been reported above.
                    pass

        return zip_path if archive_written else None

    def clean_old_backups(self):
        """Delete ``backup_*.zip`` files whose mtime is older than keep_days."""
        cutoff_date = datetime.now() - timedelta(days=self.keep_days)

        for file in os.listdir(self.backup_dir):
            if file.startswith('backup_') and file.endswith('.zip'):
                file_path = os.path.join(self.backup_dir, file)
                file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))

                if file_mtime < cutoff_date:
                    try:
                        os.remove(file_path)
                        print(f"[{datetime.now()}] 删除旧备份: {file}")
                    except Exception as e:
                        print(f"[{datetime.now()}] 删除失败 {file}: {e}")

    def run_daily(self, hour=2, minute=0):
        """Register a daily job with ``schedule`` and poll it forever.

        This method does not return; it loops until the process is stopped.

        Args:
            hour: Hour of day at which the backup runs.
            minute: Minute of the hour at which the backup runs.
        """
        schedule.every().day.at(f"{hour:02d}:{minute:02d}").do(self.create_backup)

        print(f"自动备份已启动,每天 {hour:02d}:{minute:02d} 运行")
        print(f"源目录: {self.source_dir}")
        print(f"备份目录: {self.backup_dir}")
        print(f"保留天数: {self.keep_days}")

        while True:
            schedule.run_pending()
            time.sleep(60)  # poll once per minute

# Usage
if __name__ == "__main__":
    backup = AutoBackup(
        source_dir='/path/to/important/data',
        backup_dir='/path/to/backups',
        keep_days=30
    )
    
    # Run one backup immediately
    backup.create_backup()
    
    # Or run it as a scheduled job instead
    # backup.run_daily(hour=2, minute=0)  # runs every day at 02:00

常见问题解答

Q1:ZIP_DEFLATED 和 ZIP_STORED 有什么区别?

A:

  • ZIP_STORED:不压缩,只打包
  • ZIP_DEFLATED:使用DEFLATE算法压缩
  • ZIP_BZIP2:使用BZIP2算法压缩(需要bzip2支持)
  • ZIP_LZMA:使用LZMA算法压缩(需要lzma支持)

Q2:如何处理大型ZIP文件?

import zipfile

def process_large_zip(zip_path, output_dir, large_threshold=100 * 1024 * 1024,
                      chunk_size=1024 * 1024):
    """Extract a ZIP archive, copying large members in fixed-size chunks.

    Args:
        zip_path: Path of the archive to extract.
        output_dir: Destination directory; created if missing.
        large_threshold: Members whose uncompressed size exceeds this many
            bytes are copied chunk by chunk (default 100 MB).
        chunk_size: Number of bytes copied per read for large members
            (default 1 MB).

    Raises:
        ValueError: If a large member's name would resolve to a path outside
            ``output_dir`` (absolute path or ``..`` components).
    """
    # Imported here because the surrounding snippet only imports zipfile
    import os
    import shutil

    os.makedirs(output_dir, exist_ok=True)
    output_root = os.path.realpath(output_dir)

    with zipfile.ZipFile(zip_path, 'r') as zf:
        for info in zf.infolist():
            if info.is_dir() or info.file_size <= large_threshold:
                # ZipFile.extract sanitises the member path and creates
                # parent directories itself
                zf.extract(info, output_dir)
                continue

            print(f"处理大文件: {info.filename} ({info.file_size/1024/1024:.1f}MB)")

            # This branch builds the path by hand, so it must do its own
            # check that the member stays inside output_dir.
            target = os.path.realpath(os.path.join(output_root, info.filename))
            if os.path.commonpath([output_root, target]) != output_root:
                raise ValueError(f"不安全的成员路径: {info.filename!r}")

            # Members in sub-folders need their directories created first
            os.makedirs(os.path.dirname(target), exist_ok=True)

            # Chunked copy keeps memory use bounded by chunk_size
            with zf.open(info) as src, open(target, 'wb') as dst:
                shutil.copyfileobj(src, dst, chunk_size)

Q3:如何创建自解压ZIP?

A:Python标准库不支持创建自解压ZIP,但可以结合其他工具:

import zipfile
import subprocess

# Step 1: build an ordinary ZIP
with zipfile.ZipFile('data.zip', mode='w') as archive:
    archive.write('file1.txt')

# Step 2: hand the archive to a third-party tool (7-Zip must be installed)
sfx_command = ['7z', 'a', '-sfx', 'self_extracting.exe', 'data.zip']
subprocess.run(sfx_command)

Q4:zipfile支持RAR格式吗?

A:不支持。zipfile只支持ZIP格式。处理RAR需要使用第三方库,如rarfile。

性能优化技巧

批量操作提高性能

import zipfile
import os

def fast_zip_create(files, zip_path):
    """Pack a list of files into one flat ZIP archive.

    Each file is stored under its base name. Paths that are not regular
    files (missing paths, directories) are skipped. When two inputs share a
    base name, later ones get a numeric suffix (``x.txt``, ``x_1.txt``, ...)
    so the archive never contains duplicate member names.

    Args:
        files: Iterable of file paths to add.
        zip_path: Path of the archive to create.

    Returns:
        The member names that were written, in order.
    """
    added = []
    used_names = set()

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            if not os.path.isfile(file):
                continue

            arcname = os.path.basename(file)
            if arcname in used_names:
                # Flattening can collide; pick the first free suffixed name
                stem, ext = os.path.splitext(arcname)
                counter = 1
                while f"{stem}_{counter}{ext}" in used_names:
                    counter += 1
                arcname = f"{stem}_{counter}{ext}"

            used_names.add(arcname)
            zf.write(file, arcname)
            added.append(arcname)

    return added

使用内存优化

import zipfile
import io

def create_zip_in_memory():
    """Build a small ZIP archive entirely in memory and return its bytes."""
    members = (
        ('file1.txt', '内存中的文件内容'),      # text payload (str)
        ('data.bin', b'\x00\x01\x02\x03'),  # binary payload (bytes)
    )

    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, 'w') as archive:
        for name, payload in members:
            archive.writestr(name, payload)

    # The archive is complete once the with-block has closed it
    return buffer.getvalue()

# Usage
zip_data = create_zip_in_memory()
print(f"生成的ZIP大小: {len(zip_data)} 字节")

# Save the in-memory archive to a file
with open('memory_zip.zip', 'wb') as f:
    f.write(zip_data)
posted @ 2026-04-27 18:34  MKYC  阅读(7)  评论(0)    收藏  举报