LOADING . . .

python 递归比较两个文件夹

以下是完整的实现代码:

from pathlib import Path
import hashlib
import argparse


def file_hash(path: Path, chunk_size: int = 8192) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed ``chunk_size`` bytes at a
    time, so its full contents are never held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # Two-argument iter() keeps calling read() until it returns b"" (EOF).
        for block in iter(lambda: stream.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()


def collect_files(root: Path) -> dict[str, Path]:
    """Recursively index every regular file below *root*.

    Returns a dict mapping each file's path relative to ``root`` (in POSIX
    form, i.e. with forward slashes) to the path produced by ``root.rglob``.
    Directories are skipped; the values are absolute only if ``root`` is.
    """
    return {
        entry.relative_to(root).as_posix(): entry
        for entry in root.rglob("*")
        if entry.is_file()
    }


def compare_directories(dir1: str, dir2: str):
    """Recursively compare the files under two directories.

    Files are matched by their path relative to each directory root. A file
    present on both sides counts as identical only when the sizes match and
    the SHA-256 digests match.

    Returns a 4-tuple of sorted lists of relative paths:
    ``(only_in_dir1, only_in_dir2, different_files, same_files)``.

    Raises:
        ValueError: if either argument does not resolve to a directory.
    """
    left_root = Path(dir1).resolve()
    right_root = Path(dir2).resolve()

    # Validate in argument order so dir1 is reported first when both are bad.
    for candidate in (left_root, right_root):
        if not candidate.is_dir():
            raise ValueError(f"不是有效目录: {candidate}")

    left_index = collect_files(left_root)
    right_index = collect_files(right_root)

    # Dict key views support set algebra directly.
    left_only = sorted(left_index.keys() - right_index.keys())
    right_only = sorted(right_index.keys() - left_index.keys())

    changed = []
    unchanged = []
    for rel in sorted(left_index.keys() & right_index.keys()):
        left_file = left_index[rel]
        right_file = right_index[rel]
        # Cheap size check first; hashing only runs when the sizes agree.
        identical = (
            left_file.stat().st_size == right_file.stat().st_size
            and file_hash(left_file) == file_hash(right_file)
        )
        bucket = unchanged if identical else changed
        bucket.append(rel)

    return left_only, right_only, changed, unchanged


def print_result(title: str, items: list[str]):
    """Print one report section to standard output.

    The output is a blank line, a heading of the form ``title (count)``,
    a 60-character dashed rule, and then each item on its own line.
    """
    heading = f"\n{title} ({len(items)})"
    rule = "-" * 60
    # A single print with a newline separator emits the same text as
    # printing the heading, the rule and every item one call at a time.
    print(heading, rule, *items, sep="\n")


def main():
    """Command-line entry point: compare two directories and print a report.

    Takes two positional arguments, ``dir1`` and ``dir2``, and prints three
    sections: files only in the first directory, files only in the second,
    and files whose contents differ.
    """
    cli = argparse.ArgumentParser(description="递归对比两个文件夹")
    for dest, help_text in (("dir1", "第一个文件夹"), ("dir2", "第二个文件夹")):
        cli.add_argument(dest, help=help_text)
    options = cli.parse_args()

    left_only, right_only, changed, _unchanged = compare_directories(
        options.dir1, options.dir2
    )

    # Identical files are computed but not reported.
    sections = (
        ("只在左边目录中存在的文件", left_only),
        ("只在右边目录中存在的文件", right_only),
        ("内容不同的文件", changed),
    )
    for heading, entries in sections:
        print_result(heading, entries)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
    

 

posted @ 2024-03-12 15:12  颀周  阅读(50)  评论(0)    收藏  举报
很高兴能帮到你~
点赞