python 递归比较两个文件夹
以下是完整的示例代码:
from pathlib import Path
import hashlib
import argparse
def file_hash(path: Path, chunk_size: int = 8192) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is opened in binary mode and read in pieces of ``chunk_size``
    bytes, so a large file is never held in memory all at once.

    Args:
        path: File to hash.
        chunk_size: Number of bytes requested per read call.

    Returns:
        The digest as a hexadecimal string.
    """
    sha256 = hashlib.sha256()
    with path.open("rb") as f:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := f.read(chunk_size):
            sha256.update(chunk)
    return sha256.hexdigest()
def collect_files(root: Path) -> dict[str, Path]:
    """Recursively collect every file found below *root*.

    Directories themselves are skipped; only entries for which
    ``Path.is_file()`` is true are returned.

    Args:
        root: Directory to walk.

    Returns:
        A mapping from each file's path relative to *root* (POSIX style,
        ``/``-separated) to the path produced by ``root.rglob``. The values
        are absolute only when *root* itself is absolute.
    """
    return {
        p.relative_to(root).as_posix(): p
        for p in root.rglob("*")
        if p.is_file()
    }
def compare_directories(
    dir1: str, dir2: str
) -> tuple[list[str], list[str], list[str], list[str]]:
    """Recursively compare the files under two directories.

    Files are matched by their path relative to each root. A file present
    on both sides counts as identical when the sizes match and the
    ``file_hash`` digests match.

    Args:
        dir1: Path of the first ("left") directory.
        dir2: Path of the second ("right") directory.

    Returns:
        A 4-tuple of sorted relative-path lists:
        ``(only_in_dir1, only_in_dir2, different_files, same_files)``.

    Raises:
        ValueError: If either argument does not resolve to a directory.
    """
    root1 = Path(dir1).resolve()
    root2 = Path(dir2).resolve()
    if not root1.is_dir():
        raise ValueError(f"不是有效目录: {root1}")
    if not root2.is_dir():
        raise ValueError(f"不是有效目录: {root2}")

    files1 = collect_files(root1)
    files2 = collect_files(root2)

    # Dict key views support set algebra, so presence on one side only is a
    # plain set difference rather than a per-key truthiness check.
    only_in_dir1 = sorted(files1.keys() - files2.keys())
    only_in_dir2 = sorted(files2.keys() - files1.keys())

    different_files = []
    same_files = []
    for rel_path in sorted(files1.keys() & files2.keys()):
        p1 = files1[rel_path]
        p2 = files2[rel_path]
        # Compare sizes first: a size mismatch proves the contents differ,
        # and the short-circuit avoids hashing both files in that case.
        if (
            p1.stat().st_size == p2.stat().st_size
            and file_hash(p1) == file_hash(p2)
        ):
            same_files.append(rel_path)
        else:
            different_files.append(rel_path)

    return only_in_dir1, only_in_dir2, different_files, same_files
def print_result(title: str, items: list[str]) -> None:
    """Print a titled section listing *items*, one per line.

    The output is a blank line, then ``title`` followed by the item count in
    parentheses, then a rule of 60 dashes, then each item on its own line.

    Args:
        title: Heading for the section.
        items: Entries to list; an empty list prints only the heading
            and the rule.
    """
    print(f"\n{title} ({len(items)})")
    print("-" * 60)
    for item in items:
        print(item)
def main():
    """Command-line entry point: compare two directories and print a report.

    Reads two positional arguments (``dir1`` and ``dir2``) from the command
    line, compares them with ``compare_directories`` and prints three
    sections: files only on the left, files only on the right, and files
    whose contents differ.
    """
    parser = argparse.ArgumentParser(description="递归对比两个文件夹")
    parser.add_argument("dir1", help="第一个文件夹")
    parser.add_argument("dir2", help="第二个文件夹")
    args = parser.parse_args()

    try:
        only1, only2, diff, _same = compare_directories(args.dir1, args.dir2)
    except ValueError as exc:
        # An invalid directory is a usage mistake: report it through
        # argparse (usage line + message, exit status 2) instead of letting
        # a traceback reach the user. parser.error() does not return.
        parser.error(str(exc))

    print_result("只在左边目录中存在的文件", only1)
    print_result("只在右边目录中存在的文件", only2)
    print_result("内容不同的文件", diff)
    # Identical files (`_same`) are not listed in the report.
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

浙公网安备 33010602011771号