# Walk a directory tree (folder and all subfolders) and delete duplicate files.



import os
import hashlib
import shutil

def file_hash(filepath):
    """Return the hex MD5 digest of the file at *filepath*.

    The file is read in 4 KiB chunks so arbitrarily large files can be
    hashed with constant memory.

    Args:
        filepath: Path of the file to hash.

    Returns:
        The 32-character hexadecimal MD5 digest string.
    """
    digest = hashlib.md5()
    with open(filepath, "rb") as fh:
        # Walrus loop: read until an empty bytes object signals EOF.
        while chunk := fh.read(4096):
            digest.update(chunk)
    return digest.hexdigest()

def remove_duplicates(root_dir):
    """Delete duplicate files in *root_dir* and all of its subdirectories.

    Two files count as duplicates when their MD5 digests match; the first
    file encountered during the walk is kept and every later file with the
    same digest is deleted.

    Fix: the original version let any OSError (unreadable file, broken
    symlink, file vanishing mid-walk, failed delete) propagate and abort
    the whole scan; such files are now reported and skipped.

    Args:
        root_dir: Directory whose tree is scanned for duplicates.
    """
    seen = {}  # MD5 digest -> path of the first file seen with that digest
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            try:
                digest = file_hash(filepath)
            except OSError as exc:
                # Best-effort: skip files we cannot read instead of
                # aborting the entire deduplication run.
                print(f"Skipping unreadable file: {filepath} ({exc})")
                continue
            if digest in seen:
                print(f"Deleting duplicate file: {filepath}")
                try:
                    os.remove(filepath)
                except OSError as exc:
                    print(f"Could not delete {filepath}: {exc}")
            else:
                seen[digest] = filepath

# Example usage.  Guarded by __name__ so that merely importing this module
# no longer deletes files in the current working directory (the original
# ran the destructive scan unconditionally at import time).
if __name__ == "__main__":
    root_directory = os.getcwd()
    remove_duplicates(root_directory)
# Source attribution: posted @ 2024-11-01 22:55 by redufa (blog footer).