对比一个文件夹下的所有 py 脚本,找出功能相似的代码

原因

写了几千个脚本,但其中大部分只是在别的脚本基础上改动了一点点,所以写个脚本递归地两两对比,把内容相似的脚本移动到同一个文件夹里,方便后续整合

code

import difflib
import os
import shutil
import uuid # 为避免脚本名字一致,在前边添加一个uuid


def compare_files(file1, file2):
    """Return the line-level similarity ratio of two UTF-8 text files.

    Both files are read fully into lists of lines and compared with
    ``difflib.SequenceMatcher``.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        A float in ``[0, 1]``; ``1.0`` means the line sequences are identical.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    with open(file1, "r", encoding="utf-8") as left:
        with open(file2, "r", encoding="utf-8") as right:
            left_lines = list(left)
            right_lines = list(right)

    matcher = difflib.SequenceMatcher(a=left_lines, b=right_lines)
    return matcher.ratio()


def get_all_py_file(_dir):
    """Recursively collect the paths of all ``.py`` files under a directory.

    Args:
        _dir: Directory to scan.

    Returns:
        A list of paths (each built by joining onto ``_dir``) for every
        non-directory entry whose name ends with ``.py``. Entries appear in
        the order ``os.listdir`` yields them, with each subdirectory's
        results inserted at that subdirectory's position.
    """
    found = []
    for entry in os.listdir(_dir):
        full_path = os.path.join(_dir, entry)
        if os.path.isdir(full_path):
            # Descend into subdirectories and splice in their results.
            found += get_all_py_file(full_path)
        elif full_path.endswith('.py'):
            found.append(full_path)
    return found


def main(path=r'C:\Users\DM\Desktop\code_test', similarity_threshold=0.7):
    """Group similar ``.py`` files under ``path`` into per-similarity folders.

    Every pair of ``.py`` files found recursively under ``path`` is compared
    line by line. When a pair's similarity ratio reaches
    ``similarity_threshold``, both files are moved into
    ``<path>/Similar_<NN>_Percent`` (``NN`` is the ratio times 100, truncated
    to an integer). Each moved file gets a UUID prefix so that files sharing
    a base name do not overwrite each other.

    Once a file has been moved it is excluded from all further comparisons;
    previously the stale path kept being compared and produced a stream of
    "file not found" errors.

    Args:
        path: Root directory to scan. Defaults to the original hard-coded
            location.
        similarity_threshold: Minimum ratio (0..1) at which a pair is
            considered similar and moved.
    """
    file_list = get_all_py_file(path)
    moved = set()  # original paths that no longer exist at their old location

    for i, file1 in enumerate(file_list):
        if file1 in moved:
            continue
        for file2 in file_list[i + 1:]:
            if file2 in moved:
                continue
            try:
                similarity = compare_files(file1, file2)
                print(similarity, file1, file2)
                if similarity < similarity_threshold:
                    continue

                folder_name = f"Similar_{int(similarity * 100)}_Percent"
                folder_path = os.path.join(path, folder_name)
                os.makedirs(folder_path, exist_ok=True)

                for src in (file1, file2):
                    target = os.path.join(
                        folder_path,
                        uuid.uuid1().hex + "_" + os.path.basename(src))
                    shutil.move(src, target)
                    # Record each move as soon as it succeeds, so a failure
                    # on the second file still leaves the first one marked.
                    moved.add(src)
            except (OSError, UnicodeError) as e:
                # Best effort: report unreadable/unmovable files and go on.
                print(str(e))

            if file1 in moved:
                # file1 is gone from its original path; nothing left to
                # compare it against.
                break


# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

https://lideshan.cn/post/54.html

posted @ 2023-02-10 16:12  love_water  阅读(35)  评论(0)    收藏  举报