对比一个文件夹下所有的 py 脚本,找出功能相似的代码
原因
写了几千个脚本,但其中大部分只是在其他脚本的基础上做了少量修改,所以写了一个脚本递归地两两对比,把内容相似的脚本移动到同一个文件夹中,方便后续整合。
code
import difflib
import os
import shutil
import uuid # 为避免脚本名字一致,在前边添加一个uuid
def compare_files(file1, file2):
    """Return the line-level similarity ratio of two UTF-8 text files.

    Both files are read completely and their lists of lines are compared
    with ``difflib.SequenceMatcher``.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        A float in ``[0, 1]``; ``1.0`` means the two line sequences are
        identical.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    with open(file1, "r", encoding="utf-8") as f1, open(file2, "r", encoding="utf-8") as f2:
        file1_content = f1.readlines()
        file2_content = f2.readlines()
    # autojunk=False: by default SequenceMatcher ignores lines that occur
    # "too often" once the second sequence has 200+ items. Source files are
    # full of repeated lines, so the heuristic can make near-duplicate
    # scripts look unrelated.
    matcher = difflib.SequenceMatcher(None, file1_content, file2_content, autojunk=False)
    return matcher.ratio()
def get_all_py_file(_dir):
    """Recursively collect the paths of all ``.py`` files under a directory.

    Args:
        _dir: Directory to scan.

    Returns:
        A list of paths (each built by joining onto ``_dir``) whose names
        end with ``.py``. Entries of every directory are visited in sorted
        name order, so the result is deterministic across runs.

    Raises:
        OSError: If ``_dir`` or one of its sub-directories cannot be listed.
    """
    list_info = []
    # os.listdir returns entries in arbitrary order; sort so that the order
    # in which callers process the files does not vary between runs.
    for name in sorted(os.listdir(_dir)):
        full_path = os.path.join(_dir, name)
        if os.path.isdir(full_path):
            list_info.extend(get_all_py_file(full_path))
        elif full_path.endswith('.py'):
            list_info.append(full_path)
    return list_info
def main(path=r'C:\Users\DM\Desktop\code_test', similarity_threshold=0.7):
    """Compare every pair of ``.py`` files under ``path`` and group similar ones.

    Each pair whose similarity ratio reaches ``similarity_threshold`` is
    moved into ``<path>/Similar_<NN>_Percent``, where ``NN`` is the ratio
    truncated to a whole percentage. The similarity of every compared pair
    is printed.

    Args:
        path: Root directory that is scanned recursively and that receives
            the ``Similar_*`` folders.
        similarity_threshold: Minimum ratio (0..1) for a pair to be moved.
    """
    file_list = get_all_py_file(path)
    # Files already moved no longer exist at their listed path; remember
    # them so they are skipped instead of failing on every later comparison.
    moved = set()
    for i, file1 in enumerate(file_list):
        if file1 in moved:
            continue
        for file2 in file_list[i + 1:]:
            if file2 in moved:
                continue
            try:
                similarity = compare_files(file1, file2)
                print(similarity, file1, file2)
                if similarity < similarity_threshold:
                    continue
                folder_name = f"Similar_{int(similarity * 100)}_Percent"
                folder_path = os.path.join(path, folder_name)
                os.makedirs(folder_path, exist_ok=True)
                for src in (file1, file2):
                    # Prefix a uuid so scripts that share a base name do not
                    # overwrite each other inside the target folder.
                    target = os.path.join(
                        folder_path, uuid.uuid1().hex + "_" + os.path.basename(src))
                    shutil.move(src, target)
                    moved.add(src)
            except (OSError, UnicodeError) as e:
                # Best effort: report unreadable/unmovable files and go on.
                print(str(e))
            if file1 in moved:
                # file1 has left its original location; nothing more to
                # compare it against.
                break
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
https://lideshan.cn/post/54.html
本文来自博客园,作者:love_water,转载请注明原文链接:https://www.cnblogs.com/ldsice/articles/17109366.html

浙公网安备 33010602011771号