利用Python将.mol2格式的分子库文件拆分成多个单独的.mol2文件,把分子库中的众多化合物逐一分离出来,便于对提取出的化合物逐个进行分析。拆分的主要思路如下:

    1.用户选择分子库文件所在的目录,以及拆分后文件的存储目录

    2.读取分子库文件所有内容

    3.依照一定的拆分规律,将分子库文件逐个拆分并写入新的文件

    主要依照以上思路即可实现将.mol2分子库文件进行拆分,废话不多说,直接看干货:

 1 import os
 2 from tkinter import filedialog
 3 # 拆分文件
 4 def divide_file(read_fold, write_fold):
 5     all_files = []
 6     for files in os.walk(read_fold):
 7         all_files = files[2]
 8     for file in all_files:
 9         file_path = read_fold + '/' + file
10         with open(file_path, 'r') as mol2_file:
11             contents = mol2_file.readlines()
12             contents.append('@<tripos>MOLECULE')
13             write_file(contents, write_fold, file)
14 
# Write one collected molecule to disk
def _write_molecule(record, name_index, out_dir):
    """Write *record* to ``<out_dir>/<molecule name>.mol2``.

    *name_index* is the position in *record* of the molecule-name line (the
    line right after the record marker). A record that ends at its marker,
    such as a trailing sentinel, has no name line and is skipped.
    """
    if name_index >= len(record):
        return
    # Path separators inside a molecule name would point outside out_dir.
    name = record[name_index].strip().replace('/', '_').replace('\\', '_')
    with open(os.path.join(out_dir, name + '.mol2'), 'w') as new_file:
        new_file.writelines(record)


# Write each molecule of a library into its own file
def write_file(article, write_fold, file_name):
    """Split the lines of a multi-molecule .mol2 file into one file per molecule.

    A folder named after *file_name* (without its extension) is created in
    *write_fold*, and every molecule found in *article* is written there as
    ``<molecule name>.mol2``. The molecule name is the line following each
    ``@<TRIPOS>MOLECULE`` marker. Molecules sharing a name overwrite each
    other.

    Args:
        article: Lines of the library file, as returned by ``readlines()``.
            A trailing marker line without a body is tolerated and ignored.
        write_fold: Directory in which the per-library folder is created.
        file_name: Name of the library file, e.g. ``"ligands.mol2"``.
    """
    marker = '@<TRIPOS>MOLECULE'

    # splitext removes exactly the extension, whatever its length.
    file_path = os.path.join(write_fold, os.path.splitext(file_name)[0])
    if not os.path.exists(file_path):
        os.makedirs(file_path)
        print(file_path, "创建成功")
    else:
        print(file_path, "目录已存在")

    record = []        # lines of the molecule currently being collected
    name_index = None  # index in `record` of the name line; None before any marker
    for line in article:
        # Compare case-insensitively so both the standard upper-case tag and
        # a lower-case sentinel appended by a caller are recognised.
        if line[:len(marker)].upper() == marker:
            if name_index is not None:
                _write_molecule(record, name_index, file_path)
                record = []
            # Keep the header on its own line even if it came without "\n".
            record.append(line if line.endswith('\n') else line + '\n')
            name_index = len(record)
        else:
            # Lines seen before the first marker stay with the first molecule.
            record.append(line)

    # Flush the final molecule; no trailing sentinel is required.
    if name_index is not None:
        _write_molecule(record, name_index, file_path)
41 
if __name__ == '__main__':
    # Ask for the folder that holds the molecule library files
    read_fold = filedialog.askdirectory(title='选择分子库文件所在的目录')
    # Ask for the folder that will receive the split files
    write_fold = filedialog.askdirectory(title='选择拆分文件的存储目录')
    # A cancelled dialog yields an empty value; going on would build output
    # paths from an empty base directory, so stop instead.
    if read_fold and write_fold:
        divide_file(read_fold, write_fold)
    else:
        print("未选择目录,已取消操作")