1 # -*- coding:utf-8 -*-
2 import re
3
4
5 '''
6 适应新版本
7
8 注意:
9 1)17A文件改完后缀后,需要转为UTF-8无BOM格式,才能正确处理。
10 2)fr = open(filename,encoding='utf-8')
11
12 '''
13
14
# Release tag of the UNCL file to process; edit this to switch releases.
year = '17A'

# Root directory that holds the input file and receives every generated file.
# It is concatenated as-is with file names, so it must end with a separator.
ss = './data/'

# Path of the input file read by tncl_note().
filename = ss + 'UNCL%s.txt' % year
18
19
20
# Line shapes recognised in the UNCL text file. Every pattern is applied to a
# single line that still carries its trailing newline. The leading alternation
# accepts five blanks, or a one-character marker ("X" or any non-word
# character) followed by four blanks.
_TNCL_ID_RE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\d\d\d\d)\s\s[A-Z].+\]$")
# group(1) is the entry tag, group(2) the entry name.
_TNCL_ENTRY_RE = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\w+)\s+(\w.+)\n")
# Text indented by exactly 14 blanks: description text or note text.
_TNCL_TEXT_RE = re.compile(r"^\s{14}([^ ].+)\n")
# The "Note:" marker line, indented by 11 blanks.
_TNCL_NOTE_RE = re.compile(r"^\s{11}Note:\s\n")


def _tncl_chunks(lines):
    """Yield, in write order, the output fragments produced from *lines*."""
    code_list_id = ''    # id of the code list currently being read
    entry_open = False   # an entry header was emitted since the last id line
    desc_seen = False    # at least one text line was emitted for that entry

    for line in lines:
        id_match = _TNCL_ID_RE.match(line)
        if id_match:
            code_list_id = id_match.group(1)
            entry_open = desc_seen = False
            continue

        entry = _TNCL_ENTRY_RE.match(line)
        # NOTE(review): the original guard also contained "(flag==1 or 4)",
        # which is always true, so the only effective condition was that an
        # id had been seen. That behaviour is kept on purpose: restricting it
        # to states 1 and 4 would drop every entry that follows an entry
        # without description text.
        if entry and code_list_id != '':
            # Close the previous quoted field, start a new record and open
            # the quoted description field.
            yield '"\n%s,%s,%s,"' % (code_list_id, entry.group(1), entry.group(2))
            entry_open, desc_seen = True, False
            continue

        text = _TNCL_TEXT_RE.match(line)
        if text and entry_open:
            # Consecutive text lines are concatenated without a separator.
            yield text.group(1)
            desc_seen = True
        elif desc_seen and _TNCL_NOTE_RE.match(line):
            # Close the description field and open a quoted note field.
            yield '","'

    # Close whichever quoted field is still open.
    yield '"'


def tncl_note(data_dir=None, release=None):
    """Extract the code-list entries of a UNCL text file into 'tncl_ori<release>.txt'.

    Each entry becomes one record of the form
    ``<id>,<tag>,<name>,"<description>"`` with an extra ``,"<note>"`` field
    when a "Note:" line follows the description. Because every record starts
    by closing the previous quoted field, the first line of the output holds
    only a double quote.

    Args:
        data_dir: Directory prefix for input and output. It is concatenated
            as-is with the file names, so it must end with a path separator.
            Defaults to the module-level ``ss``.
        release: Release tag used in the file names. Defaults to the
            module-level ``year``.

    When called without arguments the input path is the module-level
    ``filename``; otherwise it is ``data_dir + 'UNCL<release>.txt'``.
    The input is read as UTF-8. The output is opened in append mode with the
    platform default encoding (unchanged from the original so that join()
    keeps reading it with the same encoding).
    """
    root = ss if data_dir is None else data_dir
    tag = year if release is None else release
    if data_dir is None and release is None:
        src_path = filename
    else:
        src_path = root + 'UNCL%s.txt' % tag
    dst_path = root + 'tncl_ori%s.txt' % tag

    # Read everything first: a decoding error then surfaces before anything
    # is appended to the output, and the input handle is released early.
    with open(src_path, encoding='utf-8') as fr:
        lines = fr.readlines()

    with open(dst_path, 'a') as w2:  # 'a' appends; use 'w' to overwrite
        w2.writelines(_tncl_chunks(lines))
def join(data_dir=None, release=None):
    """Derive 'tred<release>.csv' and 'b4_<release>.csv' from 'tncl_ori<release>.txt'.

    The first line of the source file is skipped (tncl_note() leaves a lone
    double quote there). For every remaining line, the first two
    comma-separated fields are combined into the key ``<field0>_<field1>``:

    * 'tred<release>.csv' receives ``<key>,<original line>``
    * 'b4_<release>.csv'  receives ``<key>,<field1>,<release>``

    Lines with fewer than two comma-separated fields (for example blank
    lines) are skipped instead of raising IndexError.

    Args:
        data_dir: Directory prefix for all three files. It is concatenated
            as-is with the file names, so it must end with a path separator.
            Defaults to the module-level ``ss``.
        release: Release tag used in the file names and written as the last
            column of the 'b4_' file. Defaults to the module-level ``year``.

    Both output files are opened in append mode.
    """
    root = ss if data_dir is None else data_dir
    tag = year if release is None else release

    with open(root + 'tncl_ori%s.txt' % tag) as f1:
        records = f1.readlines()[1:]

    with open(root + 'tred%s.csv' % tag, 'a') as tred_out, \
            open(root + 'b4_%s.csv' % tag, 'a') as b4_out:
        for record in records:
            fields = record.split(',')
            if len(fields) < 2:
                # Not a record line: there is no id/tag pair to build a key.
                continue
            key = '%s_%s' % (fields[0], fields[1])
            tred_out.write('%s,%s\n' % (key, record.strip('\n')))
            b4_out.write('%s,%s,%s\n' % (key, fields[1], tag))
123 # f2.close()
124
if __name__ == '__main__':
    # Run the pipeline in order: extract the entries first, then derive the
    # CSV files from the extracted file.
    for step in (tncl_note, join):
        step()