1 # -*- coding:utf-8 -*-
2 import re
3
4
'''
Adapted for the new version.
'''
8
9
# User-defined release tag; it is embedded in the input and output file names.
year = '17a'
# Root directory that the data files live in.
ss = './data/'
# Path of the input file to parse.
filename = '%sEDED%s.txt' % (ss, year)
13
14
15
16
def tred_nonote(src_path=None, out_path=None):
    """Extract tag, name, status, description and representation per entry.

    The input is scanned line by line with a small state machine.  For every
    entry header (a line holding a four-digit tag, a name and a bracketed
    one-letter status) a new output line is started, and the fields found on
    the following ``Desc:`` / ``Repr:`` lines are appended to it.

    Output layout of one entry (each entry is preceded by a newline)::

        <tag>,<name><status>,"<description>",<representation>

    Args:
        src_path: Input file to parse.  Defaults to the module-level
            ``filename``.
        out_path: File the result is appended to.  Defaults to
            ``ss + 'tred_nonote%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if out_path is None:
        out_path = ss + 'tred_nonote%s.txt' % year

    # Header line: captures the four-digit tag (e.g. 1001).
    tag_re = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\d\d\d\d)\s\s[A-Z].+\]$")
    # Header line: captures the entry name.
    name_re = re.compile(
        r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s([A-Z].+)\s+\[[A-Z]\]$")
    # Header line: captures the one-letter status inside the brackets.
    status_re = re.compile(
        r"^(?:\s{5}|X\s{4}|\W\s{4})\d\d\d\d\s\s[A-Z].+\s+\[([A-Z])\]$")
    # Description that is complete on a single line (ends with a period).
    desc_single_re = re.compile(r"^\s{5}Desc:\s(.+\w\w\.)\n")
    # First line of a description that does not end with a period.
    desc_first_re = re.compile(r"^\s{5}Desc:\s(.+[^\.]|.+\.g\.)\n")
    # Second line of such a description.
    desc_second_re = re.compile(r"^\s{11}(.+\.)\n")
    # Representation line.
    repr_re = re.compile(r"^\s{5}Repr:\s(.+)\n")

    # Parser states:
    #   0 nothing seen yet        1 tag written          2 name written
    #   3 status written          4 one-line description written
    #   5 first description line written (quote still open)
    #   6 second description line written (quote closed)
    #   7 representation written
    # NOTE(review): a description spanning more than two lines never leaves
    # state 5, so its closing quote and the Repr field are not emitted.
    state = 0

    # The output is opened once in append mode instead of once per input line;
    # both files are closed even if parsing raises.
    with open(src_path) as src, open(out_path, 'a') as out:
        for line in src:
            # The checks are deliberately sequential (not elif): a header line
            # advances through states 1 -> 2 -> 3 within one iteration.
            found = tag_re.findall(line)
            if found:
                state = 1
                out.write("\n")
                out.write("".join(found))

            found = name_re.findall(line)
            if found and state == 1:
                state = 2
                out.write(",")
                out.write("".join(found))

            found = status_re.findall(line)
            if found and state == 2:
                state = 3
                # No separator is written between name and status.
                out.write("".join(found))

            found = desc_single_re.findall(line)
            if found and state == 3:
                state = 4
                out.write(",\"")
                out.write("".join(found))
                out.write("\"")

            found = desc_first_re.findall(line)
            # Was ``flag == 3 or 5``, which is always true.
            if found and state in (3, 5):
                state = 5
                out.write(",\"")
                out.write("".join(found))

            found = desc_second_re.findall(line)
            if found and state == 5:
                state = 6
                out.write(" ")
                out.write("".join(found))
                out.write("\"")

            found = repr_re.findall(line)
            # Was ``flag == 4 or 6``, which is always true.
            if found and state in (4, 6):
                state = 7
                out.write(",")
                out.write("".join(found))
97
98
def tred_note(src_path=None, out_path=None):
    """Extract the note text of each entry, one output line per entry header.

    A newline is written for every entry header.  After a ``Note:`` line, each
    following line indented by eleven whitespace characters is appended to the
    current output line, preceded by a single space.  A separator line ends
    the note and writes another newline.

    Args:
        src_path: Input file to parse.  Defaults to the module-level
            ``filename``.
        out_path: File the result is appended to.  Defaults to
            ``ss + 'tred_note%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if out_path is None:
        out_path = ss + 'tred_note%s.txt' % year

    # Entry header line (four-digit tag, e.g. 1001).
    header_re = re.compile(r"^(?:\s{5}|X\s{4}|\W\s{4})(\d\d\d\d)\s\s[A-Z].+\]$")
    # "Note:" label line.
    note_label_re = re.compile(r"^\s{5}Note:\s\n")
    # One line of note content.
    note_text_re = re.compile(r"^\s{11}([^ ].+)\n")
    # Separator line.
    # NOTE(review): the second alternative looks like a mis-decoded
    # box-drawing character; confirm against the input file's encoding.
    separator_re = re.compile(r"^(?:-|컴)+\n")

    # Parser states:
    #   0 outside an entry   1 header seen   2 "Note:" seen   3 note text seen
    state = 0

    with open(src_path) as src, open(out_path, 'a') as out:
        for line in src:
            if header_re.match(line):
                state = 1
                out.write("\n")

            if note_label_re.match(line) and state == 1:
                state = 2

            text = note_text_re.findall(line)
            # Was ``flag == 2 or 3`` (always true) guarded by a second flag
            # that stayed set across entry headers, so indented lines of the
            # next entry leaked into the output when a note had no separator.
            if text and state in (2, 3):
                state = 3
                out.write(" ")
                out.write("".join(text))

            if separator_re.match(line) and state == 3:
                state = 0
                out.write("\n")
150
def join(note_path=None, nonote_path=None, out_path=None):
    """Merge the field file and the note file line by line into a CSV file.

    Line *i* of the field file is written followed by line *i* of the note
    file as an extra double-quoted column.  Whitespace-only note lines become
    an empty column.

    Args:
        note_path: Note file.  Defaults to ``ss + 'tred_note%s.txt' % year``.
        nonote_path: Field file.  Defaults to
            ``ss + 'tred_nonote%s.txt' % year``.
        out_path: CSV file the result is appended to.  Defaults to
            ``ss + 'tred%s.csv' % year``.
    """
    if note_path is None:
        note_path = ss + 'tred_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'tred_nonote%s.txt' % year
    if out_path is None:
        out_path = ss + 'tred%s.csv' % year

    with open(note_path) as note_file:
        notes = ['' if row.isspace() else row for row in note_file]

    with open(nonote_path) as field_file, open(out_path, 'a') as csv_file:
        for idx, record in enumerate(field_file):
            # The note file can be shorter than the field file (its last line
            # may be missing when the final entry has no note); use an empty
            # note instead of raising IndexError.
            note = notes[idx] if idx < len(notes) else ''
            csv_file.write(
                "%s,\"%s\"\n" % (record.strip('\n'), note.strip('\n')))
if __name__ == '__main__':
    # Run the pipeline in order: extract the fields, extract the notes, then
    # merge both intermediate files into the final CSV.
    for step in (tred_nonote, tred_note, join):
        step()