1 # -*- coding:utf-8 -*-
2 import re
3
4
'''
Adapted to the new version.
'''
8
9
# Version tag inserted into every file name below (user-defined).
year = '17A'
# Root directory of the data files.
ss = './data/'
# Path of the input file.
filename = ss + ('EDCD%s.txt' % year)
13
14
15
16
def trcd_nonote(src_path=None, dst_path=None):
    """Extract code, title and description of every ``Cnnn`` entry.

    The input is scanned line by line with a small state machine.  Each
    header line (four whitespace characters, one column holding ``X`` or a
    non-word character, two whitespace characters, then ``C`` and three
    digits) starts a new output line of the form::

        <code>,<title>,"<description>"

    A description that spans several input lines is joined with single
    spaces.  Every output line is *preceded* by its newline, so the file
    starts with an empty line and its last line is not terminated.

    Args:
        src_path: Path of the input file.  Defaults to the module-level
            ``filename``.
        dst_path: Path of the output file.  It is opened in append mode, so
            existing content is kept.  Defaults to
            ``ss + 'trcd_nonote%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if dst_path is None:
        dst_path = ss + 'trcd_nonote%s.txt' % year

    header_code = re.compile(r"^\s{4}(?:X|\W)\s{2}(C\d\d\d)\s.+\n")
    header_title = re.compile(r"^\s{4}(?:X|\W)\s{2}C\d\d\d\s(.+)\n")
    # "Desc:" line that already ends with a period (complete description).
    desc_single = re.compile(r"^\s{7}Desc:\s(.+\.)\n")
    # "Desc:" line without a final period (description continues below).
    desc_first = re.compile(r"^\s{7}Desc:\s(.+[^\.])\n")
    # Continuation lines: without / with the terminating period.
    cont_middle = re.compile(r"^\s{13}(.+[^\.])\n")
    cont_last = re.compile(r"^\s{13}(.+\.)\n")

    # Parser states, numbered in the order they are reached for one entry.
    (idle, code_seen, title_seen, desc_done,
     desc_open, desc_cont, desc_closed) = range(7)
    inside_description = (desc_open, desc_cont)

    state = idle
    # Both files are opened once and closed by the ``with`` block, even if
    # an error occurs while parsing.
    with open(src_path) as src, open(dst_path, 'a') as out:
        for line in src:
            # The checks are deliberately sequential (not ``elif``): a header
            # line satisfies both header patterns and must advance the state
            # twice within the same iteration.
            found = header_code.match(line)
            if found:
                state = code_seen
                out.write("\n" + found.group(1))

            found = header_title.match(line)
            if found and state == code_seen:
                state = title_seen
                out.write("," + found.group(1))

            found = desc_single.match(line)
            if found and state == title_seen:
                state = desc_done
                out.write(",\"" + found.group(1) + "\"")

            # Unlike the single-line case, this branch is not gated on the
            # current state.
            found = desc_first.match(line)
            if found:
                out.write(",\"" + found.group(1))
                state = desc_open

            # Accept every unterminated continuation line of an open
            # description, not just the first one, so that descriptions of
            # four or more lines are not truncated.
            found = cont_middle.match(line)
            if found and state in inside_description:
                state = desc_cont
                out.write(" " + found.group(1))

            # Membership test on purpose: ``state == 4 or 5`` would always be
            # true and would append unrelated indented sentences (for example
            # note text) to the output.
            found = cont_last.match(line)
            if found and state in inside_description:
                state = desc_closed
                out.write(" " + found.group(1) + "\"")
89
def trcd_note(src_path=None, dst_path=None):
    """Collect the ``Note:`` text of every entry, one output line per entry.

    A newline is written for each header line (a seven-character prefix
    followed by a letter, three digits, whitespace and an upper-case
    title).  The indented lines that follow a ``Note:`` marker of that entry
    are appended to the current output line, each preceded by one space.
    A final newline is written after the whole input has been read.  The
    resulting file therefore starts with an empty line, followed by one line
    per header in input order (empty when the entry has no note).

    Args:
        src_path: Path of the input file.  Defaults to the module-level
            ``filename``.
        dst_path: Path of the output file.  It is opened in append mode, so
            existing content is kept.  Defaults to
            ``ss + 'trcd_note%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if dst_path is None:
        dst_path = ss + 'trcd_note%s.txt' % year

    # Entry header, e.g. a code such as "C001" followed by its title.
    entry_header = re.compile(
        r"^(?:\s{7}|X\s{6}|\W\s{6})([A-Z][0-9]{3})\s[A-Z].+$")
    # The "Note:" marker line.
    note_marker = re.compile(r"^\s{7}Note:\s\n")
    # One line of note text.
    note_text = re.compile(r"^\s{13}([^ ].+)\n")
    # Horizontal rule between entries.
    # NOTE(review): the non-ASCII alternative looks like a mis-decoded
    # rule character -- confirm against the encoding of the input file.
    separator = re.compile(r"^(?:-|컴)+\n")

    # Parser states.
    idle, header_seen, marker_seen, in_note = range(4)

    state = idle
    with open(src_path) as src, open(dst_path, 'a') as out:
        for line in src:
            if entry_header.match(line):
                state = header_seen
                out.write("\n")

            if note_marker.match(line) and state == header_seen:
                state = marker_seen

            # Membership test on purpose: ``state == 2 or 3`` would always be
            # true, and after a "Note:" marker without any text the indented
            # description lines of the *next* entry would be emitted as note
            # text.  Because these two states can only be reached through a
            # "Note:" marker, no separate "marker seen" flag is needed.
            found = note_text.match(line)
            if found and state in (marker_seen, in_note):
                state = in_note
                out.write(" " + found.group(1))

            # A rule after note text ends the note.
            if separator.match(line) and state == in_note:
                state = idle

        # Terminate the line of the last entry.
        out.write("\n")
141
def join(note_path=None, nonote_path=None, out_path=None):
    """Append each note as a quoted last column to its record line.

    Line ``i`` of the records file is paired with line ``i`` of the notes
    file and written as ``<record>,"<note>"``.  Whitespace-only note lines
    become an empty note.  When the notes file has fewer lines than the
    records file, the missing notes are treated as empty instead of
    aborting half-way with an ``IndexError``.

    Args:
        note_path: Path of the notes file.  Defaults to
            ``ss + 'trcd_note%s.txt' % year``.
        nonote_path: Path of the records file.  Defaults to
            ``ss + 'trcd_nonote%s.txt' % year``.
        out_path: Path of the CSV output.  It is opened in append mode, so
            existing content is kept.  Defaults to
            ``ss + 'trcd%s.csv' % year``.
    """
    if note_path is None:
        note_path = ss + 'trcd_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'trcd_nonote%s.txt' % year
    if out_path is None:
        out_path = ss + 'trcd%s.csv' % year

    # Load all notes up front; only the line terminator is stripped so that
    # the rest of the note text is kept unchanged.
    with open(note_path) as note_file:
        notes = ['' if row.isspace() else row.strip('\n')
                 for row in note_file]

    with open(nonote_path) as records, open(out_path, 'a') as out:
        for index, record in enumerate(records):
            # Pad with an empty note when the notes file is shorter.
            note = notes[index] if index < len(notes) else ''
            out.write("%s,\"%s\"\n" % (record.strip('\n'), note))
if __name__ == '__main__':
    # Run the pipeline in order: records without notes, the notes, then the
    # merge of both into the CSV file.
    for step in (trcd_nonote, trcd_note, join):
        step()