1 # -*- coding:utf-8 -*-
2 import re
3
4
5 '''
6 适应新版本
7 '''
8
9
# Directory release tag; user-defined, used in the input and output names.
year = '17A'
# Root directory that holds the input file and receives the output files.
ss = './data/'
# Path of the input file.
filename = ''.join((ss, 'EDSD%s.txt' % year))
13
14
15
16
def trsd_nonote(src_path=None, out_path=None):
    """Extract each segment's tag, name and "Function:" text into a text file.

    The input is scanned line by line. A header line (four whitespace
    characters, a change marker or blank, two whitespace characters, then a
    three-letter upper-case tag and the segment name) starts a new record
    ``TAG,NAME``. The first ``Function:`` paragraph that follows is appended
    as a third, double-quoted field; its continuation lines (indented by 17
    whitespace characters) are joined with single spaces. Example input::

               Function: To specify information regarding the transport
                         such as mode of transport, means of transport,
                         its conveyance reference number and the
                         identification of the means of transport.

    Each record is written with a leading newline, so the output begins with
    an empty line and does not end with a newline.

    Args:
        src_path: File to read. Defaults to the module-level ``filename``.
        out_path: File to write, opened in append mode. Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if out_path is None:
        out_path = ss + 'trsd_nonote%s.txt' % year

    # Header line: captures the tag and the segment name in one pass.
    header_re = re.compile(r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s(.+)\n")
    # "Function:" paragraph that fits on one line (ends with "<word>.").
    one_line_re = re.compile(r"^\s{7}Function:\s(.+\w\w\.)\n")
    # First line of a longer paragraph: no final period, or ends in ".g."
    first_line_re = re.compile(r"^\s{7}Function:\s(.+\.g\.|.+[^\.])\n")
    # Continuation line that does not finish the paragraph.
    middle_line_re = re.compile(r"^\s{17}(\w.+[^\.])\n")
    # Continuation line ending with a period: closes the paragraph.
    last_line_re = re.compile(r"^\s{17}(.+\.)\n")

    # Parser states.
    idle = 0            # nothing pending (before a header / paragraph done)
    want_function = 1   # header written, waiting for its "Function:" line
    in_function = 2     # multi-line paragraph open, closing quote pending
    state = idle

    # Both files are opened once and closed even if an error occurs.
    with open(src_path) as src, open(out_path, 'a') as out:
        for line in src:
            found = header_re.search(line)
            if found:
                tag, name = found.groups()
                out.write("\n" + tag + "," + name)
                state = want_function
                continue

            if state == want_function:
                found = one_line_re.search(line)
                if found:
                    # Quoted because the text may itself contain commas.
                    out.write(",\"" + found.group(1) + "\"")
                    state = idle
                    continue
                found = first_line_re.search(line)
                if found:
                    out.write(",\"" + found.group(1))
                    state = in_function
            elif state == in_function:
                # Continuation lines are accepted only while a paragraph is
                # open; otherwise any 17-space-indented line elsewhere in
                # the file would be glued onto the record.
                found = middle_line_re.search(line)
                if found:
                    out.write(" " + found.group(1))
                    continue
                found = last_line_re.search(line)
                if found:
                    out.write(" " + found.group(1) + "\"")
                    state = idle
99
def trsd_note(src_path=None, out_path=None):
    """Collect the "Note:" text of every segment, one output line per segment.

    For each header line a newline is written, so every segment owns exactly
    one line of the output, empty when the segment has no note. After a
    ``Note:`` line, each line indented by 12 whitespace characters is
    appended to the current output line, preceded by a single space. A
    separator line (dashes) reached after note text ends the note. A final
    newline is written once the input is exhausted.

    The header pattern is the same one ``trsd_nonote`` uses: ``join`` pairs
    the two output files by line number, so both functions must recognise
    exactly the same header lines.

    Args:
        src_path: File to read. Defaults to the module-level ``filename``.
        out_path: File to write, opened in append mode. Defaults to
            ``ss + 'trsd_note%s.txt' % year``.
    """
    if src_path is None:
        src_path = filename
    if out_path is None:
        out_path = ss + 'trsd_note%s.txt' % year

    # Segment header (identical to the header pattern of trsd_nonote).
    header_re = re.compile(r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n")
    # The "Note:" marker line.
    note_start_re = re.compile(r"^\s{7}Note:\s\n")
    # One line of note content.
    note_text_re = re.compile(r"^\s{12}([^ ].+)\n")
    # Separator line between segments.
    separator_re = re.compile(r"^(?:-|컴)+\n")

    # Parser states.
    idle = 0         # outside any segment of interest
    after_header = 1  # header seen, "Note:" not yet seen
    after_note = 2    # "Note:" seen, no note text written yet
    in_text = 3       # at least one note text line written
    state = idle

    with open(src_path) as src, open(out_path, 'a') as out:
        for line in src:
            if header_re.search(line):
                out.write("\n")
                state = after_header
            elif state == after_header:
                if note_start_re.search(line):
                    state = after_note
            elif state in (after_note, in_text):
                # Note text is accepted only after a "Note:" marker of the
                # current segment; indented text seen right after a new
                # header must not leak into that segment's line.
                found = note_text_re.search(line)
                if found:
                    out.write(" " + found.group(1))
                    state = in_text
                elif state == in_text and separator_re.search(line):
                    state = idle
        # Terminate the last segment's line.
        out.write("\n")
151
def join(note_path=None, nonote_path=None, csv_path=None):
    """Append the note text to each record line and write the result as CSV.

    Lines are paired by position: line ``i`` of the record file is combined
    with line ``i`` of the note file as ``<record>,"<note>"``. Whitespace-only
    note lines become an empty note. If the note file has fewer lines than
    the record file, the missing notes are treated as empty instead of
    raising ``IndexError`` part-way through the output.

    Args:
        note_path: Note file to read. Defaults to
            ``ss + 'trsd_note%s.txt' % year``.
        nonote_path: Record file to read. Defaults to
            ``ss + 'trsd_nonote%s.txt' % year``.
        csv_path: CSV file to write, opened in append mode. Defaults to
            ``ss + 'trsd%s.csv' % year``.
    """
    if note_path is None:
        note_path = ss + 'trsd_note%s.txt' % year
    if nonote_path is None:
        nonote_path = ss + 'trsd_nonote%s.txt' % year
    if csv_path is None:
        csv_path = ss + 'trsd%s.csv' % year

    # Load all notes first; blank lines stand for "segment has no note".
    with open(note_path) as note_file:
        notes = ['' if text.isspace() else text.strip('\n')
                 for text in note_file]

    with open(nonote_path) as record_file, open(csv_path, 'a') as csv_file:
        for position, record in enumerate(record_file):
            note = notes[position] if position < len(notes) else ''
            csv_file.write("%s,\"%s\"\n" % (record.strip('\n'), note))
if __name__ == '__main__':
    # Run the pipeline stages in order: record extraction, note extraction,
    # then the merge of both outputs into the CSV file.
    for stage in (trsd_nonote, trsd_note, join):
        stage()