1 # -*- coding:utf-8 -*-
2 import re
3
4
# Directory prefix (with trailing slash) that is prepended to file names.
ss = "./data/"
# Release identifier; it is substituted into the input file name below.
year = '17A'
# Full path of the input text file for the chosen release.
filename = ss + ('EDSD%s.txt' % year)
8
9
'''
Adapted to the new version.

'''
14
15
16
# Regular expressions for the input lines.  Every pattern ends in "\n", so a
# line only matches when it still carries its newline.  The trailing comment
# on each pattern is an example of the text its capture group extracts.

# Header line: 4 whitespace characters, a marker (X or any non-word
# character), 2 whitespace characters, then three capital letters (captured).
p1 = r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n"  # TC

# Composite line: 3 digits, 4 whitespace characters, "C" + 3 digits, free
# text, then C or M, whitespace and one digit.  p2..p5 are the same line
# shape and differ only in which part is captured.
p2 = r"(^\d{3})\s{4}C\d{3}.+[CM]\s+\d\n"  # 010
p3 = r"^\d{3}\s{4}(C\d{3}).+[CM]\s+\d\n"  # C552
p4 = r"^\d{3}\s{4}C\d{3}.+([CM])\s+\d\n"  # M
p5 = r"^\d{3}\s{4}C\d{3}.+[CM]\s+(\d)\n"  # 1

# Standalone line.  Each pattern has two alternatives, so findall() returns
# 2-tuples in which the group of the alternative that did not match is "".
# p6/p7: either the complete one-line form (3 digits, 4 whitespace
# characters, 4 digits, text, C or M, 4 whitespace characters, a digit, then
# text ending in ".." + digits), or a line whose last character is not a
# digit.
p6 = r"(^\d{3})\s{4}\d{4}.+[CM]\s{4}\d\s.*\.\.\d+\n|(^\d{3})\s{4}\w\d{3}\s.+[^\d]$\n"  # standalone 030
p7 = r"^\d{3}\s{4}(\d{4}).+[CM]\s{4}\d\s.*\.\.\d+\n|^\d{3}\s{4}(\w\d{3})\s.+[^\d]$\n"  # standalone 3286
# p8..p10: either the complete one-line form, or a line that starts with 12
# whitespace characters and a capital letter and carries the trailing fields.
p8 = r"^\d{3}\s{4}\d{4}.+([CM])\s{4}\d\s.*\.\.\d+\n|^\s{12}[A-Z].+([CM])\s{4}\d\s.*\.\.\d+\n"  # standalone M
p9 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}(\d)\s.*\.\.\d+\n|^\s{12}[A-Z].+[CM]\s{4}(\d)\s.*\.\.\d+\n"  # standalone 1
p10 = r"^\d{3}\s{4}\d{4}.+[CM]\s{4}\d\s(.*\.\.\d+)\n|^\s{12}[A-Z].+[CM]\s{4}\d\s(.*\.\.\d+)\n"  # standalone an..35

# Compile all ten sources in one go; patternN is the compiled form of pN.
(pattern1, pattern2, pattern3, pattern4, pattern5,
 pattern6, pattern7, pattern8, pattern9, pattern10) = map(
    re.compile, (p1, p2, p3, p4, p5, p6, p7, p8, p9, p10))
40
def _captured_text(matches):
    """Concatenate every capture of a ``findall`` result into one string.

    ``matches`` is either a list of strings (pattern with one group) or a
    list of tuples of strings (pattern with several groups; groups that did
    not take part in the match are empty strings and contribute nothing).
    """
    return "".join("".join(match) for match in matches)


# Text captured from the most recent line matching pattern1; it is written as
# the first column of every following row.  It starts as an empty string: the
# previous initial value, an empty tuple, made the concatenation below raise
# TypeError when a row line appeared before any pattern1 line.
temp = ""
# Progress marker for the row currently being written:
#   0      nothing matched yet
#   1      a pattern1 line has been seen
#   2..5   columns of a composite row (pattern2..pattern5) have been written
#   6..10  columns of a standalone row (pattern6..pattern10) have been written
# Each column is only written when the previous one was, so the marker also
# carries over to the next input line (presumably so that a standalone row can
# be completed by the 12-space continuation alternative of pattern8..10).
flag = 0

# Both files are opened once and closed by the with-statement, even on error.
# Mode 'a' appends to an existing CSV ('w' would overwrite it).
with open(filename) as fr, open(ss + 'b2_%s.csv' % year, 'a') as w2:
    for line in fr:
        matcher1 = pattern1.findall(line)
        matcher2 = pattern2.findall(line)
        matcher3 = pattern3.findall(line)
        matcher4 = pattern4.findall(line)
        matcher5 = pattern5.findall(line)
        matcher6 = pattern6.findall(line)
        matcher7 = pattern7.findall(line)
        matcher8 = pattern8.findall(line)
        matcher9 = pattern9.findall(line)
        matcher10 = pattern10.findall(line)

        if matcher1:
            flag = 1
            temp = matcher1[-1]

        # ---- composite row -------------------------------------------------
        # A pattern2 match always starts a new row.  The former guard
        # `flag ==1 or 2` parsed as `(flag == 1) or 2`, which is always
        # truthy, so it never filtered anything; it is dropped to make the
        # actual behaviour explicit.
        if matcher2:
            flag = 2
            w2.write("\n" + temp + "," + _captured_text(matcher2))
        if matcher3 and flag == 2:
            flag = 3
            # Composite rows get the default value 0000 in the next column.
            w2.write("," + _captured_text(matcher3) + ",0000")
        if matcher4 and flag == 3:
            flag = 4
            # The fixed year column follows the pattern4 capture.
            w2.write("," + _captured_text(matcher4) + "," + year)
        if matcher5 and flag == 4:
            flag = 5
            w2.write("," + _captured_text(matcher5) + ", ")

        # ---- standalone row ------------------------------------------------
        # Starts a new row regardless of the current marker value.
        if len(matcher6) == 1 and matcher6 != ['']:
            flag = 6
            # Standalone rows get the default value C000 in the next column.
            w2.write("\n" + temp + "," + _captured_text(matcher6) + ",C000")
        if matcher7 and flag == 6:
            flag = 7
            w2.write("," + _captured_text(matcher7))
        if matcher8 and flag == 7:
            flag = 8
            # The fixed year column follows the pattern8 capture.
            w2.write("," + _captured_text(matcher8) + "," + year)
        if matcher9 and flag == 8:
            flag = 9
            w2.write("," + _captured_text(matcher9))
        if matcher10 and flag == 9:
            flag = 10
            w2.write("," + _captured_text(matcher10))
127
"""
Special cases



"""