快速解析超大XML不占用太大内存

 1 import xml.etree.ElementTree as ET
 2 
 3 def parse_res(xml_file):
 4     res_dic = {}
 5     tmp_lst_lev1 = []
 6     tmp_lst_lev2 = []
 7     add_flag = False
 8     for event, elem in ET.iterparse(xml_file):
 9         if event == 'end':
10             if elem.tag == 'Item':
11                 tmp_lst_lev1.append(dict(elem.attrib))
12             elif elem.tag == 'Enum':
13                 enum_str = ''.join([ ''.join(['[', item['value'], '=', item['name'], ']']) for item in tmp_lst_lev1])
14                 res_enum = {}
15                 res_enum['id'] = elem.attrib['id']
16                 res_enum['name'] = enum_str
17                 tmp_lst_lev2.append(res_enum)
18                 tmp_lst_lev1 = []
19             elif elem.tag == 'EnumRes':
20                 res_dic['EnumRes'] = {}
21                 tmp_dic = res_dic['EnumRes']
22                 for item in tmp_lst_lev2:
23                     tmp_dic[ item['id'].split('.')[1] ] = item['name']
24                 tmp_lst_lev2 = []
25             elif elem.tag == 'MeasUnitRes' or elem.tag == 'CounterNameRes' or elem.tag == 'CounterUnitRes':
26                 res_dic[elem.tag] = {}
27                 tmp_dic = res_dic[elem.tag]
28                 for item in tmp_lst_lev1:
29                     tmp_dic[ item['id'].split('.')[1] ] = item['name']
30                 tmp_lst_lev1 = []
31             #CommonInfo.Resource.xml
32             elif elem.tag == 'DevTypeNameRes' or elem.tag == 'VendorRes' or elem.tag == 'MocRes':
33                 res_dic[elem.tag] = {}
34                 tmp_dic = res_dic[elem.tag]
35                 for item in tmp_lst_lev1:
36                     tmp_dic['id'] = item['id'].split('.')[1]
37                     tmp_dic['name'] = item['name']
38                 tmp_lst_lev1 = []
39             #StaticList.xml
40             elif elem.tag == 'param':
41                 if 'alarmId' == elem.attrib['name']:
42                     id = elem.text
43             elif elem.tag == 'alarm':
44                 tmp_lst_lev1.append([id, elem.attrib['name']])
45             elif elem.tag == 'alarms':
46                 res_dic[elem.tag] = {}
47                 tmp_dic = res_dic[elem.tag]
48                 for item in tmp_lst_lev1:
49                     tmp_dic[ item[0] ] = item[1]
50                 tmp_lst_lev1 = []
51         elem.clear()   #关键在这一名,处理完节点及时清理内存
52     return res_dic

 

posted @ 2016-12-03 13:53  bongem  阅读(2244)  评论(0)    收藏  举报