Python 解析 XML 文件

使用 Python 较为高效地解析 XML 文件的方法

参考 http://codingpy.com/article/parsing-xml-using-python/

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import time


def parse_poi_by_elementTree(filepath):
    """Count POIs by loading the whole XML document into an ElementTree.

    The elapsed wall-clock time is printed to stdout.

    Args:
        filepath: Path of the XML file to parse.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple: the raw ``Num`` attribute
        of the first ``Pois`` element (a string, or ``None`` when the
        attribute is absent) and the number of ``Poi`` elements found.

    Raises:
        StopIteration: If the document contains no ``Pois`` element.
    """
    t0 = time.time()

    tree = ET.parse(filepath)
    # The next() builtin works on Python 2.6+ and Python 3 alike, unlike the
    # Python 2-only iterator method ``.next()``.
    pois_element_num = next(tree.iter(tag='Pois')).get('Num')
    # Count lazily instead of materialising every matching element in a list.
    vde_poi = sum(1 for _ in tree.iter(tag='Poi'))

    cost_time = time.time() - t0
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('parse_poi_by_elementTree time cost is %s' % cost_time)
    return pois_element_num, vde_poi


def parse_poi_by_iterparse(filepath):
    """Count POIs by streaming the XML document with ``ET.iterparse``.

    Each element is cleared once it has been inspected. The elapsed
    wall-clock time is printed to stdout.

    Args:
        filepath: Path of the XML file to parse.

    Returns:
        A ``(pois_element_num, vde_poi)`` tuple: the ``Num`` attribute of
        the ``Pois`` element converted to ``int`` (``0`` if no such element
        is seen) and the number of ``Poi`` elements found.
    """
    t0 = time.time()

    pois_element_num = 0
    vde_poi = 0
    # iterparse is called without an ``events`` argument, so the event name
    # is not inspected here.
    for _, elem in ET.iterparse(filepath):
        if elem.tag == 'Poi':
            vde_poi += 1
        if elem.tag == "Pois":
            pois_element_num = int(elem.get('Num'))

        # Drop the element's children, text and attributes after inspection.
        elem.clear()

    cost_time = time.time() - t0
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('parse_poi_by_iterparse time cost is %s' % cost_time)
    return pois_element_num, vde_poi


from statistic import StatisticItem, XML_STREET, XML_POI
import os


def parse_street_xml_by_ET(street_file):
    """Build the street statistic for one street XML file.

    Streams the file with ``ET.iterparse``, counting ``Street`` elements and
    reading the ``Num`` attribute of the ``Streets`` element as an int.

    Args:
        street_file: Path of the street XML file.

    Returns:
        ``StatisticItem(XML_STREET, [actual_count, declared_count])``; both
        numbers are ``0`` when the file does not exist.
    """
    declared_total = 0
    actual_total = 0  # number of Street elements actually present

    if os.path.exists(street_file):
        for _, node in ET.iterparse(street_file):
            tag = node.tag
            if tag == 'Street':
                actual_total += 1
            elif tag == "Streets":
                declared_total = int(node.get('Num'))

            # Clear each element once it has been inspected.
            node.clear()

    return StatisticItem(XML_STREET, [actual_total, declared_total])


def parse_poi_xml_by_ET(poi_file):
    """Build the POI statistic for one POI XML file.

    Streams the file with ``ET.iterparse``, counting ``Poi`` elements and
    reading the ``Num`` attribute of the ``Pois`` element as an int.

    Args:
        poi_file: Path of the POI XML file.

    Returns:
        ``StatisticItem(XML_POI, [actual_count, declared_count])``; both
        numbers are ``0`` when the file does not exist.
    """
    counts = [0, 0]  # [actual Poi elements, declared Num]

    if not os.path.exists(poi_file):
        return StatisticItem(XML_POI, counts)

    for _, node in ET.iterparse(poi_file):
        tag = node.tag
        if tag == 'Poi':
            counts[0] += 1
        elif tag == "Pois":
            counts[1] = int(node.get('Num'))

        # Clear each element once it has been inspected.
        node.clear()

    return StatisticItem(XML_POI, counts)


if __name__ == '__main__':
    # Sample input files used when comparing the two parsers:
    # C:\Users\shchshan\Desktop\vde\State_14120002\POI_1414000018.xml
    # C:\Users\shchshan\Desktop\vde\State_14120001\POI_1414000001.xml
    import sys

    # An optional first command-line argument overrides the default sample
    # file, so the comparison can be run against any POI XML file.
    if len(sys.argv) > 1:
        poi_path = sys.argv[1]
    else:
        poi_path = r'C:\Users\shchshan\Desktop\vde\State_14120001\POI_1414000001.xml'

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(parse_poi_by_elementTree(poi_path))
    print(parse_poi_by_iterparse(poi_path))

 

posted @ 2017-09-07 18:25  百变小超  阅读(2914)  评论(0)  编辑  收藏  举报