Python解析生成XML-ElementTree VS minidom

OS:Windows 7

关键字:Python3.4,XML,ElementTree,minidom

 

本文介绍用Python解析生成以下XML:

<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language='English'><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language='Chinese'><![CDATA[cdata text]]></Description>
    </Person>
</Persons>

1.创建一个名为src.xml的XML文件,内容如上,放到C:\temp目录下。

2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

ElementTreeSample.py如下:

# -*- coding: utf-8 -*-
"""
Sample of xml.etree.ElementTree

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.etree.ElementTree as ET
import time

def copy_node(src_node, target_node):
    """Recursively rebuild the content of src_node inside target_node.

    Attributes are always copied. An element that has children gets one new
    child per source child (its own text is not copied); an element without
    children gets the source text. Tail text is never copied.

    Args:
        src_node: Element to read from.
        target_node: Element to fill in; it is modified in place.
    """
    for attr_name, attr_value in src_node.items():
        target_node.set(attr_name, attr_value)

    # Leaf element: only the text remains to be copied.
    if len(src_node) == 0:
        target_node.text = src_node.text
        return

    for src_child in src_node:
        # SubElement creates the new element and appends it to target_node.
        clone = ET.SubElement(target_node, src_child.tag)
        copy_node(src_child, clone)
    
def read_write_xml(src, target):
    """Parse src with ElementTree, rebuild it via copy_node and write it to target.

    The time spent inside copy_node is printed in milliseconds, and the
    target is logged at INFO level once the file has been written.

    Args:
        src: XML source handed to ET.parse (main passes a file path).
        target: Destination handed to ElementTree.write (main passes a file
            path).
    """
    root = ET.parse(src).getroot()
    target_root = ET.Element(root.tag)

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can be too coarse for a copy this small
    # and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    copy_node(root, target_root)
    elapsed_ms = (time.perf_counter() - start_time) * 1000
    print('copy_node:' + str(elapsed_ms))

    ET.ElementTree(target_root).write(target)
    logging.info(target)

def main():
    """Copy C:/temp/src.xml to C:/temp/target-tree.xml and print the elapsed time.

    Log records go to a ``.log`` file next to this script and to an extra
    stream handler. Errors are logged with their traceback instead of being
    propagated, so the closing prompt is still shown.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger. splitext swaps only the trailing extension;
        # str.replace('.py', '.log') would rewrite every '.py' in the path
        # and, for a script without that suffix, leave the log path equal to
        # the script path (which filemode='w' would then overwrite).
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        # Paths are fixed for debugging; use sys.argv[1] / sys.argv[2] here
        # to take them from the command line instead.
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-tree.xml'

        # Generate results
        start_time = time.perf_counter()
        read_write_xml(src, target)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('read_write_xml:' + str(elapsed_ms))
    except Exception:
        # Exception (not a bare except) lets Ctrl+C and SystemExit through.
        # logging.exception appends the active traceback on its own, so only
        # a short message is needed.
        logging.exception('Failed to generate the target XML')

    input('Press any key to exit...')

# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

 3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

MinidomSample.py如下:

# -*- coding: utf-8 -*-
"""
Sample of xml.dom.minidom

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.dom.minidom as MD
import time

def get_text(n):
    """Return the concatenated data of the text and CDATA children of n.

    Only direct children are considered; child elements, comments and other
    node types are skipped. An element without such children yields "".
    """
    return "".join(
        child.data
        for child in n.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )

def copy_node(target_doc, src_node, target_node):
    """Recursively rebuild the content of src_node underneath target_node.

    Attributes of src_node are copied unless src_node is the Document itself.
    Text, CDATA and element children are recreated in order; every other
    node type (comments, for instance) is skipped.

    Args:
        target_doc: Document used as the factory for the new nodes.
        src_node: Document or element to read from.
        target_node: Node that receives the copies; modified in place.
    """
    is_document = isinstance(src_node, MD.Document)
    if not is_document and src_node.hasAttributes():
        for attr_name, attr_value in src_node.attributes.items():
            target_node.setAttribute(attr_name, attr_value)

    for child in src_node.childNodes:
        kind = child.nodeType
        if kind == child.ELEMENT_NODE:
            # Attach the new element first, then fill it in recursively.
            clone = target_doc.createElement(child.nodeName)
            target_node.appendChild(clone)
            copy_node(target_doc, child, clone)
            continue

        if kind == child.TEXT_NODE:
            clone = target_doc.createTextNode(child.nodeValue)
        elif kind == child.CDATA_SECTION_NODE:
            clone = target_doc.createCDATASection(child.nodeValue)
        else:
            # Any other node type is not copied.
            continue
        target_node.appendChild(clone)
    
def read_write_xml(src, target):
    """Parse src with minidom, rebuild it via copy_node and write it to target.

    The time spent inside copy_node is printed in milliseconds, and the
    target path is logged at INFO level once the file has been written.

    Args:
        src: XML source handed to MD.parse (main passes a file path).
        target: Path of the file to create or overwrite.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can be too coarse for a copy this small
    # and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    copy_node(target_doc, doc, target_doc)
    elapsed_ms = (time.perf_counter() - start_time) * 1000
    print('copy_node: ' + str(elapsed_ms))

    # Write to file. The serialized root element carries no XML declaration,
    # so readers will assume UTF-8: encode explicitly instead of relying on
    # the platform default. The with-block closes the file even when the
    # write fails.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())
    logging.info(target)

def main():
    """Copy C:/temp/src.xml to C:/temp/target-dom.xml and print the elapsed time.

    Log records go to a ``.log`` file next to this script and to an extra
    stream handler. Errors are logged with their traceback instead of being
    propagated, so the closing prompt is still shown.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger. splitext swaps only the trailing extension;
        # str.replace('.py', '.log') would rewrite every '.py' in the path
        # and, for a script without that suffix, leave the log path equal to
        # the script path (which filemode='w' would then overwrite).
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        # Paths are fixed for debugging; use sys.argv[1] / sys.argv[2] here
        # to take them from the command line instead.
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-dom.xml'

        # Generate results
        start_time = time.perf_counter()
        read_write_xml(src, target)
        elapsed_ms = (time.perf_counter() - start_time) * 1000
        print('read_write_xml: ' + str(elapsed_ms))
    except Exception:
        # Exception (not a bare except) lets Ctrl+C and SystemExit through.
        # logging.exception appends the active traceback on its own, so only
        # a short message is needed.
        logging.exception('Failed to generate the target XML')

    input('Press any key to exit...')

# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

4.运行ElementTreeSample.py,得到XML如下:

<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

5.运行MinidomSample.py,得到XML如下:

<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language="English"><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language="Chinese"><![CDATA[cdata text]]></Description>
    </Person>
</Persons>

 

ElementTree VS minidom:

1.ElementTree执行速度会比minidom快一些。

2.ElementTree示例输出的XML丢失了换行和缩进(ElementTree把这些空白保存在元素的text/tail中,本例的copy_node没有复制它们);minidom把空白作为文本节点处理,示例逐个复制后得以保留。

3.ElementTree不支持CDATA(读取时CDATA内容被当作普通文本,写出时不再带CDATA标记),minidom可以。

posted @ 2015-05-05 20:44  Ldlchina  阅读(1601)  评论(0)  编辑  收藏  举报