lmgsanm

每天学习一点,每天进步一点点…… Tomorrow is another beautiful day

导航

python模块:xml.etree.ElementTree

   1 """Lightweight XML support for Python.
   2 
   3  XML is an inherently hierarchical data format, and the most natural way to
   4  represent it is with a tree.  This module has two classes for this purpose:
   5 
   6     1. ElementTree represents the whole XML document as a tree and
   7 
   8     2. Element represents a single node in this tree.
   9 
  10  Interactions with the whole document (reading and writing to/from files) are
  11  usually done on the ElementTree level.  Interactions with a single XML element
  12  and its sub-elements are done on the Element level.
  13 
  14  Element is a flexible container object designed to store hierarchical data
  15  structures in memory. It can be described as a cross between a list and a
  16  dictionary.  Each Element has a number of properties associated with it:
  17 
  18     'tag' - a string containing the element's name.
  19 
  20     'attributes' - a Python dictionary storing the element's attributes.
  21 
  22     'text' - a string containing the element's text content.
  23 
  24     'tail' - an optional string containing text after the element's end tag.
  25 
  26     And a number of child elements stored in a Python sequence.
  27 
  28  To create an element instance, use the Element constructor,
  29  or the SubElement factory function.
  30 
  31  You can also use the ElementTree class to wrap an element structure
  32  and convert it to and from XML.
  33 
  34 """
  35 
  36 #---------------------------------------------------------------------
  37 # Licensed to PSF under a Contributor Agreement.
  38 # See http://www.python.org/psf/license for licensing details.
  39 #
  40 # ElementTree
  41 # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
  42 #
  43 # fredrik@pythonware.com
  44 # http://www.pythonware.com
  45 # --------------------------------------------------------------------
  46 # The ElementTree toolkit is
  47 #
  48 # Copyright (c) 1999-2008 by Fredrik Lundh
  49 #
  50 # By obtaining, using, and/or copying this software and/or its
  51 # associated documentation, you agree that you have read, understood,
  52 # and will comply with the following terms and conditions:
  53 #
  54 # Permission to use, copy, modify, and distribute this software and
  55 # its associated documentation for any purpose and without fee is
  56 # hereby granted, provided that the above copyright notice appears in
  57 # all copies, and that both that copyright notice and this permission
  58 # notice appear in supporting documentation, and that the name of
  59 # Secret Labs AB or the author not be used in advertising or publicity
  60 # pertaining to distribution of the software without specific, written
  61 # prior permission.
  62 #
  63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  65 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  70 # OF THIS SOFTWARE.
  71 # --------------------------------------------------------------------
  72 
  73 __all__ = [
  74     # public symbols
  75     "Comment",
  76     "dump",
  77     "Element", "ElementTree",
  78     "fromstring", "fromstringlist",
  79     "iselement", "iterparse",
  80     "parse", "ParseError",
  81     "PI", "ProcessingInstruction",
  82     "QName",
  83     "SubElement",
  84     "tostring", "tostringlist",
  85     "TreeBuilder",
  86     "VERSION",
  87     "XML", "XMLID",
  88     "XMLParser", "XMLPullParser",
  89     "register_namespace",
  90     ]
  91 
  92 VERSION = "1.3.0"
  93 
  94 import sys
  95 import re
  96 import warnings
  97 import io
  98 import collections
  99 import contextlib
 100 
 101 from . import ElementPath
 102 
 103 
 104 class ParseError(SyntaxError):
 105     """An error when parsing an XML document.
 106 
 107     In addition to its exception value, a ParseError contains
 108     two extra attributes:
 109         'code'     - the specific exception code
 110         'position' - the line and column of the error
 111 
 112     """
 113     pass
 114 
 115 # --------------------------------------------------------------------
 116 
 117 
 118 def iselement(element):
 119     """Return True if *element* appears to be an Element."""
 120     return hasattr(element, 'tag')
 121 
 122 
 123 class Element:
 124     """An XML element.
 125 
 126     This class is the reference implementation of the Element interface.
 127 
 128     An element's length is its number of subelements.  That means if you
 129     want to check if an element is truly empty, you should check BOTH
 130     its length AND its text attribute.
 131 
 132     The element tag, attribute names, and attribute values can be either
 133     bytes or strings.
 134 
 135     *tag* is the element name.  *attrib* is an optional dictionary containing
 136     element attributes. *extra* are additional element attributes given as
 137     keyword arguments.
 138 
 139     Example form:
 140         <tag attrib>text<child/>...</tag>tail
 141 
 142     """
 143 
 144     tag = None
 145     """The element's name."""
 146 
 147     attrib = None
 148     """Dictionary of the element's attributes."""
 149 
 150     text = None
 151     """
 152     Text before first subelement. This is either a string or the value None.
 153     Note that if there is no text, this attribute may be either
 154     None or the empty string, depending on the parser.
 155 
 156     """
 157 
 158     tail = None
 159     """
 160     Text after this element's end tag, but before the next sibling element's
 161     start tag.  This is either a string or the value None.  Note that if there
 162     was no text, this attribute may be either None or an empty string,
 163     depending on the parser.
 164 
 165     """
 166 
 167     def __init__(self, tag, attrib={}, **extra):
 168         if not isinstance(attrib, dict):
 169             raise TypeError("attrib must be dict, not %s" % (
 170                 attrib.__class__.__name__,))
 171         attrib = attrib.copy()
 172         attrib.update(extra)
 173         self.tag = tag
 174         self.attrib = attrib
 175         self._children = []
 176 
 177     def __repr__(self):
 178         return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))
 179 
 180     def makeelement(self, tag, attrib):
 181         """Create a new element with the same type.
 182 
 183         *tag* is a string containing the element name.
 184         *attrib* is a dictionary containing the element attributes.
 185 
 186         Do not call this method, use the SubElement factory function instead.
 187 
 188         """
 189         return self.__class__(tag, attrib)
 190 
 191     def copy(self):
 192         """Return copy of current element.
 193 
 194         This creates a shallow copy. Subelements will be shared with the
 195         original tree.
 196 
 197         """
 198         elem = self.makeelement(self.tag, self.attrib)
 199         elem.text = self.text
 200         elem.tail = self.tail
 201         elem[:] = self
 202         return elem
 203 
 204     def __len__(self):
 205         return len(self._children)
 206 
 207     def __bool__(self):
 208         warnings.warn(
 209             "The behavior of this method will change in future versions.  "
 210             "Use specific 'len(elem)' or 'elem is not None' test instead.",
 211             FutureWarning, stacklevel=2
 212             )
 213         return len(self._children) != 0 # emulate old behaviour, for now
 214 
 215     def __getitem__(self, index):
 216         return self._children[index]
 217 
 218     def __setitem__(self, index, element):
 219         # if isinstance(index, slice):
 220         #     for elt in element:
 221         #         assert iselement(elt)
 222         # else:
 223         #     assert iselement(element)
 224         self._children[index] = element
 225 
 226     def __delitem__(self, index):
 227         del self._children[index]
 228 
 229     def append(self, subelement):
 230         """Add *subelement* to the end of this element.
 231 
 232         The new element will appear in document order after the last existing
 233         subelement (or directly after the text, if it's the first subelement),
 234         but before the end tag for this element.
 235 
 236         """
 237         self._assert_is_element(subelement)
 238         self._children.append(subelement)
 239 
 240     def extend(self, elements):
 241         """Append subelements from a sequence.
 242 
 243         *elements* is a sequence with zero or more elements.
 244 
 245         """
 246         for element in elements:
 247             self._assert_is_element(element)
 248         self._children.extend(elements)
 249 
 250     def insert(self, index, subelement):
 251         """Insert *subelement* at position *index*."""
 252         self._assert_is_element(subelement)
 253         self._children.insert(index, subelement)
 254 
 255     def _assert_is_element(self, e):
 256         # Need to refer to the actual Python implementation, not the
 257         # shadowing C implementation.
 258         if not isinstance(e, _Element_Py):
 259             raise TypeError('expected an Element, not %s' % type(e).__name__)
 260 
 261     def remove(self, subelement):
 262         """Remove matching subelement.
 263 
 264         Unlike the find methods, this method compares elements based on
 265         identity, NOT ON tag value or contents.  To remove subelements by
 266         other means, the easiest way is to use a list comprehension to
 267         select what elements to keep, and then use slice assignment to update
 268         the parent element.
 269 
 270         ValueError is raised if a matching element could not be found.
 271 
 272         """
 273         # assert iselement(element)
 274         self._children.remove(subelement)
 275 
 276     def getchildren(self):
 277         """(Deprecated) Return all subelements.
 278 
 279         Elements are returned in document order.
 280 
 281         """
 282         warnings.warn(
 283             "This method will be removed in future versions.  "
 284             "Use 'list(elem)' or iteration over elem instead.",
 285             DeprecationWarning, stacklevel=2
 286             )
 287         return self._children
 288 
 289     def find(self, path, namespaces=None):
 290         """Find first matching element by tag name or path.
 291 
 292         *path* is a string having either an element tag or an XPath,
 293         *namespaces* is an optional mapping from namespace prefix to full name.
 294 
 295         Return the first matching element, or None if no element was found.
 296 
 297         """
 298         return ElementPath.find(self, path, namespaces)
 299 
 300     def findtext(self, path, default=None, namespaces=None):
 301         """Find text for first matching element by tag name or path.
 302 
 303         *path* is a string having either an element tag or an XPath,
 304         *default* is the value to return if the element was not found,
 305         *namespaces* is an optional mapping from namespace prefix to full name.
 306 
 307         Return text content of first matching element, or default value if
 308         none was found.  Note that if an element is found having no text
 309         content, the empty string is returned.
 310 
 311         """
 312         return ElementPath.findtext(self, path, default, namespaces)
 313 
 314     def findall(self, path, namespaces=None):
 315         """Find all matching subelements by tag name or path.
 316 
 317         *path* is a string having either an element tag or an XPath,
 318         *namespaces* is an optional mapping from namespace prefix to full name.
 319 
 320         Returns list containing all matching elements in document order.
 321 
 322         """
 323         return ElementPath.findall(self, path, namespaces)
 324 
 325     def iterfind(self, path, namespaces=None):
 326         """Find all matching subelements by tag name or path.
 327 
 328         *path* is a string having either an element tag or an XPath,
 329         *namespaces* is an optional mapping from namespace prefix to full name.
 330 
 331         Return an iterable yielding all matching elements in document order.
 332 
 333         """
 334         return ElementPath.iterfind(self, path, namespaces)
 335 
 336     def clear(self):
 337         """Reset element.
 338 
 339         This function removes all subelements, clears all attributes, and sets
 340         the text and tail attributes to None.
 341 
 342         """
 343         self.attrib.clear()
 344         self._children = []
 345         self.text = self.tail = None
 346 
 347     def get(self, key, default=None):
 348         """Get element attribute.
 349 
 350         Equivalent to attrib.get, but some implementations may handle this a
 351         bit more efficiently.  *key* is what attribute to look for, and
 352         *default* is what to return if the attribute was not found.
 353 
 354         Returns a string containing the attribute value, or the default if
 355         attribute was not found.
 356 
 357         """
 358         return self.attrib.get(key, default)
 359 
 360     def set(self, key, value):
 361         """Set element attribute.
 362 
 363         Equivalent to attrib[key] = value, but some implementations may handle
 364         this a bit more efficiently.  *key* is what attribute to set, and
 365         *value* is the attribute value to set it to.
 366 
 367         """
 368         self.attrib[key] = value
 369 
 370     def keys(self):
 371         """Get list of attribute names.
 372 
 373         Names are returned in an arbitrary order, just like an ordinary
 374         Python dict.  Equivalent to attrib.keys()
 375 
 376         """
 377         return self.attrib.keys()
 378 
 379     def items(self):
 380         """Get element attributes as a sequence.
 381 
 382         The attributes are returned in arbitrary order.  Equivalent to
 383         attrib.items().
 384 
 385         Return a list of (name, value) tuples.
 386 
 387         """
 388         return self.attrib.items()
 389 
 390     def iter(self, tag=None):
 391         """Create tree iterator.
 392 
 393         The iterator loops over the element and all subelements in document
 394         order, returning all elements with a matching tag.
 395 
 396         If the tree structure is modified during iteration, new or removed
 397         elements may or may not be included.  To get a stable set, use the
 398         list() function on the iterator, and loop over the resulting list.
 399 
 400         *tag* is what tags to look for (default is to return all elements)
 401 
 402         Return an iterator containing all the matching elements.
 403 
 404         """
 405         if tag == "*":
 406             tag = None
 407         if tag is None or self.tag == tag:
 408             yield self
 409         for e in self._children:
 410             yield from e.iter(tag)
 411 
 412     # compatibility
 413     def getiterator(self, tag=None):
 414         # Change for a DeprecationWarning in 1.4
 415         warnings.warn(
 416             "This method will be removed in future versions.  "
 417             "Use 'elem.iter()' or 'list(elem.iter())' instead.",
 418             PendingDeprecationWarning, stacklevel=2
 419         )
 420         return list(self.iter(tag))
 421 
 422     def itertext(self):
 423         """Create text iterator.
 424 
 425         The iterator loops over the element and all subelements in document
 426         order, returning all inner text.
 427 
 428         """
 429         tag = self.tag
 430         if not isinstance(tag, str) and tag is not None:
 431             return
 432         t = self.text
 433         if t:
 434             yield t
 435         for e in self:
 436             yield from e.itertext()
 437             t = e.tail
 438             if t:
 439                 yield t
 440 
 441 
 442 def SubElement(parent, tag, attrib={}, **extra):
 443     """Subelement factory which creates an element instance, and appends it
 444     to an existing parent.
 445 
 446     The element tag, attribute names, and attribute values can be either
 447     bytes or Unicode strings.
 448 
 449     *parent* is the parent element, *tag* is the subelements name, *attrib* is
 450     an optional directory containing element attributes, *extra* are
 451     additional attributes given as keyword arguments.
 452 
 453     """
 454     attrib = attrib.copy()
 455     attrib.update(extra)
 456     element = parent.makeelement(tag, attrib)
 457     parent.append(element)
 458     return element
 459 
 460 
 461 def Comment(text=None):
 462     """Comment element factory.
 463 
 464     This function creates a special element which the standard serializer
 465     serializes as an XML comment.
 466 
 467     *text* is a string containing the comment string.
 468 
 469     """
 470     element = Element(Comment)
 471     element.text = text
 472     return element
 473 
 474 
 475 def ProcessingInstruction(target, text=None):
 476     """Processing Instruction element factory.
 477 
 478     This function creates a special element which the standard serializer
 479     serializes as an XML comment.
 480 
 481     *target* is a string containing the processing instruction, *text* is a
 482     string containing the processing instruction contents, if any.
 483 
 484     """
 485     element = Element(ProcessingInstruction)
 486     element.text = target
 487     if text:
 488         element.text = element.text + " " + text
 489     return element
 490 
 491 PI = ProcessingInstruction
 492 
 493 
 494 class QName:
 495     """Qualified name wrapper.
 496 
 497     This class can be used to wrap a QName attribute value in order to get
 498     proper namespace handing on output.
 499 
 500     *text_or_uri* is a string containing the QName value either in the form
 501     {uri}local, or if the tag argument is given, the URI part of a QName.
 502 
 503     *tag* is an optional argument which if given, will make the first
 504     argument (text_or_uri) be interpreted as a URI, and this argument (tag)
 505     be interpreted as a local name.
 506 
 507     """
 508     def __init__(self, text_or_uri, tag=None):
 509         if tag:
 510             text_or_uri = "{%s}%s" % (text_or_uri, tag)
 511         self.text = text_or_uri
 512     def __str__(self):
 513         return self.text
 514     def __repr__(self):
 515         return '<%s %r>' % (self.__class__.__name__, self.text)
 516     def __hash__(self):
 517         return hash(self.text)
 518     def __le__(self, other):
 519         if isinstance(other, QName):
 520             return self.text <= other.text
 521         return self.text <= other
 522     def __lt__(self, other):
 523         if isinstance(other, QName):
 524             return self.text < other.text
 525         return self.text < other
 526     def __ge__(self, other):
 527         if isinstance(other, QName):
 528             return self.text >= other.text
 529         return self.text >= other
 530     def __gt__(self, other):
 531         if isinstance(other, QName):
 532             return self.text > other.text
 533         return self.text > other
 534     def __eq__(self, other):
 535         if isinstance(other, QName):
 536             return self.text == other.text
 537         return self.text == other
 538 
 539 # --------------------------------------------------------------------
 540 
 541 
 542 class ElementTree:
 543     """An XML element hierarchy.
 544 
 545     This class also provides support for serialization to and from
 546     standard XML.
 547 
 548     *element* is an optional root element node,
 549     *file* is an optional file handle or file name of an XML file whose
 550     contents will be used to initialize the tree with.
 551 
 552     """
 553     def __init__(self, element=None, file=None):
 554         # assert element is None or iselement(element)
 555         self._root = element # first node
 556         if file:
 557             self.parse(file)
 558 
 559     def getroot(self):
 560         """Return root element of this tree."""
 561         return self._root
 562 
 563     def _setroot(self, element):
 564         """Replace root element of this tree.
 565 
 566         This will discard the current contents of the tree and replace it
 567         with the given element.  Use with care!
 568 
 569         """
 570         # assert iselement(element)
 571         self._root = element
 572 
 573     def parse(self, source, parser=None):
 574         """Load external XML document into element tree.
 575 
 576         *source* is a file name or file object, *parser* is an optional parser
 577         instance that defaults to XMLParser.
 578 
 579         ParseError is raised if the parser fails to parse the document.
 580 
 581         Returns the root element of the given source document.
 582 
 583         """
 584         close_source = False
 585         if not hasattr(source, "read"):
 586             source = open(source, "rb")
 587             close_source = True
 588         try:
 589             if parser is None:
 590                 # If no parser was specified, create a default XMLParser
 591                 parser = XMLParser()
 592                 if hasattr(parser, '_parse_whole'):
 593                     # The default XMLParser, when it comes from an accelerator,
 594                     # can define an internal _parse_whole API for efficiency.
 595                     # It can be used to parse the whole source without feeding
 596                     # it with chunks.
 597                     self._root = parser._parse_whole(source)
 598                     return self._root
 599             while True:
 600                 data = source.read(65536)
 601                 if not data:
 602                     break
 603                 parser.feed(data)
 604             self._root = parser.close()
 605             return self._root
 606         finally:
 607             if close_source:
 608                 source.close()
 609 
 610     def iter(self, tag=None):
 611         """Create and return tree iterator for the root element.
 612 
 613         The iterator loops over all elements in this tree, in document order.
 614 
 615         *tag* is a string with the tag name to iterate over
 616         (default is to return all elements).
 617 
 618         """
 619         # assert self._root is not None
 620         return self._root.iter(tag)
 621 
 622     # compatibility
 623     def getiterator(self, tag=None):
 624         # Change for a DeprecationWarning in 1.4
 625         warnings.warn(
 626             "This method will be removed in future versions.  "
 627             "Use 'tree.iter()' or 'list(tree.iter())' instead.",
 628             PendingDeprecationWarning, stacklevel=2
 629         )
 630         return list(self.iter(tag))
 631 
 632     def find(self, path, namespaces=None):
 633         """Find first matching element by tag name or path.
 634 
 635         Same as getroot().find(path), which is Element.find()
 636 
 637         *path* is a string having either an element tag or an XPath,
 638         *namespaces* is an optional mapping from namespace prefix to full name.
 639 
 640         Return the first matching element, or None if no element was found.
 641 
 642         """
 643         # assert self._root is not None
 644         if path[:1] == "/":
 645             path = "." + path
 646             warnings.warn(
 647                 "This search is broken in 1.3 and earlier, and will be "
 648                 "fixed in a future version.  If you rely on the current "
 649                 "behaviour, change it to %r" % path,
 650                 FutureWarning, stacklevel=2
 651                 )
 652         return self._root.find(path, namespaces)
 653 
 654     def findtext(self, path, default=None, namespaces=None):
 655         """Find first matching element by tag name or path.
 656 
 657         Same as getroot().findtext(path),  which is Element.findtext()
 658 
 659         *path* is a string having either an element tag or an XPath,
 660         *namespaces* is an optional mapping from namespace prefix to full name.
 661 
 662         Return the first matching element, or None if no element was found.
 663 
 664         """
 665         # assert self._root is not None
 666         if path[:1] == "/":
 667             path = "." + path
 668             warnings.warn(
 669                 "This search is broken in 1.3 and earlier, and will be "
 670                 "fixed in a future version.  If you rely on the current "
 671                 "behaviour, change it to %r" % path,
 672                 FutureWarning, stacklevel=2
 673                 )
 674         return self._root.findtext(path, default, namespaces)
 675 
 676     def findall(self, path, namespaces=None):
 677         """Find all matching subelements by tag name or path.
 678 
 679         Same as getroot().findall(path), which is Element.findall().
 680 
 681         *path* is a string having either an element tag or an XPath,
 682         *namespaces* is an optional mapping from namespace prefix to full name.
 683 
 684         Return list containing all matching elements in document order.
 685 
 686         """
 687         # assert self._root is not None
 688         if path[:1] == "/":
 689             path = "." + path
 690             warnings.warn(
 691                 "This search is broken in 1.3 and earlier, and will be "
 692                 "fixed in a future version.  If you rely on the current "
 693                 "behaviour, change it to %r" % path,
 694                 FutureWarning, stacklevel=2
 695                 )
 696         return self._root.findall(path, namespaces)
 697 
 698     def iterfind(self, path, namespaces=None):
 699         """Find all matching subelements by tag name or path.
 700 
 701         Same as getroot().iterfind(path), which is element.iterfind()
 702 
 703         *path* is a string having either an element tag or an XPath,
 704         *namespaces* is an optional mapping from namespace prefix to full name.
 705 
 706         Return an iterable yielding all matching elements in document order.
 707 
 708         """
 709         # assert self._root is not None
 710         if path[:1] == "/":
 711             path = "." + path
 712             warnings.warn(
 713                 "This search is broken in 1.3 and earlier, and will be "
 714                 "fixed in a future version.  If you rely on the current "
 715                 "behaviour, change it to %r" % path,
 716                 FutureWarning, stacklevel=2
 717                 )
 718         return self._root.iterfind(path, namespaces)
 719 
 720     def write(self, file_or_filename,
 721               encoding=None,
 722               xml_declaration=None,
 723               default_namespace=None,
 724               method=None, *,
 725               short_empty_elements=True):
 726         """Write element tree to a file as XML.
 727 
 728         Arguments:
 729           *file_or_filename* -- file name or a file object opened for writing
 730 
 731           *encoding* -- the output encoding (default: US-ASCII)
 732 
 733           *xml_declaration* -- bool indicating if an XML declaration should be
 734                                added to the output. If None, an XML declaration
 735                                is added if encoding IS NOT either of:
 736                                US-ASCII, UTF-8, or Unicode
 737 
 738           *default_namespace* -- sets the default XML namespace (for "xmlns")
 739 
 740           *method* -- either "xml" (default), "html, "text", or "c14n"
 741 
 742           *short_empty_elements* -- controls the formatting of elements
 743                                     that contain no content. If True (default)
 744                                     they are emitted as a single self-closed
 745                                     tag, otherwise they are emitted as a pair
 746                                     of start/end tags
 747 
 748         """
 749         if not method:
 750             method = "xml"
 751         elif method not in _serialize:
 752             raise ValueError("unknown method %r" % method)
 753         if not encoding:
 754             if method == "c14n":
 755                 encoding = "utf-8"
 756             else:
 757                 encoding = "us-ascii"
 758         enc_lower = encoding.lower()
 759         with _get_writer(file_or_filename, enc_lower) as write:
 760             if method == "xml" and (xml_declaration or
 761                     (xml_declaration is None and
 762                      enc_lower not in ("utf-8", "us-ascii", "unicode"))):
 763                 declared_encoding = encoding
 764                 if enc_lower == "unicode":
 765                     # Retrieve the default encoding for the xml declaration
 766                     import locale
 767                     declared_encoding = locale.getpreferredencoding()
 768                 write("<?xml version='1.0' encoding='%s'?>\n" % (
 769                     declared_encoding,))
 770             if method == "text":
 771                 _serialize_text(write, self._root)
 772             else:
 773                 qnames, namespaces = _namespaces(self._root, default_namespace)
 774                 serialize = _serialize[method]
 775                 serialize(write, self._root, qnames, namespaces,
 776                           short_empty_elements=short_empty_elements)
 777 
 778     def write_c14n(self, file):
 779         # lxml.etree compatibility.  use output method instead
 780         return self.write(file, method="c14n")
 781 
 782 # --------------------------------------------------------------------
 783 # serialization support
 784 
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Context manager yielding a text ``write`` callable for the target.

    *file_or_filename* is either an object with a ``write`` attribute or
    something accepted by open() as a file name.  *encoding* is the output
    encoding; the special value "unicode" means the target is written to as
    text without wrapping.

    Everything opened or wrapped here is released when the context exits;
    objects passed in by the caller are detached from the wrappers rather
    than closed.
    """
    try:
        write = file_or_filename.write
    except AttributeError:
        # No write attribute: treat file_or_filename as a file name.
        if encoding == "unicode":
            # No explicit encoding is passed to open() in this case.
            file = open(file_or_filename, "w")
        else:
            # Characters the encoding cannot represent are written as XML
            # character references instead of raising.
            file = open(file_or_filename, "w", encoding=encoding,
                        errors="xmlcharrefreplace")
        with file:
            yield file.write
    else:
        # file_or_filename is a file-like object;
        # encoding determines if it is a text or binary writer.
        if encoding == "unicode":
            # Use the text writer as is.
            yield write
        else:
            # Wrap a binary writer with TextIOWrapper.  The ExitStack runs
            # its callbacks in reverse registration order on exit, so the
            # TextIOWrapper is detached before the BufferedWriter (if any).
            with contextlib.ExitStack() as stack:
                if isinstance(file_or_filename, io.BufferedIOBase):
                    file = file_or_filename
                elif isinstance(file_or_filename, io.RawIOBase):
                    file = io.BufferedWriter(file_or_filename)
                    # Keep the original file open when the BufferedWriter is
                    # destroyed.
                    stack.callback(file.detach)
                else:
                    # This is to handle passed objects that aren't in the
                    # IOBase hierarchy, but just have a write method.
                    file = io.BufferedIOBase()
                    file.writable = lambda: True
                    file.write = write
                    try:
                        # TextIOWrapper uses these methods to determine
                        # if a BOM (for UTF-16, etc.) should be added.
                        file.seekable = file_or_filename.seekable
                        file.tell = file_or_filename.tell
                    except AttributeError:
                        # Best effort: the object does not offer them.
                        pass
                file = io.TextIOWrapper(file,
                                        encoding=encoding,
                                        errors="xmlcharrefreplace",
                                        newline="\n")
                # Keep the original file open when the TextIOWrapper is
                # destroyed.
                stack.callback(file.detach)
                yield file.write
 836 
 837 def _namespaces(elem, default_namespace=None):
 838     # identify namespaces used in this tree
 839 
 840     # maps qnames to *encoded* prefix:local names
 841     qnames = {None: None}
 842 
 843     # maps uri:s to prefixes
 844     namespaces = {}
 845     if default_namespace:
 846         namespaces[default_namespace] = ""
 847 
 848     def add_qname(qname):
 849         # calculate serialized qname representation
 850         try:
 851             if qname[:1] == "{":
 852                 uri, tag = qname[1:].rsplit("}", 1)
 853                 prefix = namespaces.get(uri)
 854                 if prefix is None:
 855                     prefix = _namespace_map.get(uri)
 856                     if prefix is None:
 857                         prefix = "ns%d" % len(namespaces)
 858                     if prefix != "xml":
 859                         namespaces[uri] = prefix
 860                 if prefix:
 861                     qnames[qname] = "%s:%s" % (prefix, tag)
 862                 else:
 863                     qnames[qname] = tag # default element
 864             else:
 865                 if default_namespace:
 866                     # FIXME: can this be handled in XML 1.0?
 867                     raise ValueError(
 868                         "cannot use non-qualified names with "
 869                         "default_namespace option"
 870                         )
 871                 qnames[qname] = qname
 872         except TypeError:
 873             _raise_serialization_error(qname)
 874 
 875     # populate qname and namespaces table
 876     for elem in elem.iter():
 877         tag = elem.tag
 878         if isinstance(tag, QName):
 879             if tag.text not in qnames:
 880                 add_qname(tag.text)
 881         elif isinstance(tag, str):
 882             if tag not in qnames:
 883                 add_qname(tag)
 884         elif tag is not None and tag is not Comment and tag is not PI:
 885             _raise_serialization_error(tag)
 886         for key, value in elem.items():
 887             if isinstance(key, QName):
 888                 key = key.text
 889             if key not in qnames:
 890                 add_qname(key)
 891             if isinstance(value, QName) and value.text not in qnames:
 892                 add_qname(value.text)
 893         text = elem.text
 894         if isinstance(text, QName) and text.text not in qnames:
 895             add_qname(text.text)
 896     return qnames, namespaces
 897 
 898 def _serialize_xml(write, elem, qnames, namespaces,
 899                    short_empty_elements, **kwargs):
 900     tag = elem.tag
 901     text = elem.text
 902     if tag is Comment:
 903         write("<!--%s-->" % text)
 904     elif tag is ProcessingInstruction:
 905         write("<?%s?>" % text)
 906     else:
 907         tag = qnames[tag]
 908         if tag is None:
 909             if text:
 910                 write(_escape_cdata(text))
 911             for e in elem:
 912                 _serialize_xml(write, e, qnames, None,
 913                                short_empty_elements=short_empty_elements)
 914         else:
 915             write("<" + tag)
 916             items = list(elem.items())
 917             if items or namespaces:
 918                 if namespaces:
 919                     for v, k in sorted(namespaces.items(),
 920                                        key=lambda x: x[1]):  # sort on prefix
 921                         if k:
 922                             k = ":" + k
 923                         write(" xmlns%s=\"%s\"" % (
 924                             k,
 925                             _escape_attrib(v)
 926                             ))
 927                 for k, v in sorted(items):  # lexical order
 928                     if isinstance(k, QName):
 929                         k = k.text
 930                     if isinstance(v, QName):
 931                         v = qnames[v.text]
 932                     else:
 933                         v = _escape_attrib(v)
 934                     write(" %s=\"%s\"" % (qnames[k], v))
 935             if text or len(elem) or not short_empty_elements:
 936                 write(">")
 937                 if text:
 938                     write(_escape_cdata(text))
 939                 for e in elem:
 940                     _serialize_xml(write, e, qnames, None,
 941                                    short_empty_elements=short_empty_elements)
 942                 write("</" + tag + ">")
 943             else:
 944                 write(" />")
 945     if elem.tail:
 946         write(_escape_cdata(elem.tail))
 947 
 948 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
 949               "img", "input", "isindex", "link", "meta", "param")
 950 
 951 try:
 952     HTML_EMPTY = set(HTML_EMPTY)
 953 except NameError:
 954     pass
 955 
 956 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
 957     tag = elem.tag
 958     text = elem.text
 959     if tag is Comment:
 960         write("<!--%s-->" % _escape_cdata(text))
 961     elif tag is ProcessingInstruction:
 962         write("<?%s?>" % _escape_cdata(text))
 963     else:
 964         tag = qnames[tag]
 965         if tag is None:
 966             if text:
 967                 write(_escape_cdata(text))
 968             for e in elem:
 969                 _serialize_html(write, e, qnames, None)
 970         else:
 971             write("<" + tag)
 972             items = list(elem.items())
 973             if items or namespaces:
 974                 if namespaces:
 975                     for v, k in sorted(namespaces.items(),
 976                                        key=lambda x: x[1]):  # sort on prefix
 977                         if k:
 978                             k = ":" + k
 979                         write(" xmlns%s=\"%s\"" % (
 980                             k,
 981                             _escape_attrib(v)
 982                             ))
 983                 for k, v in sorted(items):  # lexical order
 984                     if isinstance(k, QName):
 985                         k = k.text
 986                     if isinstance(v, QName):
 987                         v = qnames[v.text]
 988                     else:
 989                         v = _escape_attrib_html(v)
 990                     # FIXME: handle boolean attributes
 991                     write(" %s=\"%s\"" % (qnames[k], v))
 992             write(">")
 993             ltag = tag.lower()
 994             if text:
 995                 if ltag == "script" or ltag == "style":
 996                     write(text)
 997                 else:
 998                     write(_escape_cdata(text))
 999             for e in elem:
1000                 _serialize_html(write, e, qnames, None)
1001             if ltag not in HTML_EMPTY:
1002                 write("</" + tag + ">")
1003     if elem.tail:
1004         write(_escape_cdata(elem.tail))
1005 
1006 def _serialize_text(write, elem):
1007     for part in elem.itertext():
1008         write(part)
1009     if elem.tail:
1010         write(elem.tail)
1011 
# Output method name -> serializer function.
# The optional "c14n" method (_serialize_c14n) is not listed here; it is
# imported at the end of the module.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
)
1019 
1020 
def register_namespace(prefix, uri):
    """Register a namespace prefix in the global registry.

    Any existing entry that uses either the given *prefix* or the given
    namespace *uri* is removed before the new mapping is stored.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if *prefix* has the "ns<digits>" form, which is
    reserved for internal use.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # collect the conflicting uris first, so the dict is not modified
    # while it is being iterated
    stale = [
        known_uri
        for known_uri, known_prefix in _namespace_map.items()
        if known_uri == uri or known_prefix == prefix
    ]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1039 
# Global registry mapping namespace URIs to their preferred prefixes.
_namespace_map = dict((
    # "well-known" namespace prefixes
    ("http://www.w3.org/XML/1998/namespace", "xml"),
    ("http://www.w3.org/1999/xhtml", "html"),
    ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"),
    ("http://schemas.xmlsoap.org/wsdl/", "wsdl"),
    # xml schema
    ("http://www.w3.org/2001/XMLSchema", "xs"),
    ("http://www.w3.org/2001/XMLSchema-instance", "xsi"),
    # dublin core
    ("http://purl.org/dc/elements/1.1/", "dc"),
))
# Expose the registry on the function, for tests and troubleshooting
register_namespace._namespace_map = _namespace_map
1054 
1055 def _raise_serialization_error(text):
1056     raise TypeError(
1057         "cannot serialize %r (type %s)" % (text, type(text).__name__)
1058         )
1059 
1060 def _escape_cdata(text):
1061     # escape character data
1062     try:
1063         # it's worth avoiding do-nothing calls for strings that are
1064         # shorter than 500 character, or so.  assume that's, by far,
1065         # the most common case in most applications.
1066         if "&" in text:
1067             text = text.replace("&", "&amp;")
1068         if "<" in text:
1069             text = text.replace("<", "&lt;")
1070         if ">" in text:
1071             text = text.replace(">", "&gt;")
1072         return text
1073     except (TypeError, AttributeError):
1074         _raise_serialization_error(text)
1075 
1076 def _escape_attrib(text):
1077     # escape attribute value
1078     try:
1079         if "&" in text:
1080             text = text.replace("&", "&amp;")
1081         if "<" in text:
1082             text = text.replace("<", "&lt;")
1083         if ">" in text:
1084             text = text.replace(">", "&gt;")
1085         if "\"" in text:
1086             text = text.replace("\"", "&quot;")
1087         # The following business with carriage returns is to satisfy
1088         # Section 2.11 of the XML specification, stating that
1089         # CR or CR LN should be replaced with just LN
1090         # http://www.w3.org/TR/REC-xml/#sec-line-ends
1091         if "\r\n" in text:
1092             text = text.replace("\r\n", "\n")
1093         if "\r" in text:
1094             text = text.replace("\r", "\n")
1095         #The following four lines are issue 17582
1096         if "\n" in text:
1097             text = text.replace("\n", "&#10;")
1098         if "\t" in text:
1099             text = text.replace("\t", "&#09;")
1100         return text
1101     except (TypeError, AttributeError):
1102         _raise_serialization_error(text)
1103 
1104 def _escape_attrib_html(text):
1105     # escape attribute value
1106     try:
1107         if "&" in text:
1108             text = text.replace("&", "&amp;")
1109         if ">" in text:
1110             text = text.replace(">", "&gt;")
1111         if "\"" in text:
1112             text = text.replace("\"", "&quot;")
1113         return text
1114     except (TypeError, AttributeError):
1115         _raise_serialization_error(text)
1116 
1117 # --------------------------------------------------------------------
1118 
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If *encoding* is "unicode", the data is
    collected in a text buffer and a string is returned; otherwise it is
    collected in a bytes buffer and a bytestring is returned.

    *element* is an Element instance; *encoding*, *method* and
    *short_empty_elements* are passed on to ElementTree.write().  *method*
    can be one of "xml" (default), "html", "text" or "c14n".

    Returns an (optionally) encoded string containing the XML data.

    """
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1137 
1138 class _ListDataStream(io.BufferedIOBase):
1139     """An auxiliary stream accumulating into a list reference."""
1140     def __init__(self, lst):
1141         self.lst = lst
1142 
1143     def writable(self):
1144         return True
1145 
1146     def seekable(self):
1147         return True
1148 
1149     def write(self, b):
1150         self.lst.append(b)
1151 
1152     def tell(self):
1153         return len(self.lst)
1154 
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Generate the serialization of *element* as a list of chunks.

    The element is written through a _ListDataStream, so the returned list
    holds the pieces in the order they were written.  *encoding*, *method*
    and *short_empty_elements* are passed on to ElementTree.write().

    """
    chunks = []
    sink = _ListDataStream(chunks)
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return chunks
1162 
1163 
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file, followed by a newline unless the root's tail already
    ends with one.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    ends_with_newline = tail and tail[-1] == "\n"
    if not ends_with_newline:
        sys.stdout.write("\n")
1181 
1182 # --------------------------------------------------------------------
1183 # parsing
1184 
1185 
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance; both are handed to
    ElementTree.parse() on a freshly created tree.

    Return an ElementTree instance.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1198 
1199 
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    The returned iterator reports what is going on to the user based on the
    *events* it is initialized with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  Its ``root``
    attribute is None until parsing has finished, and then holds the value
    returned by the parser when it was closed.

    """
    # The Iterator ABC lives in collections.abc; the alias in the
    # collections namespace was removed in Python 3.10.  Imported locally
    # so this does not depend on the module-level imports.
    from collections.abc import Iterator

    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator():
        # Runs lazily: by the time the first item is requested, *source*
        # and *close_source* below have been assigned.
        try:
            while True:
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            yield from pullparser.read_events()
            it.root = root
        finally:
            # only close files that were opened here
            if close_source:
                source.close()

    class IterParseIterator(Iterator):
        __next__ = iterator().__next__
    it = IterParseIterator()
    it.root = None
    del iterator, IterParseIterator

    close_source = False
    if not hasattr(source, "read"):
        # *source* is a filename: open it ourselves and remember to close it
        source = open(source, "rb")
        close_source = True

    return it
1246 
1247 
class XMLPullParser:
    """Parser that is fed data in pieces and queues the resulting events.

    *events* is the sequence of event names to report; it defaults to
    ("end",).  Queued events are retrieved with read_events().
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # hook the parser up to the queue for event reporting
        wanted = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, wanted)

    def feed(self, data):
        """Feed encoded data to parser."""
        if self._parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            self._parser.feed(data)
        except SyntaxError as exc:
            # queued, so that read_events() raises it in sequence
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        parser = self._parser
        root = parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.  A queued exception is raised when
        it is reached.
        """
        queue = self._events_queue
        while queue:
            item = queue.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
1299 
1300 
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance; when omitted, an XMLParser targeting a
    new TreeBuilder is created.

    Returns whatever the parser's close() returns (an Element instance
    for the standard parser).

    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1316 
1317 
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance; when omitted, an XMLParser targeting a
    new TreeBuilder is created.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.  Elements whose "id" attribute is
    missing or empty are not included.

    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        if ident:
            by_id[ident] = node
    return root, by_id
1338 
# Parse XML document from string constant.  fromstring is an alias bound to
# the same function object as XML().
fromstring = XML
1341 
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, each of which is
    fed to the parser in turn; *parser* is an optional parser instance;
    when omitted, an XMLParser targeting a new TreeBuilder is created.

    Returns whatever the parser's close() returns (an Element instance
    for the standard parser).

    """
    active = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1356 
1357 # --------------------------------------------------------------------
1358 
1359 
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    with (tag, attrs) to create new element instances; it defaults to
    Element.

    """
    def __init__(self, element_factory=None):
        self._data = []     # text fragments not yet attached to an element
        self._elem = []     # stack of currently open elements
        self._last = None   # most recently opened or closed element
        self._tail = None   # true once an end tag has been seen
        self._factory = Element if element_factory is None else element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the collected text to the last element, as its tail if
        # we are after an end tag and as its text otherwise.
        if not self._data:
            return
        target = self._last
        if target is not None:
            joined = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = joined
            else:
                assert target.text is None, "internal error (text)"
                target.text = joined
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        open_elements = self._elem
        if open_elements:
            # nest under the innermost element that is still open
            open_elements[-1].append(opened)
        open_elements.append(opened)
        self._tail = 0
        return opened

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1432 
1433 
1434 # also see ElementTree and TreeBuilder
class XMLParser:
    """Element structure builder for XML source data based on the expat parser.

    *html* are predefined HTML entities (deprecated and not supported),
    *target* is an optional target object which defaults to an instance of the
    standard TreeBuilder class, *encoding* is an optional encoding string
    which if given, overrides the encoding specified in the XML file:
    http://www.iana.org/assignments/character-sets

    """

    def __init__(self, html=0, target=None, encoding=None):
        # *html* is accepted but not used anywhere in this constructor.
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: expat then reports namespaced
        # names as "uri}local", which _fixname() turns into "{uri}local".
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {} # name memo cache
        # main callbacks; a handler is only installed when the target
        # provides the corresponding method
        parser.DefaultHandlerExpand = self._default
        if hasattr(target, 'start'):
            parser.StartElementHandler = self._start
        if hasattr(target, 'end'):
            parser.EndElementHandler = self._end
        if hasattr(target, 'data'):
            parser.CharacterDataHandler = target.data
        # miscellaneous callbacks
        if hasattr(target, 'comment'):
            parser.CommentHandler = target.comment
        if hasattr(target, 'pi'):
            parser.ProcessingInstructionHandler = target.pi
        # Configure pyexpat: buffering, new-style attribute handling.
        parser.buffer_text = 1
        parser.ordered_attributes = 1
        parser.specified_attributes = 1
        self._doctype = None # a list while inside a doctype declaration
        self.entity = {} # entity name -> replacement text, used by _default()
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor.
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # Each handler below appends an (event, payload) tuple to the queue.
        # The values they need are bound as default arguments when the
        # handler is defined.  An unknown event name raises ValueError.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                parser.specified_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                def handler(prefix, uri, event=event_name, append=append):
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                def handler(prefix, event=event_name, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)

    def _raiseerror(self, value):
        # Re-raise an expat error as ParseError, carrying over its code and
        # its (lineno, offset) position.
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixname(self, key):
        # Expand a name reported by expat: a name containing "}" gets a
        # leading "{".  Results are memoized in self._names.
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name
        return name

    def _start(self, tag, attr_list):
        # Handler for expat's StartElementHandler. Since ordered_attributes
        # is set, the attributes are reported as a list of alternating
        # attribute name,value.
        fixname = self._fixname
        tag = fixname(tag)
        attrib = {}
        if attr_list:
            for i in range(0, len(attr_list), 2):
                attrib[fixname(attr_list[i])] = attr_list[i+1]
        return self.target.start(tag, attrib)

    def _end(self, tag):
        # Handler for expat's EndElementHandler.
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Installed as expat's DefaultHandlerExpand.  Handles two things:
        # text starting with "&", and the pieces of a doctype declaration.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                data_handler = self.target.data
            except AttributeError:
                return
            try:
                # text[1:-1] drops the first and last character ("&", ";")
                data_handler(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self.parser.ErrorLineNumber,
                    self.parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self.parser.ErrorLineNumber
                err.offset = self.parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents: non-blank pieces are collected until
            # the list matches one of the two forms handled below
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                    if pubid:
                        # drop the first and last character of the id
                        pubid = pubid[1:-1]
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                elif self.doctype != self._XMLParser__doctype:
                    # doctype() differs from the sentinel, i.e. it has been
                    # redefined: warn about the deprecated call, then call it
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    def doctype(self, name, pubid, system):
        """(Deprecated)  Handle doctype declaration

        *name* is the Doctype name, *pubid* is the public identifier,
        and *system* is the system identifier.

        """
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    def feed(self, data):
        """Feed encoded data to parser."""
        try:
            self.parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    def close(self):
        """Finish feeding data to parser and return element structure."""
        try:
            self.parser.Parse("", 1) # end of data
        except self._error as v:
            self._raiseerror(v)
        try:
            close_handler = self.target.close
        except AttributeError:
            # the target has no close(): nothing is returned
            pass
        else:
            return close_handler()
        finally:
            # get rid of circular references
            del self.parser, self._parser
            del self.target, self._target
1644 
1645 
# Import the C accelerators, if available; otherwise the pure-Python
# definitions above stay in place.
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests).
    _Element_Py = Element

    # Expected to provide: Element, SubElement, ParseError, TreeBuilder,
    # XMLParser
    from _elementtree import *
except ImportError:
    pass
xml.etree.ElementTree

 

posted on 2018-01-29 22:25  lmgsanm  阅读(622)  评论(0编辑  收藏  举报