xml文件格式相关操作
[可扩展标记语言](https://baike.baidu.com/item/可扩展标记语言/2885849),是一种简单的数据存储语言,XML 被设计用来传输和存储数据。
- 存储,可用来存放配置文件,例如:java的配置文件。
- 传输,网络传输时以这种格式存在,例如:早期ajax传输的数据、soap协议等。
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2023</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2026</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2026</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
注意:在Python开发中用得相对比较少,大家作为了解即可(后期课程在讲解微信支付、微信公众号消息处理时会用到基于xml的数据传输)。
例如:https://developers.weixin.qq.com/doc/offiaccount/Message_Management/Receiving_standard_messages.html
-
读取文件和内容
from xml.etree import ElementTree as ET

# Open and parse the XML file with ElementTree.
tree = ET.parse("files/xo.xml")

# Fetch the root element of the parsed document.
root = tree.getroot()
print(root)  # <Element 'data' at 0x7f94e02763b0>
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# Parse the XML text directly; ET.XML returns the root element.
root = ET.XML(content)
print(root)  # <Element 'data' at 0x7fdaa019cea0>
-
读取节点数据
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein" id="999" >
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# Root element: <data>.
root = ET.XML(content)

# find() returns the first matching direct child.
country_object = root.find("country")
print(country_object.tag, country_object.attrib)

gdppc_object = country_object.find("gdppc")
print(gdppc_object.tag, gdppc_object.attrib, gdppc_object.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# Root element: <data>.
root = ET.XML(content)

# Iterating an element yields its direct children.
for child in root:
    # e.g. child.tag == "country", child.attrib == {"name": "Liechtenstein"}
    print(child.tag, child.attrib)
    for node in child:
        print(node.tag, node.attrib, node.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

# iter() walks the whole subtree, so <year> is found at any depth.
for child in root.iter('year'):
    print(child.tag, child.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

# findall() returns every matching direct child as a list.
v1 = root.findall('country')
print(v1)

# find() returns only the first match, so this is the <rank> of the first <country>.
v2 = root.find('country').find('rank')
print(v2.text)
-
修改和删除节点
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

# Modify the text and attributes of a node.
rank = root.find('country').find('rank')
print(rank.text)
rank.text = "999"
rank.set('update', '2020-11-11')
print(rank.text, rank.attrib)

# ----- Save to file -----
tree = ET.ElementTree(root)
tree.write("new.xml", encoding='utf-8')

# Delete a node: remove() takes the child element itself; here the first <country>.
root.remove(root.find('country'))
print(root.findall('country'))

# ----- Save to file -----
tree = ET.ElementTree(root)
tree.write("newnew.xml", encoding='utf-8')
-
构建文档
<home>
<son name="儿1">
<grandson name="儿11"></grandson>
<grandson name="儿12"></grandson>
</son>
<son name="儿2"></son>
</home>
from xml.etree import ElementTree as ET

# Create the root element.
root = ET.Element("home")

# Create the elder son.
son1 = ET.Element('son', {'name': '儿1'})
# Create the younger son.
son2 = ET.Element('son', {"name": '儿2'})

# Create two grandsons under the elder son.
grandson1 = ET.Element('grandson', {'name': '儿11'})
grandson2 = ET.Element('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)

# Attach both sons to the root.
root.append(son1)
root.append(son2)

# short_empty_elements=False writes <tag></tag> instead of <tag />.
tree = ET.ElementTree(root)
tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
<famliy>
<son name="儿1">
<grandson name="儿11"></grandson>
<grandson name="儿12"></grandson>
</son>
<son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET

# Create the root element.
root = ET.Element("famliy")

# makeelement() creates a detached element of the same type; it must still be appended.
# NOTE: the standard-library docs recommend SubElement() over calling makeelement() directly.
# Create the elder son.
son1 = root.makeelement('son', {'name': '儿1'})
# Create the younger son.
son2 = root.makeelement('son', {"name": '儿2'})

# Create two grandsons under the elder son.
grandson1 = son1.makeelement('grandson', {'name': '儿11'})
grandson2 = son1.makeelement('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)

# Attach both sons to the root.
root.append(son1)
root.append(son2)

# short_empty_elements=False keeps empty elements as <tag></tag>; the default
# would collapse them to <tag />.
tree = ET.ElementTree(root)
tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
<famliy>
<son name="儿1">
<age name="儿11">孙子</age>
</son>
<son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET

# Create the root element.
root = ET.Element("famliy")

# SubElement() creates the child and attaches it to the parent in one step.
# Create the elder son.
son1 = ET.SubElement(root, "son", attrib={'name': '儿1'})
# Create the younger son.
son2 = ET.SubElement(root, "son", attrib={"name": "儿2"})

# Create one grandson under the elder son and give it text content.
grandson1 = ET.SubElement(son1, "age", attrib={'name': '儿11'})
grandson1.text = '孙子'

# Build the document object and write it out. short_empty_elements=False keeps
# the empty <son> as <son ...></son>; the default would write <son ... />.
et = ET.ElementTree(root)
et.write("test.xml", encoding="utf-8", short_empty_elements=False)
<user><![CDATA[你好呀]]></user>
from xml.dom import minidom

# ElementTree escapes "<" in element text, so assigning "<![CDATA[...]]" to
# .text would be written as "&lt;![CDATA[...". A real CDATA section is built
# through the DOM API instead.
doc = minidom.Document()

# Create the root element and put the CDATA section inside it.
root = doc.createElement("user")
root.appendChild(doc.createCDATASection("你好呀"))
doc.appendChild(root)

# Write only the element (no XML declaration) as UTF-8.
with open("test.xml", "w", encoding="utf-8") as f:
    f.write(root.toxml())
案例:
from xml.etree import ElementTree as ET

content = """<xml>
    <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
    <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
    <CreateTime>1395658920</CreateTime>
    <MsgType><![CDATA[event]]></MsgType>
    <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
    <MsgID>200163836</MsgID>
    <Status><![CDATA[success]]></Status>
</xml>"""

root = ET.XML(content)

# Map each direct child's tag to its text; the parser unwraps CDATA into plain text.
info = {node.tag: node.text for node in root}
print(info)
示例:
# ############### 2. Fetch the weather for a given city online and write it to Excel. #############
import os
from xml.etree import ElementTree as ET

import requests
from openpyxl import workbook

# Resolve the output path next to this script.
base_dir = os.path.dirname(os.path.abspath(__file__))
target_excel_file_path = os.path.join(base_dir, 'weather.xlsx')

# A new workbook comes with one default sheet named "Sheet"; drop it so that
# only the per-city sheets remain.
wb = workbook.Workbook()
del wb['Sheet']

url = "http://ws.webxml.com.cn//WebServices/WeatherWebService.asmx/getWeatherbyCityName"

while True:
    # Ask for a city and fetch its weather information.
    city = input("请输入城市(Q/q退出):")
    if city.upper() == "Q":
        break

    # Pass the city through params so requests URL-encodes it; the timeout
    # keeps the prompt from hanging forever on a dead connection.
    res = requests.get(url=url, params={"theCityName": city}, timeout=10)
    res.raise_for_status()

    # 1. Extract the data from the XML response. Parsing the raw bytes lets the
    #    XML parser honour the document's own encoding declaration.
    root = ET.XML(res.content)

    # 2. Create one sheet per city and write each child's text into column 1.
    sheet = wb.create_sheet(city)
    for row_index, node in enumerate(root, 1):
        sheet.cell(row_index, 1).value = node.text

# Quitting before entering any city leaves the workbook with no sheets, which
# openpyxl refuses to save; only write the file when there is something in it.
if wb.sheetnames:
    wb.save(target_excel_file_path)
浙公网安备 33010602011771号