python 2.03 XML Excel 使用

4.XML格式文件

可扩展标记语言,是一种简单的数据存储语言,XML 被设计用来传输和存储数据。

  • 存储,可用来存放配置文件,例如:java的配置文件。
  • 传输,网络传输时以这种格式存在,例如:早期ajax传输的数据、soap协议等。
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2026</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>

注意:在Python开发中用的相对来比较少,大家作为了解即可(后期课程在讲解微信支付、微信公众号消息处理 时会用到基于xml传输数据)。

例如:https://developers.weixin.qq.com/doc/offiaccount/Message_Management/Receiving_standard_messages.html

4.1 读取文件和内容

from xml.etree import ElementTree as ET

# ET去打开xml文件
tree = ET.parse("files/xo.xml")

# 获取根标签
root = tree.getroot()

print(root) # <Element 'data' at 0x7f94e02763b0>
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)
print(root)  # <Element 'data' at 0x7fdaa019cea0>

4.2 读取节点数据

from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein" id="999" >
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# 获取根标签 data
root = ET.XML(content)

country_object = root.find("country")
print(country_object.tag, country_object.attrib)
gdppc_object = country_object.find("gdppc")
print(gdppc_object.tag,gdppc_object.attrib,gdppc_object.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# 获取根标签 data
root = ET.XML(content)

# 获取data标签的孩子标签
for child in root:
    # child.tag = conntry
    # child.attrib = {"name":"Liechtenstein"}
    print(child.tag, child.attrib)
    for node in child:
        print(node.tag, node.attrib, node.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

for child in root.iter('year'):
    print(child.tag, child.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)
v1 = root.findall('country')
print(v1)

v2 = root.find('country').find('rank')
print(v2.text)

4.3 修改和删除节点

from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

# 修改节点内容和属性
rank = root.find('country').find('rank')
print(rank.text)
rank.text = "999"
rank.set('update', '2020-11-11')
print(rank.text, rank.attrib)
############ 保存文件 ############
tree = ET.ElementTree(root)
tree.write("new.xml", encoding='utf-8')


# 删除节点
root.remove( root.find('country') )
print(root.findall('country'))

############ 保存文件 ############
tree = ET.ElementTree(root)
tree.write("newnew.xml", encoding='utf-8')

4.4 构建文档

<home>
    <son name="儿1">
        <grandson name="儿11"></grandson>
        <grandson name="儿12"></grandson>
    </son>
    <son name="儿2"></son>
</home>
from xml.etree import ElementTree as ET

# 创建根标签
root = ET.Element("home")

# 创建节点大儿子
son1 = ET.Element('son', {'name': '儿1'})
# 创建小儿子
son2 = ET.Element('son', {"name": '儿2'})

# 在大儿子中创建两个孙子
grandson1 = ET.Element('grandson', {'name': '儿11'})
grandson2 = ET.Element('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)

# 把儿子添加到根节点中
root.append(son1)
root.append(son2)

tree = ET.ElementTree(root)
tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
<famliy>
    <son name="儿1">
        <grandson name="儿11"></grandson>
        <grandson name="儿12"></grandson>
    </son>
    <son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET

# 创建根节点
root = ET.Element("famliy")


# 创建大儿子
son1 = root.makeelement('son', {'name': '儿1'})
# 创建小儿子
son2 = root.makeelement('son', {"name": '儿2'})

# 在大儿子中创建两个孙子
grandson1 = son1.makeelement('grandson', {'name': '儿11'})
grandson2 = son1.makeelement('grandson', {'name': '儿12'})

son1.append(grandson1)
son1.append(grandson2)


# 把儿子添加到根节点中
root.append(son1)
root.append(son2)

tree = ET.ElementTree(root)
tree.write('oooo.xml',encoding='utf-8')
<famliy>
	<son name="儿1">
    	<age name="儿11">孙子</age>
    </son>
	<son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET


# 创建根节点
root = ET.Element("famliy")


# 创建节点大儿子
son1 = ET.SubElement(root, "son", attrib={'name': '儿1'})
# 创建小儿子
son2 = ET.SubElement(root, "son", attrib={"name": "儿2"})

# 在大儿子中创建一个孙子
grandson1 = ET.SubElement(son1, "age", attrib={'name': '儿11'})
grandson1.text = '孙子'


et = ET.ElementTree(root)  #生成文档对象
et.write("test.xml", encoding="utf-8")
<user><![CDATA[你好呀]]</user>
from xml.etree import ElementTree as ET

# 创建根节点
root = ET.Element("user")
root.text = "<![CDATA[你好呀]]"

et = ET.ElementTree(root)  # 生成文档对象
et.write("test.xml", encoding="utf-8")

案例:

content = """<xml>
    <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
    <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
    <CreateTime>1395658920</CreateTime>
    <MsgType><![CDATA[event]]></MsgType>
    <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
    <MsgID>200163836</MsgID>
    <Status><![CDATA[success]]></Status>
</xml>"""

from xml.etree import ElementTree as ET

info = {}
root = ET.XML(content)
for node in root:
    # print(node.tag,node.text)
    info[node.tag] = node.text
print(info)

5.Excel格式文件

Python内部未提供处理Excel文件的功能,想要在Python中操作Excel需要按照第三方的模块。

pip install openpyxl

此模块中集成了Python操作Excel的相关功能,接下来我们就需要去学习该模块提供的相关功能即可。

from  openpyxl import load_workbook

workbook = load_workbook('123.xlsx')
打开一个excel 获取一个句柄


1 通过excel句柄 对shett 进行增删改查
查 shtte 名称

sheetnames 方法获取全部的sheet 名称 ,返回值是列表存储
print(workbook.sheetnames)


worksheets 方法是存储sheet名称的列表 ,通过sheet名称 或者索引获取sheet  的句柄
sheetname1  = workbook.worksheets[0]



通过Excel 的句柄执行查列表的值, 也是能获取到 sheet的句柄
sheetname2 = workbook['姓名']


循环shett名称
for i in workbook:
    print(i)

for i in workbook.sheetnames:
    print(i)


# 定位某一个单元
2.1  获取到sheet的句柄  通过行和列 对单元格进行定位,获取单元格的句柄
ranks_key = sheetname2.cell(1,1)

2.2 通过sheet的这个对象找 列名+ 数字 也能 定位到单元格
ranks_ket = sheetname2['A1']


1. 单元格内容 value
print(ranks_key.value)

2. 单元格的样式 style
print(ranks_key.style)

3  单元格的字体  font
print(ranks_key.font)

4  单元格的排列  alignment  (对齐方式)
print(ranks_key.alignment)



定位到某一行,sheet 句柄的列表中元素是一个个的高仿列表从1开始,第一行就是[1],
  高仿列表中每个元素就是每一个单元格

for line in sheetname2[0]:
    print(line.value)

进阶 获取所有行, sheet对象的 rows方法就是返回的行的列表,每一个元素是代表一行的元组,元组内的元素是一个个的单元格
for line in sheetname2.rows:
    print(line)
 取每一行的第0个元素 就是第一列
    print(line[0].value)

进阶 获取所有的列    columns 和 rows 作用一致返回所有的列

for line in sheetname2.columns:
    print(line)
 取每一行的第0个元素 就是第一列
    print(line[0].value)

新建Excel

from  openpyxl import workbook

new_wb  = workbook.Workbook()

sheet = new_wb.worksheets[0]

ranks_key = sheet.cell(1,1)

ranks_key.value = '123'

new_wb.save('2.xlsx')


旧文件修改
from  openpyxl import load_workbook

wb = load_workbook('123.xlsx')
sheet = wb.worksheets[0]
ranks = sheet.cell(1,2)
ranks.value = '123'
wb.save('123.xlsx')


sheet 专讲

from openpyxl import load_workbook

wb = load_workbook('123.xlsx')


sheet 更改名称  ,同 sheet对象 更改自己的属性值
sheet = wb.worksheets[0]
sheet.title='name1'
wb.save('123.xlsx')

sheet 创建 ,通过文件对象 创建一个sheet ,设置在sheet在文件中的位置,0 排第一,依次排序
wb.create_sheet('q123',index=0)

复制sheet  对象wb 复制一个源sheet 赋值新的名称
new_sheet = wb.copy_worksheet(wb.worksheets[0])
new_sheet.title='q1234'


删除 sheet
del wb['qqqqq']


更改sheet的title的颜色,在sheet的特性中设置
new_sheet = wb.worksheets[0]
new_sheet.sheet_properties.tabColor='1072BA'


前面学到 单元格的内容修改, 单一定位值修改
sheet = wb.worksheets[0]

sheet.cell(1,1).value= '3333'
sheet['A1'] = '444'

批量修改 也是所谓的矩阵修改,以定位符 列的计算单位  A1 D5 一个起始点 一个终止点

data = sheet['A1':'D5']
可以简单的理解就是从A1和D5 两个点横向 纵向组成的矩形的, 这个生成的矩阵是以 行为单位组成 ,
循环取每一个单元格赋值 即可
for line in data:
    for lines in line:
        lines.value = '22'

wb.save('123.xlsx')

5.1 读Excel

  • 读sheet

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    
    # sheet相关操作
    
    # 1.获取excel文件中的所有sheet名称
    """
    print(wb.sheetnames) # ['数据导出', '用户列表', 'Sheet1', 'Sheet2']
    """
    
    # 2.选择sheet,基于sheet名称
    """
    sheet = wb["数据导出"]
    cell = sheet.cell(1, 2)
    print(cell.value)
    """
    
    # 3.选择sheet,基于索引位置
    """
    sheet = wb.worksheets[0]
    cell = sheet.cell(1,2)
    print(cell.value)
    """
    
    # 4.循环所有的sheet
    """
    for name in wb.sheetnames:
        sheet = wb[name]
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    """
    for sheet in wb.worksheets:
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    """
    for sheet in wb:
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    
  • 读sheet中单元格的数据

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    sheet = wb.worksheets[0]
    
    # 1.获取第N行第N列的单元格(位置是从1开始)
    """
    cell = sheet.cell(1, 1)
    
    print(cell.value)
    print(cell.style)
    print(cell.font)
    print(cell.alignment)
    """
    
    # 2.获取某个单元格
    """
    c1 = sheet["A2"]
    print(c1.value)
    
    c2 = sheet['D4']
    print(c2.value)
    """
    
    # 3.第N行所有的单元格
    """
    for cell in sheet[1]:
        print(cell.value)
    """
    
    # 4.所有行的数据(获取某一列数据)
    """
    for row in sheet.rows:
        print(row[0].value, row[1].value)
    """
    
    # 5.获取所有列的数据
    """
    for col in sheet.columns:
        print(col[1].value)
    """
    
  • 读合并的单元格

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    sheet = wb.worksheets[2]
    
    # 获取第N行第N列的单元格(位置是从1开始)
    c1 = sheet.cell(1, 1)
    print(c1)  # <Cell 'Sheet1'.A1>
    print(c1.value) # 用户信息
    
    c2 = sheet.cell(1, 2)
    print(c2)  # <MergedCell 'Sheet1'.B1>
    print(c2.value) # None
    
    
    from openpyxl import load_workbook
    
    wb = load_workbook('files/p1.xlsx')
    sheet = wb.worksheets[2]
    for row in sheet.rows:
        print(row)
    
    >>> 输出结果
    (<Cell 'Sheet1'.A1>, <MergedCell 'Sheet1'.B1>, <Cell 'Sheet1'.C1>)
    (<Cell 'Sheet1'.A2>, <Cell 'Sheet1'.B2>, <Cell 'Sheet1'.C2>)
    (<Cell 'Sheet1'.A3>, <Cell 'Sheet1'.B3>, <Cell 'Sheet1'.C3>)
    (<MergedCell 'Sheet1'.A4>, <Cell 'Sheet1'.B4>, <Cell 'Sheet1'.C4>)
    (<Cell 'Sheet1'.A5>, <Cell 'Sheet1'.B5>, <Cell 'Sheet1'.C5>)
    

5.1 写Excel

在Excel中想要写文件,大致要分为在:

  • 原Excel文件基础上写内容。

    from openpyxl import load_workbook
    
    wb = load_workbook('files/p1.xlsx')
    sheet = wb.worksheets[0]
    
    # 找到单元格,并修改单元格的内容
    cell = sheet.cell(1, 1)
    cell.value = "新的开始"
    
    # 将excel文件保存到p2.xlsx文件中
    wb.save("files/p2.xlsx")
    
  • 新创建Excel文件写内容。

    from openpyxl import workbook
    
    # 创建excel且默认会创建一个sheet(名称为Sheet)
    wb = workbook.Workbook()
    
    sheet = wb.worksheets[0] # 或 sheet = wb["Sheet"]
    
    # 找到单元格,并修改单元格的内容
    cell = sheet.cell(1, 1)
    cell.value = "新的开始"
    
    # 将excel文件保存到p2.xlsx文件中
    wb.save("files/p2.xlsx")
    

在了解了如何读取Excel和创建Excel之后,后续对于Excel中的sheet和cell操作基本上都相同。

from openpyxl import workbook

wb = workbook.Workbook() # Sheet

# 1. 修改sheet名称
"""
sheet = wb.worksheets[0]
sheet.title = "数据集"
wb.save("p2.xlsx")
"""

# 2. 创建sheet并设置sheet颜色
"""
sheet = wb.create_sheet("工作计划", 0)
sheet.sheet_properties.tabColor = "1072BA"
wb.save("p2.xlsx")
"""

# 3. 默认打开的sheet
"""
wb.active = 0
wb.save("p2.xlsx")
"""

# 4. 拷贝sheet
"""
sheet = wb.create_sheet("工作计划")
sheet.sheet_properties.tabColor = "1072BA"

new_sheet = wb.copy_worksheet(wb["Sheet"])
new_sheet.title = "新的计划"
wb.save("p2.xlsx")
"""

# 5.删除sheet
"""
del wb["用户列表"]
wb.save('files/p2.xlsx')
"""
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Border, Side, Font, PatternFill, GradientFill


wb = load_workbook('files/p1.xlsx')

sheet = wb.worksheets[1]

# 1. 获取某个单元格,修改值
"""
cell = sheet.cell(1, 1)
cell.value = "开始"
wb.save("p2.xlsx")
"""

# 2.  获取某个单元格,修改值
"""
sheet["B3"] = "Alex"
wb.save("p2.xlsx")
"""

# 3. 获取某些单元格,修改值
"""
cell_list = sheet["B2":"C3"]
for row in cell_list:
    for cell in row:
        cell.value = "新的值"
wb.save("p2.xlsx")
"""

# 4. 对齐方式
"""
cell = sheet.cell(1, 1)

# horizontal,水平方向对齐方式:"general", "left", "center", "right", "fill", "justify", "centerContinuous", "distributed"
# vertical,垂直方向对齐方式:"top", "center", "bottom", "justify", "distributed"
# text_rotation,旋转角度。
# wrap_text,是否自动换行。
cell.alignment = Alignment(horizontal='center', vertical='distributed', text_rotation=45, wrap_text=True)
wb.save("p2.xlsx")
"""

# 5. 边框
# side的style有如下:dashDot','dashDotDot', 'dashed','dotted','double','hair', 'medium', 'mediumDashDot', 'mediumDashDotDot','mediumDashed', 'slantDashDot', 'thick', 'thin'
"""
cell = sheet.cell(9, 2)
cell.border = Border(
    top=Side(style="thin", color="FFB6C1"), 
    bottom=Side(style="dashed", color="FFB6C1"),
    left=Side(style="dashed", color="FFB6C1"),
    right=Side(style="dashed", color="9932CC"),
    diagonal=Side(style="thin", color="483D8B"),  # 对角线
    diagonalUp=True,  # 左下 ~ 右上
    diagonalDown=True  # 左上 ~ 右下
)
wb.save("p2.xlsx")
"""

# 6.字体
"""
cell = sheet.cell(5, 1)
cell.font = Font(name="微软雅黑", size=45, color="ff0000", underline="single")
wb.save("p2.xlsx")
"""

# 7.背景色
"""
cell = sheet.cell(5, 3)
cell.fill = PatternFill("solid", fgColor="99ccff")
wb.save("p2.xlsx")
"""

# 8.渐变背景色
"""
cell = sheet.cell(5, 5)
cell.fill = GradientFill("linear", stop=("FFFFFF", "99ccff", "000000"))
wb.save("p2.xlsx")
"""

# 9.宽高(索引从1开始)
"""
sheet.row_dimensions[1].height = 50
sheet.column_dimensions["E"].width = 100
wb.save("p2.xlsx")
"""

# 10.合并单元格
"""
sheet.merge_cells("B2:D8")
sheet.merge_cells(start_row=15, start_column=3, end_row=18, end_column=8)
wb.save("p2.xlsx")
"""
"""
sheet.unmerge_cells("B2:D8")
wb.save("p2.xlsx")
"""

# 11.写入公式
"""
sheet = wb.worksheets[3]
sheet["D1"] = "合计"
sheet["D2"] = "=B2*C2"
wb.save("p2.xlsx")
"""
"""
sheet = wb.worksheets[3]
sheet["D3"] = "=SUM(B3,C3)"
wb.save("p2.xlsx")
"""

# 12.删除
"""
# idx,要删除的索引位置
# amount,从索引位置开始要删除的个数(默认为1)
sheet.delete_rows(idx=1, amount=20)
sheet.delete_cols(idx=1, amount=3)
wb.save("p2.xlsx")
"""

# 13.插入
"""
sheet.insert_rows(idx=5, amount=10)
sheet.insert_cols(idx=3, amount=2)
wb.save("p2.xlsx")
"""

# 14.循环写内容
"""
sheet = wb["Sheet"]
cell_range = sheet['A1:C2']
for row in cell_range:
    for cell in row:
        cell.value = "xx"

for row in sheet.iter_rows(min_row=5, min_col=1, max_col=7, max_row=10):
    for cell in row:
        cell.value = "oo"
wb.save("p2.xlsx")
"""

# 15.移动
"""
# 将H2:J10范围的数据,向右移动15个位置、向上移动1个位置
sheet.move_range("H2:J10",rows=1, cols=15)
wb.save("p2.xlsx")
"""
"""
sheet = wb.worksheets[3]
sheet["D1"] = "合计"
sheet["D2"] = "=B2*C2"
sheet["D3"] = "=SUM(B3,C3)"
sheet.move_range("B1:D3",cols=10, translate=True) # 自动翻译公式
wb.save("p2.xlsx")
"""

# 16.打印区域
"""
sheet.print_area = "A1:D200"
wb.save("p2.xlsx")
"""

# 17.打印时,每个页面的固定表头
"""
sheet.print_title_cols = "A:D"
sheet.print_title_rows = "1:3"
wb.save("p2.xlsx")
"""

6.压缩文件

基于Python内置的shutil模块可以实现对压缩文件的操作。

import shutil

# 1. 压缩文件
"""
# base_name,压缩后的压缩包文件
# format,压缩的格式,例如:"zip", "tar", "gztar", "bztar", or "xztar".
# root_dir,要压缩的文件夹路径
"""
# shutil.make_archive(base_name=r'datafile',format='zip',root_dir=r'files')


# 2. 解压文件
"""
# filename,要解压的压缩包文件
# extract_dir,解压的路径
# format,压缩文件格式
"""
# shutil.unpack_archive(filename=r'datafile.zip', extract_dir=r'xxxxxx/xo', format='zip')

7.路径相关

7.1 转义

windows路径使用的是\,linux路径使用的是/。

特别的,在windows系统中如果有这样的一个路径 D:\nxxx\txxx\x1,程序会报错。因为在路径中存在特殊符 \n(换行符)和\t(制表符),Python解释器无法自动区分。

所以,在windows中编写路径时,一般有两种方式:

  • 加转义符,例如:"D:\\nxxx\\txxx\\x1"
  • 路径前加r,例如:r"D:\\nxxx\\txxx\\x1"

7.2 程序当前路径

项目中如果使用了相对路径,那么一定要注意当前所在的位置。

例如:在/Users/wupeiqi/PycharmProjects/CodeRepository/路径下编写 demo.py文件

with open("a1.txt", mode='w', encoding='utf-8') as f:
    f.write("你好呀")

用以下两种方式去运行:

  • 方式1,文件会创建在 /Users/wupeiqi/PycharmProjects/CodeRepository/ 目录下。

    cd /Users/wupeiqi/PycharmProjects/CodeRepository/
    python demo.py
    
  • 方式2,文件会创建在 /Users/wupeiqi目录下。

    cd /Users/wupeiqi
    python /Users/wupeiqi/PycharmProjects/CodeRepository/demo.py
    
import os

"""
# 1.获取当前运行的py脚本所在路径
abs = os.path.abspath(__file__)
print(abs) # /Users/wupeiqi/PycharmProjects/luffyCourse/day09/20.路径相关.py
path = os.path.dirname(abs)
print(path) # /Users/wupeiqi/PycharmProjects/luffyCourse/day09
"""
base_dir = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.join(base_dir, 'files', 'info.txt')
print(file_path)
if os.path.exists(file_path):
    file_object = open(file_path, mode='r', encoding='utf-8')
    data = file_object.read()
    file_object.close()

    print(data)
else:
    print('文件路径不存在')

7.3 文件和路径相关

import shutil
import os

# 1. 获取当前脚本绝对路径
"""
abs_path = os.path.abspath(__file__)
print(abs_path)
"""

# 2. 获取当前文件的上级目录
"""
base_path = os.path.dirname( os.path.dirname(路径) )
print(base_path)
"""

# 3. 路径拼接
"""
p1 = os.path.join(base_path, 'xx')
print(p1)

p2 = os.path.join(base_path, 'xx', 'oo', 'a1.png')
print(p2)
"""

# 4. 判断路径是否存在
"""
exists = os.path.exists(p1)
print(exists)
"""

# 5. 创建文件夹
"""
os.makedirs(路径)
"""
"""
path = os.path.join(base_path, 'xx', 'oo', 'uuuu')
if not os.path.exists(path):
    os.makedirs(path)
"""

# 6. 是否是文件夹
"""
file_path = os.path.join(base_path, 'xx', 'oo', 'uuuu.png')
is_dir = os.path.isdir(file_path)
print(is_dir) # False

folder_path = os.path.join(base_path, 'xx', 'oo', 'uuuu')
is_dir = os.path.isdir(folder_path)
print(is_dir) # True

"""

# 7. 删除文件或文件夹
"""
os.remove("文件路径")
"""
"""
path = os.path.join(base_path, 'xx')
shutil.rmtree(path)
"""

# 8. 拷贝文件夹
"""
shutil.copytree("/Users/wupeiqi/Desktop/图/csdn/","/Users/wupeiqi/PycharmProjects/CodeRepository/files")
"""

# 9.拷贝文件
"""
shutil.copy("/Users/wupeiqi/Desktop/图/csdn/WX20201123-112406@2x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/")
shutil.copy("/Users/wupeiqi/Desktop/图/csdn/WX20201123-112406@2x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/x.png")
"""

# 10.文件或文件夹重命名
"""
shutil.move("/Users/wupeiqi/PycharmProjects/CodeRepository/x.png","/Users/wupeiqi/PycharmProjects/CodeRepository/xxxx.png")
shutil.move("/Users/wupeiqi/PycharmProjects/CodeRepository/files","/Users/wupeiqi/PycharmProjects/CodeRepository/images")
"""

总结

今天我们主要围绕着文件 相关的操作来展开进行讲解,让大家能够基于Python处理不同格式的文件。由于涉及的知识点比较多,所以今日的内容学起来会比较耗时,但都比较简单,只需要理解并编写好相关笔记以便后期开发时翻阅。

  1. 文件相对路径,在使用相对路径时可能会执行程序的目录不同,导致路径出问题。所以,如若使用相对路径请务必清楚当前运行程序所在目录。

  2. 文件绝对路径(推荐),不要将文件路径写死,而是基于 os 模块中的相关功能自动化获取绝对路径,以方便项目移动到其他文件或电脑上。

    import os
    base_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(base_dir, 'files', 'info.txt')
    
  3. 路径转义

    • 手动写路径,需要自己在路径中添加 r 或 加入 \ 来进行处理。
    • 基于os.path.join拼接,内部自动处理,不需要手动处理。
  4. 内置函数、内置模块、第三方模块的区别?

  5. 如何去下载安装第三方模块?

    pip install 模块名称
    
    • requests模块,可以用来发送网络请求。
    • openpyxl模块,处理Excel格式的文件。
  6. 基本文件的读写、打开模式、上下文管理。

  7. 其他格式:csv、ini、xml、excel格式的处理(无序记忆,做好笔记即可)。

作业

  1. 基于csv格式实现 用户的注册 & 登录认证。详细需求如下:

    • 用户注册时,新注册用户要写入文件csv文件中,输入Q或q则退出。
    • 用户登录时,逐行读取csv文件中的用户信息并进行校验。
    • 提示:文件路径须使用os模块构造的绝对路径的方式。
import os

abs_dir = os.path.dirname(os.path.abspath(__file__))

if not os.path.exists(os.path.join(abs_dir, 'db')):
    os.makedirs(os.path.join(abs_dir, 'db'))

if not os.path.exists(os.path.join(abs_dir, 'db', 'user.csv')):
    with open(os.path.join(abs_dir, 'db', 'user.csv'), mode='w+') as file_object:
        pass

login_state =None


key_name = input("请输入要注册的用户名:").strip()
key_pwd = input("请输入要注册的密码:").strip()

with open(os.path.join(abs_dir, 'db', 'user.csv'), mode='a+', encoding='utf-8') as writefile_object:
        writefile_object.write(f'{key_name},{key_pwd}\n')
        print('恭喜注册成功')


login_name = input("请输入要登录的用户名:").strip()
login_pwd = input("请输入要登录的密码:").strip()
with open(os.path.join(abs_dir, 'db', 'user.csv'), encoding='utf-8') as readfile_object:
        for line in readfile_object:
            if login_name == line.strip().split(',')[0] and login_pwd == line.strip().split(',')[1]:
                login_state = True

if login_state:
    print('登录成功')
else:
    print('登录失败')

更新版
import os

abs_dir = os.path.dirname(os.path.abspath(__file__))

if not os.path.exists(os.path.join(abs_dir, 'db.csv')):
    os.makedirs(os.path.join(abs_dir, 'db'))

if not os.path.exists(os.path.join(abs_dir, 'db', 'user.csv')):
    with open(os.path.join(abs_dir, 'db', 'user.csv'), mode='w+') as file_object:
        # pass




key_name = input("请输入要注册的用户名:").strip()
key_pwd = input("请输入要注册的密码:").strip()

with open(os.path.join(abs_dir, 'db', 'user.csv'), mode='a+', encoding='utf-8') as writefile_object:
        writefile_object.write(f'{key_name},{key_pwd}\n')
        writefile_object.flush()

        print('恭喜注册成功')

login_state =None
login_name = input("请输入要登录的用户名:").strip()
login_pwd = input("请输入要登录的密码:").strip()
with open(os.path.join(abs_dir, 'db', 'user.csv'), encoding='utf-8') as readfile_object:
        for line in readfile_object:
            user,passwd=line.strip().split(',')
            if login_name == user and login_pwd == passwd:
                print('登录成功')
        else:
            print('登录失败')
  #  使用 for else 进行节约代码
  #  使用 解压赋值 节约代码 
  1. 补充代码:实现去网上获取指定地区的天气信息,并写入到Excel中。

    import requests
    
    while True:
        city = input("请输入城市(Q/q退出):")
        if city.upper() == "Q":
            break
        url = "http://ws.webxml.com.cn//WebServices/WeatherWebService.asmx/getWeatherbyCityName?theCityName={}".format(city)
        res = requests.get(url=url)
        print(res.text)
    
        # 1.提取XML格式中的数据
        # 2.为每个城市创建一个sheet,并将获取的xml格式中的数据写入到excel中。 
    
  2. 读取ini文件内容,按照规则写入到Excel中。

    • ini文件内容如下:

      [mysqld]
      datadir=/var/lib/mysql
      socket=/var/lib/mysql/mysql.sock
      log-bin=py-mysql-bin
      character-set-server=utf8
      collation-server=utf8_general_ci
      log-error=/var/log/mysqld.log
      # Disabling symbolic-links is recommended to prevent assorted security risks
      symbolic-links=0
      
      [mysqld_safe]
      log-error=/var/log/mariadb/mariadb.log
      pid-file=/var/run/mariadb/mariadb.pid
      
      [client]
      default-character-set=utf8
      
    • 读取ini格式的文件,并创建一个excel文件,且为每个节点创建一个sheet,然后将节点下的键值写入到excel中,按照如下格式。

      • 首行,字体白色 & 单元格背景色蓝色。
      • 内容均居中。
      • 边框。
import sys
import os
import configparser
import openpyxl
from openpyxl.styles import Alignment, Border, Side, Font, PatternFill, GradientFill
Base_dir = os.path.dirname(os.path.abspath(__file__))

if not os.path.exists(os.path.join(Base_dir, 'db', 'config.ini')):
    print('文件不存在')
    sys.exit()

wb = openpyxl.Workbook()
cofig = configparser.ConfigParser()
cofig.read(os.path.join(Base_dir, 'db', 'config.ini'))


for sheet_name in cofig.sections():
    wb.create_sheet(sheet_name)
    sheet = wb[sheet_name]
    #  循环sheet名称列表,分别实例 sheet
    num = 2

    sheet['A1']='键'
    sheet['A1'].fill = PatternFill("solid", fgColor="99ccff")
    sheet['A1'].alignment = Alignment(horizontal='center', vertical='distributed', text_rotation=0, wrap_text=True)
    cell_list = sheet.cell(1,1)
    cell_list.border = Border(
        top=Side(style="thin", color="000000"),
        bottom=Side(style="thin", color="000000"),
        left=Side(style="thin", color="000000"),
        right=Side(style="thin", color="000000"),
    )


    sheet['B1'] = '值'
    sheet['B1'].fill = PatternFill("solid", fgColor="99ccff")
    sheet['B1'].alignment = Alignment(horizontal='center', vertical='distributed', text_rotation=0, wrap_text=True)
    cell_list = sheet.cell(1,2)
    cell_list.border = Border(
        top=Side(style="thin", color="000000"),
        bottom=Side(style="thin", color="000000"),
        left=Side(style="thin", color="000000"),
        right=Side(style="thin", color="000000"),
    )

    for items in cofig.items(sheet_name):
        #  从config.item 取每个组的全部数据,返回列表,for循环 分别取出来
        
        sheet[f'A{num}'] = items[0].strip()
        cell_list = sheet[f'A{num}']
        cell_list.border = Border(
            top=Side(style="thin", color="000000"),
            bottom=Side(style="thin", color="000000"),
            left=Side(style="thin", color="000000"),
            right=Side(style="thin", color="000000"),
        )

        sheet[f'B{num}'] = items[1].strip()

        cell_list = sheet[f'B{num}']
        cell_list.border = Border(
            top=Side(style="thin", color="000000"),
            bottom=Side(style="thin", color="000000"),
            left=Side(style="thin", color="000000"),
            right=Side(style="thin", color="000000"),
        )
        # sheet实例的设置以列名的数字 初始为1
        # ini 组内的元组[0] 赋值给 A1  元组[1] 赋值给 B1


        num += 1


del  wb['Sheet']
wb.save('p1.xlsx')

  1. 补充代码,实现如下功能。

    import requests
    
    # 1.下载文件
    file_url = 'https://files.cnblogs.com/files/wupeiqi/HtmlStore.zip'
    res = requests.get(url=file_url)
    print(res.content)
    
    # 2.将下载的文件保存到当前执行脚本同级目录下 /files/package/ 目录下(且文件名为HtmlStore.zip)
    
    # 3.在将下载下来的文件解压到 /files/html/ 目录下
    
    
    import os.path
    import shutil
    import requests
    
    # 1.下载文件
    file_url = 'https://files.cnblogs.com/files/wupeiqi/HtmlStore.zip'
    res = requests.get(url=file_url)
    print(res.content)
    
    Base_dir = os.path.dirname(os.path.abspath(__file__))
    
    if not os.path.exists(os.path.join(Base_dir, 'files', 'package')):
        os.makedirs(os.path.join(Base_dir, 'files', 'package'))
    
    # 2.将下载的文件保存到当前执行脚本同级目录下 /files/package/ 目录下(且文件名为HtmlStore.zip)
    with open(os.path.join(Base_dir, 'files', 'package', 'HtmlStore.zip'), mode='wb') as file_object:
        file_object.write(res.content)
    
    # 3.在将下载下来的文件解压到 /files/html/ 目录下
    
    if not  os.path.exists(os.path.join(Base_dir,'files','html')):
        os.makedirs(os.path.join(Base_dir,'files','html'))
    shutil.unpack_archive(os.path.join(Base_dir, 'files', 'package', 'HtmlStore.zip'),extract_dir=os.path.join(Base_dir,'files','html'),format='zip')
    
    
     # 更新版
    import os.path
    import shutil
    import requests
    
    # 1.下载文件
    file_url = 'https://files.cnblogs.com/files/wupeiqi/HtmlStore.zip'
    res = requests.get(url=file_url)
    print(res.content)
    
    Base_dir = os.path.dirname(os.path.abspath(__file__))
    
    if not os.path.exists(os.path.join(Base_dir, 'files', 'package')):
        os.makedirs(os.path.join(Base_dir, 'files', 'package'))
    
    # 2.将下载的文件保存到当前执行脚本同级目录下 /files/package/ 目录下(且文件名为HtmlStore.zip)
    #要把url 进行切割
    url_file_name = file_url.split('/')[-1]
    
    with open(os.path.join(Base_dir, 'files', 'package', url_file_name), mode='wb') as file_object:
        file_object.write(res.content)
    
    # 3.在将下载下来的文件解压到 /files/html/ 目录下
    
    # shutil 解压的时候,目录不存在会自动创建
    shutil.unpack_archive(os.path.join(Base_dir, 'files', 'package', url_file_name),extract_dir=os.path.join(Base_dir, 'files', 'html'), format='zip')
    
    
posted @ 2021-08-11 11:52  mmszxc  阅读(119)  评论(0)    收藏  举报