Python与几种特殊文件格式

1、csv格式文件

逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号),其文件以纯文本形式存储表格数据(数字和文本)。

对于这种格式的数据,我们需要利用open函数来读取文件并根据逗号分隔的特点来进行处理。

股票代码,股票名称,当前价,涨跌额,涨跌幅,年初至今
SH601778,N晶科,6.29,+1.92,-43.94%,+43.94%
SH688566,吉贝尔,52.66,+6.96,+15.23%,+122.29%
...

2、ini格式文件

ini文件是Initialization File的缩写,平时用于存储软件的的配置文件。例如:MySQL数据库的配置文件。

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-bin=py-mysql-bin
character-set-server=utf8
collation-server=utf8_general_ci
log-error=/var/log/mysqld.log
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0

[mysqld_safe]
log-error=/var/log/mariadb/mariadb.log
pid-file=/var/run/mariadb/mariadb.pid

[client]
default-character-set=utf8

这种格式是可以直接使用open来出来,考虑到自己处理比较麻烦,所以Python为我们提供了更为方便的方式。

import configparser

config = configparser.ConfigParser()
config.read('files/my.ini', encoding='utf-8')
# config.read('/Users/dean/PycharmProjects/text/day09/files/my.ini', encoding='utf-8')

# 1.获取所有的节点
"""
result = config.sections()
print(result)  # ['mysqld', 'mysqld_safe', 'client']
"""

# 2.获取节点下的键值
"""
result = config.items("mysqld_safe")
print(result)  # [('log-error', '/var/log/mariadb/mariadb.log'), ('pid-file', '/var/run/mariadb/mariadb.pid')]

for key, value in config.items("mysqld_safe"):
    print(key, value)
"""

# 3.获取某个节点下的键对应的值
"""
result = config.get("mysqld","collation-server")
print(result)
"""

# 4.其他

# 4.1 是否存在节点
# v1 = config.has_section("client")
# print(v1)

# 4.2 添加一个节点
# config.add_section("group")
# config.set('group','name','dean')
# config.set('client','name','dean')
# config.write(open('files/new.ini', mode='w', encoding='utf-8'))

# 4.3 删除
# config.remove_section('client')
# config.remove_option("mysqld", "datadir")
# config.write(open('files/new.ini', mode='w', encoding='utf-8'))
  • 读取所有节点

    import configparser
    
    config = configparser.ConfigParser()
    config.read('/Users/dean/PycharmProjects/text/day09/files/my.conf', encoding='utf-8')
    # config.read('my.conf', encoding='utf-8')
    ret = config.sections()
    print(ret) 
    
    >>输出
    ['mysqld', 'mysqld_safe', 'client']
    
  • 读取节点下的键值

    import configparser
    
    config = configparser.ConfigParser()
    config.read('/Users/dean/PycharmProjects/text/day09/files/my.conf', encoding='utf-8')
    # config.read('my.conf', encoding='utf-8')
    item_list = config.items("mysqld_safe")
    print(item_list) 
    
    >>输出
    [('log-error', '/var/log/mariadb/mariadb.log'), ('pid-file', '/var/run/mariadb/mariadb.pid')]
    
  • 读取节点下值(根据 节点+键 )

    import configparser
    
    config = configparser.ConfigParser()
    config.read('/Users/dean/PycharmProjects/text/day09/files/my.conf', encoding='utf-8')
    
    value = config.get('mysqld', 'log-bin')
    print(value)
    
    >>输出
    py-mysql-bin
    
  • 检查、删除、添加节点

    import configparser
    
    config = configparser.ConfigParser()
    config.read('/Users/dean/PycharmProjects/text/day09/files/my.conf', encoding='utf-8')
    # config.read('my.conf', encoding='utf-8')
    
    
    # 检查
    has_sec = config.has_section('mysqld')
    print(has_sec)
    
    # 添加节点
    config.add_section("SEC_1")
    # 节点中设置键值
    config.set('SEC_1', 'k10', "123")
    config.set('SEC_1', 'name', "哈哈哈哈哈")
    
    config.add_section("SEC_2")
    config.set('SEC_2', 'k10', "123")
    # 内容写入新文件
    config.write(open('/Users/dean/PycharmProjects/text/day09/files/xxoo.conf', 'w'))
    
    
    # 删除节点
    config.remove_section("SEC_2")
    # 删除节点中的键值
    config.remove_option('SEC_1', 'k10')
    config.write(open('/Users/dean/PycharmProjects/text/day09/files/new.conf', 'w'))
    

3、XML格式文件

可扩展标记语言,是一种简单的数据存储语言,XML 被设计用来传输和存储数据。

  • 存储,可用来存放配置文件,例如:java的配置文件。
  • 传输,网络传输时以这种格式存在,例如:早期ajax传输的数据、soap协议等。
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2026</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>

3.1 读取文件和内容

from xml.etree import ElementTree as ET

# ET去打开xml文件
tree = ET.parse("files/xo.xml")

# 获取根标签
root = tree.getroot()

print(root) # <Element 'data' at 0x7f94e02763b0>
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)
print(root)  # <Element 'data' at 0x7fdaa019cea0>

3.2 读取节点数据

from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein" id="999" >
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# 获取根标签 data
root = ET.XML(content)

country_object = root.find("country")
print(country_object.tag, country_object.attrib)
gdppc_object = country_object.find("gdppc")
print(gdppc_object.tag,gdppc_object.attrib,gdppc_object.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

# 获取根标签 data
root = ET.XML(content)

# 获取data标签的孩子标签
for child in root:
    # child.tag = conntry
    # child.attrib = {"name":"Liechtenstein"}
    print(child.tag, child.attrib)
    for node in child:
        print(node.tag, node.attrib, node.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

for child in root.iter('year'):
    print(child.tag, child.text)
from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)
v1 = root.findall('country')
print(v1)

v2 = root.find('country').find('rank')
print(v2.text)

3.3 修改和删除节点

from xml.etree import ElementTree as ET

content = """
<data>
    <country name="Liechtenstein">
        <rank>2</rank>
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
     <country name="Panama">
        <rank>69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
"""

root = ET.XML(content)

# 修改节点内容和属性
rank = root.find('country').find('rank')
print(rank.text)
rank.text = "999"
rank.set('update', '2020-11-11')
print(rank.text, rank.attrib)
############ 保存文件 ############
tree = ET.ElementTree(root)
tree.write("new.xml", encoding='utf-8')


# 删除节点
root.remove( root.find('country') )
print(root.findall('country'))

############ 保存文件 ############
tree = ET.ElementTree(root)
tree.write("newnew.xml", encoding='utf-8')

3.4 构建文档

<home>
    <son name="儿1">
        <grandson name="儿11"></grandson>
        <grandson name="儿12"></grandson>
    </son>
    <son name="儿2"></son>
</home>
from xml.etree import ElementTree as ET

# 创建根标签
root = ET.Element("home")

# 创建节点大儿子
son1 = ET.Element('son', {'name': '儿1'})
# 创建小儿子
son2 = ET.Element('son', {"name": '儿2'})

# 在大儿子中创建两个孙子
grandson1 = ET.Element('grandson', {'name': '儿11'})
grandson2 = ET.Element('grandson', {'name': '儿12'})
son1.append(grandson1)
son1.append(grandson2)

# 把儿子添加到根节点中
root.append(son1)
root.append(son2)

tree = ET.ElementTree(root)
tree.write('oooo.xml', encoding='utf-8', short_empty_elements=False)
<famliy>
    <son name="儿1">
        <grandson name="儿11"></grandson>
        <grandson name="儿12"></grandson>
    </son>
    <son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET

# 创建根节点
root = ET.Element("famliy")


# 创建大儿子
son1 = root.makeelement('son', {'name': '儿1'})
# 创建小儿子
son2 = root.makeelement('son', {"name": '儿2'})

# 在大儿子中创建两个孙子
grandson1 = son1.makeelement('grandson', {'name': '儿11'})
grandson2 = son1.makeelement('grandson', {'name': '儿12'})

son1.append(grandson1)
son1.append(grandson2)


# 把儿子添加到根节点中
root.append(son1)
root.append(son2)

tree = ET.ElementTree(root)
tree.write('oooo.xml',encoding='utf-8')
<famliy>
	<son name="儿1">
    	<age name="儿11">孙子</age>
    </son>
	<son name="儿2"></son>
</famliy>
from xml.etree import ElementTree as ET


# 创建根节点
root = ET.Element("famliy")


# 创建节点大儿子
son1 = ET.SubElement(root, "son", attrib={'name': '儿1'})
# 创建小儿子
son2 = ET.SubElement(root, "son", attrib={"name": "儿2"})

# 在大儿子中创建一个孙子
grandson1 = ET.SubElement(son1, "age", attrib={'name': '儿11'})
grandson1.text = '孙子'


et = ET.ElementTree(root)  #生成文档对象
et.write("test.xml", encoding="utf-8")
<user><![CDATA[你好呀]]</user>
from xml.etree import ElementTree as ET

# 创建根节点
root = ET.Element("user")
root.text = "<![CDATA[你好呀]]"

et = ET.ElementTree(root)  # 生成文档对象
et.write("test.xml", encoding="utf-8")

案例:

content = """<xml>
    <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
    <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
    <CreateTime>1395658920</CreateTime>
    <MsgType><![CDATA[event]]></MsgType>
    <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
    <MsgID>200163836</MsgID>
    <Status><![CDATA[success]]></Status>
</xml>"""

from xml.etree import ElementTree as ET

info = {}
root = ET.XML(content)
for node in root:
    # print(node.tag,node.text)
    info[node.tag] = node.text
print(info)

4、Excel格式文件

Python内部未提供处理Excel文件的功能,想要在Python中操作Excel需要按照第三方的模块。

pip install openpyxl

4.1 读Excel

  • 读sheet

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    
    # sheet相关操作
    
    # 1.获取excel文件中的所有sheet名称
    """
    print(wb.sheetnames) # ['数据导出', '用户列表', 'Sheet1', 'Sheet2']
    """
    
    # 2.选择sheet,基于sheet名称
    """
    sheet = wb["数据导出"]
    cell = sheet.cell(1, 2)
    print(cell.value)
    """
    
    # 3.选择sheet,基于索引位置
    """
    sheet = wb.worksheets[0]
    cell = sheet.cell(1,2)
    print(cell.value)
    """
    
    # 4.循环所有的sheet
    """
    for name in wb.sheetnames:
        sheet = wb[name]
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    """
    for sheet in wb.worksheets:
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    """
    for sheet in wb:
        cell = sheet.cell(1, 1)
        print(cell.value)
    """
    
  • 读sheet中单元格的数据

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    sheet = wb.worksheets[0]
    
    # 1.获取第N行第N列的单元格(位置是从1开始)
    """
    cell = sheet.cell(1, 1)
    
    print(cell.value)
    print(cell.style)
    print(cell.font)
    print(cell.alignment)
    """
    
    # 2.获取某个单元格
    """
    c1 = sheet["A2"]
    print(c1.value)
    
    c2 = sheet['D4']
    print(c2.value)
    """
    
    # 3.第N行所有的单元格
    """
    for cell in sheet[1]:
        print(cell.value)
    """
    
    # 4.所有行的数据(获取某一列数据)
    """
    for row in sheet.rows:
        print(row[0].value, row[1].value)
    """
    
    # 5.获取所有列的数据
    """
    for col in sheet.columns:
        print(col[1].value)
    """
    
  • 读合并的单元格

    from openpyxl import load_workbook
    
    wb = load_workbook("files/p1.xlsx")
    sheet = wb.worksheets[2]
    
    # 获取第N行第N列的单元格(位置是从1开始)
    c1 = sheet.cell(1, 1)
    print(c1)  # <Cell 'Sheet1'.A1>
    print(c1.value) # 用户信息
    
    c2 = sheet.cell(1, 2)
    print(c2)  # <MergedCell 'Sheet1'.B1>
    print(c2.value) # None
    
    
    from openpyxl import load_workbook
    
    wb = load_workbook('files/p1.xlsx')
    sheet = wb.worksheets[2]
    for row in sheet.rows:
        print(row)
    
    >>> 输出结果
    (<Cell 'Sheet1'.A1>, <MergedCell 'Sheet1'.B1>, <Cell 'Sheet1'.C1>)
    (<Cell 'Sheet1'.A2>, <Cell 'Sheet1'.B2>, <Cell 'Sheet1'.C2>)
    (<Cell 'Sheet1'.A3>, <Cell 'Sheet1'.B3>, <Cell 'Sheet1'.C3>)
    (<MergedCell 'Sheet1'.A4>, <Cell 'Sheet1'.B4>, <Cell 'Sheet1'.C4>)
    (<Cell 'Sheet1'.A5>, <Cell 'Sheet1'.B5>, <Cell 'Sheet1'.C5>)
    

4.2 写Excel

在Excel中想要写文件,大致要分为在:

  • 原Excel文件基础上写内容。

    from openpyxl import load_workbook
    
    wb = load_workbook('files/p1.xlsx')
    sheet = wb.worksheets[0]
    
    # 找到单元格,并修改单元格的内容
    cell = sheet.cell(1, 1)
    cell.value = "新的开始"
    
    # 将excel文件保存到p2.xlsx文件中
    wb.save("files/p2.xlsx")
    
  • 新创建Excel文件写内容。

    from openpyxl import workbook
    
    # 创建excel且默认会创建一个sheet(名称为Sheet)
    wb = workbook.Workbook()
    
    sheet = wb.worksheets[0] # 或 sheet = wb["Sheet"]
    
    # 找到单元格,并修改单元格的内容
    cell = sheet.cell(1, 1)
    cell.value = "新的开始"
    
    # 将excel文件保存到p2.xlsx文件中
    wb.save("files/p2.xlsx")
    

在了解了如何读取Excel和创建Excel之后,后续对于Excel中的sheet和cell操作基本上都相同。

from openpyxl import workbook

wb = workbook.Workbook() # Sheet

# 1. 修改sheet名称
"""
sheet = wb.worksheets[0]
sheet.title = "数据集"
wb.save("p2.xlsx")
"""

# 2. 创建sheet并设置sheet颜色
"""
sheet = wb.create_sheet("工作计划", 0)
sheet.sheet_properties.tabColor = "1072BA"
wb.save("p2.xlsx")
"""

# 3. 默认打开的sheet
"""
wb.active = 0
wb.save("p2.xlsx")
"""

# 4. 拷贝sheet
"""
sheet = wb.create_sheet("工作计划")
sheet.sheet_properties.tabColor = "1072BA"

new_sheet = wb.copy_worksheet(wb["Sheet"])
new_sheet.title = "新的计划"
wb.save("p2.xlsx")
"""

# 5.删除sheet
"""
del wb["用户列表"]
wb.save('files/p2.xlsx')
"""
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Border, Side, Font, PatternFill, GradientFill


wb = load_workbook('files/p1.xlsx')

sheet = wb.worksheets[1]

# 1. 获取某个单元格,修改值
"""
cell = sheet.cell(1, 1)
cell.value = "开始"
wb.save("p2.xlsx")
"""

# 2.  获取某个单元格,修改值
"""
sheet["B3"] = "sam"
wb.save("p2.xlsx")
"""

# 3. 获取某些单元格,修改值
"""
cell_list = sheet["B2":"C3"]
for row in cell_list:
    for cell in row:
        cell.value = "新的值"
wb.save("p2.xlsx")
"""

# 4. 对齐方式
"""
cell = sheet.cell(1, 1)

# horizontal,水平方向对齐方式:"general", "left", "center", "right", "fill", "justify", "centerContinuous", "distributed"
# vertical,垂直方向对齐方式:"top", "center", "bottom", "justify", "distributed"
# text_rotation,旋转角度。
# wrap_text,是否自动换行。
cell.alignment = Alignment(horizontal='center', vertical='distributed', text_rotation=45, wrap_text=True)
wb.save("p2.xlsx")
"""

# 5. 边框
# side的style有如下:dashDot','dashDotDot', 'dashed','dotted','double','hair', 'medium', 'mediumDashDot', 'mediumDashDotDot','mediumDashed', 'slantDashDot', 'thick', 'thin'
"""
cell = sheet.cell(9, 2)
cell.border = Border(
    top=Side(style="thin", color="FFB6C1"), 
    bottom=Side(style="dashed", color="FFB6C1"),
    left=Side(style="dashed", color="FFB6C1"),
    right=Side(style="dashed", color="9932CC"),
    diagonal=Side(style="thin", color="483D8B"),  # 对角线
    diagonalUp=True,  # 左下 ~ 右上
    diagonalDown=True  # 左上 ~ 右下
)
wb.save("p2.xlsx")
"""

# 6.字体
"""
cell = sheet.cell(5, 1)
cell.font = Font(name="微软雅黑", size=45, color="ff0000", underline="single")
wb.save("p2.xlsx")
"""

# 7.背景色
"""
cell = sheet.cell(5, 3)
cell.fill = PatternFill("solid", fgColor="99ccff")
wb.save("p2.xlsx")
"""

# 8.渐变背景色
"""
cell = sheet.cell(5, 5)
cell.fill = GradientFill("linear", stop=("FFFFFF", "99ccff", "000000"))
wb.save("p2.xlsx")
"""

# 9.宽高(索引从1开始)
"""
sheet.row_dimensions[1].height = 50
sheet.column_dimensions["E"].width = 100
wb.save("p2.xlsx")
"""

# 10.合并单元格
"""
sheet.merge_cells("B2:D8")
sheet.merge_cells(start_row=15, start_column=3, end_row=18, end_column=8)
wb.save("p2.xlsx")
"""
"""
sheet.unmerge_cells("B2:D8")
wb.save("p2.xlsx")
"""

# 11.写入公式
"""
sheet = wb.worksheets[3]
sheet["D1"] = "合计"
sheet["D2"] = "=B2*C2"
wb.save("p2.xlsx")
"""
"""
sheet = wb.worksheets[3]
sheet["D3"] = "=SUM(B3,C3)"
wb.save("p2.xlsx")
"""

# 12.删除
"""
# idx,要删除的索引位置
# amount,从索引位置开始要删除的个数(默认为1)
sheet.delete_rows(idx=1, amount=20)
sheet.delete_cols(idx=1, amount=3)
wb.save("p2.xlsx")
"""

# 13.插入
"""
sheet.insert_rows(idx=5, amount=10)
sheet.insert_cols(idx=3, amount=2)
wb.save("p2.xlsx")
"""

# 14.循环写内容
"""
sheet = wb["Sheet"]
cell_range = sheet['A1:C2']
for row in cell_range:
    for cell in row:
        cell.value = "xx"

for row in sheet.iter_rows(min_row=5, min_col=1, max_col=7, max_row=10):
    for cell in row:
        cell.value = "oo"
wb.save("p2.xlsx")
"""

# 15.移动
"""
# 将H2:J10范围的数据,向右移动15个位置、向上移动1个位置
sheet.move_range("H2:J10",rows=1, cols=15)
wb.save("p2.xlsx")
"""
"""
sheet = wb.worksheets[3]
sheet["D1"] = "合计"
sheet["D2"] = "=B2*C2"
sheet["D3"] = "=SUM(B3,C3)"
sheet.move_range("B1:D3",cols=10, translate=True) # 自动翻译公式
wb.save("p2.xlsx")
"""

# 16.打印区域
"""
sheet.print_area = "A1:D200"
wb.save("p2.xlsx")
"""

# 17.打印时,每个页面的固定表头
"""
sheet.print_title_cols = "A:D"
sheet.print_title_rows = "1:3"
wb.save("p2.xlsx")
"""
posted @ 2021-08-01 22:09  henryVIII  阅读(294)  评论(0)    收藏  举报