Python之CSV模块的使用

csv的作用:

可以用csv模块处理从电子表格和数据库导出的数据,并写入使用字段和记录的格式的文本文件,这种格式通常被称为逗号分隔的格式(因为常用逗号来分隔记录中的字段)。

1、读取csv的文件

数据源

title1,title2,title3,title4
1,a,2020/4/22,!
2,b,2020/4/23,@
3,c,2020/4/24,#
4,d,2020/4/25,$
test.csv
import csv
with open('test.csv','rt') as rf:
    reader = csv.reader(rf)
    for item in reader:
        print(item)
csv_reader.py

 运行效果

['title1', 'title2', 'title3', 'title4']
['1', 'a', '2020/4/22', '!']
['2', 'b', '2020/4/23', '@']
['3', 'c', '2020/4/24', '#']
['4', 'd', '2020/4/25', '$']

 2、写入csv的文件

import csv

unicode_chars = 'å∫ç'
with open('test_write.csv', 'wt', encoding='utf-8', newline='') as wf:
    writer = csv.writer(wf)
    writer.writerow(('title1', 'title2', 'title3'))
    for i in range(3):
        row = (
            i + 1,
            chr(ord('a') + 1),
            unicode_chars[i],
        )
        writer.writerow(row)
csv_writer.py

 运行效果

title1,title2,title3
1,b,å
2,b,∫
3,b,ç

 3、设置写入的内容是否加引号规则

import csv

unicode_chars = 'å∫ç'
with open('test_write.csv', 'wt', encoding='utf-8', newline='') as wf:
    writer = csv.writer(wf, quoting=csv.QUOTE_NONNUMERIC)
    writer.writerow(('title1', 'title2', 'title3'))
    for i in range(3):
        row = (
            i + 1,
            chr(ord('a') + 1),
            unicode_chars[i],
        )
        writer.writerow(row)
csv_writer_quoted.py
csv.QUOTE_ALL : 设置所有内容加引号
csv.QUOTE_NONNUMERIC : 除了数字类型,其它的设置内容加引号
csv.QUOTE_MINIMAL : 设置包含特殊字符的字段加引号
csv.QUOTE_NONE : 设置所有的内容不加引号

 运行效果

"title1","title2","title3"
1,"b","å"
2,"b",""
3,"b","ç"

 4、csv能够解析的种类

import csv

print(csv.list_dialects())
csv_list_dialects.py

 运行效果

['excel', 'excel-tab', 'unix']

 5、自定义创建一个解析器

"Title 1"|"Title 2"|"Title 3"
1|"first line
second line"|08/18/07
testdata.pipes
import csv

# 注册一个解析器,分隔符为|
csv.register_dialect('pipes', delimiter='|')
with open('testdata.pipes', 'r') as f:
    reader = csv.reader(f, dialect='pipes')
    for row in reader:
        print(row)
csv_dialect.py

 运行效果

['Title 1', 'Title 2', 'Title 3']
['1', 'first line\nsecond line', '08/18/07']
CSV Dialect Parameters
AttributeDefaultMeaning
delimiter , Field separator (one character)
doublequote True Flag controlling whether quotechar instances are doubled
escapechar None Character used to indicate an escape sequence
lineterminator \r\n String used by writer to terminate a line
quotechar " String to surround fields containing special values (one character)
quoting QUOTE_MINIMAL Controls quoting behavior described earlier
skipinitialspace False Ignore whitespace after the field delimiter

 6、查看不同的解析器,分隔符等参数的差异

import csv
import sys

# 注册一个解析器,符号为 \\
csv.register_dialect('escaped',
                     escapechar='\\',
                     doublequote=False,
                     quoting=csv.QUOTE_NONE,
                     )

# 注册一个解析器,符号为 '
csv.register_dialect('singlequote',
                     quotechar="'",
                     quoting=csv.QUOTE_ALL,
                     )
# 获取内容是否加引号的对象
quoting_modes = {
    getattr(csv, n): n
    for n in dir(csv)
    if n.startswith('QUOTE_')
}

# 格式化的模板
TEMPLATE = '''
Dialect: "{name}"
  delimiter   = {dl!r:<6}    skipinitialspace = {si!r}
  doublequote = {dq!r:<6}    quoting          = {qu}
  quotechar   = {qc!r:<6}    lineterminator   = {lt!r}
  escapechar  = {ec!r:<6}
'''

for name in sorted(csv.list_dialects()):
    dialect = csv.get_dialect(name)

    print(TEMPLATE.format(
        name=name,
        dl=dialect.delimiter,
        si=dialect.skipinitialspace,
        dq=dialect.doublequote,
        qu=quoting_modes[dialect.quoting],
        qc=dialect.quotechar,
        lt=dialect.lineterminator,
        ec=dialect.escapechar,
    ))

    writer = csv.writer(sys.stdout, dialect=dialect)
    writer.writerow(
        ('col1', 1, '10/01/2010',
         'Special chars: " \' {} to parse'.format(
             dialect.delimiter))
    )
csv_dialect_variations.py

 运行效果

Dialect: "escaped"
  delimiter   = ','       skipinitialspace = 0
  doublequote = 0         quoting          = QUOTE_NONE
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = '\\'  

col1,1,10/01/2010,Special chars: \" ' \, to parse


Dialect: "excel"
  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_MINIMAL
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = None  

col1,1,10/01/2010,"Special chars: "" ' , to parse"

Dialect: "excel-tab"
  delimiter   = '\t'      skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_MINIMAL
  quotechar   = '"'       lineterminator   = '\r\n'
  escapechar  = None  

col1    1    10/01/2010    "Special chars: "" '      to parse"


Dialect: "singlequote"
  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_ALL
  quotechar   = "'"       lineterminator   = '\r\n'
  escapechar  = None  

'col1','1','10/01/2010','Special chars: " '' , to parse'


Dialect: "unix"
  delimiter   = ','       skipinitialspace = 0
  doublequote = 1         quoting          = QUOTE_ALL
  quotechar   = '"'       lineterminator   = '\n'
  escapechar  = None  

"col1","1","10/01/2010","Special chars: "" ' , to parse"

7、为给定的样本猜测csv解析器,然后打印出结果

import csv
from io import StringIO

# 注册一个解析器,符号为 \\
csv.register_dialect('escaped',
                     escapechar='\\',
                     doublequote=False,
                     quoting=csv.QUOTE_NONE,
                     )

# 注册一个解析器,符号为 '
csv.register_dialect('singlequote',
                     quotechar="'",
                     quoting=csv.QUOTE_ALL,
                     )

samples = []

# 获取python自带的解析器,写入到缓存中,用于下面解析.
for name in sorted(csv.list_dialects()):
    buffer = StringIO()
    dialect = csv.get_dialect(name)
    writer = csv.writer(buffer, dialect=dialect)
    writer.writerow(
        ('col1', 1, '10/01/2010',
         'Special chars " \' {} to parse'.format(
             dialect.delimiter))
    )
    samples.append((name, dialect, buffer.getvalue()))

# 为给定的样本猜测解析器,然后使用结果
sniffer = csv.Sniffer()
for name, expected, sample in samples:
    print('Dialect: "{}"'.format(name))
    print('In: {}'.format(sample.rstrip()))
    dialect = sniffer.sniff(sample, delimiters=',\t')
    reader = csv.reader(StringIO(sample), dialect=dialect)
    print('Parsed:\n  {}\n'.format('\n  '.join(repr(r) for r in next(reader))))
csv_dialect_sniffer.py

运行效果

Dialect: "escaped"
In: col1,1,10/01/2010,Special chars \" ' \, to parse
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars \\" \' \\'
  ' to parse'

Dialect: "excel"
In: col1,1,10/01/2010,"Special chars "" ' , to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'

Dialect: "excel-tab"
In: col1    1    10/01/2010    "Special chars "" '      to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' \t to parse'

Dialect: "singlequote"
In: 'col1','1','10/01/2010','Special chars " '' , to parse'
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'

Dialect: "unix"
In: "col1","1","10/01/2010","Special chars "" ' , to parse"
Parsed:
  'col1'
  '1'
  '10/01/2010'
  'Special chars " \' , to parse'


Process finished with exit code 0

8、读取csv文件以字典的类型输出

import csv

with open('test.csv', 'rt') as rf:
    reader = csv.DictReader(rf)
    for row in reader:
        print(row)
csv_dictreader.py

 运行效果

OrderedDict([('title1', '1'), ('title2', 'a'), ('title3', '2020/4/22'), ('title4', '!')])
OrderedDict([('title1', '2'), ('title2', 'b'), ('title3', '2020/4/23'), ('title4', '@')])
OrderedDict([('title1', '3'), ('title2', 'c'), ('title3', '2020/4/24'), ('title4', '#')])
OrderedDict([('title1', '4'), ('title2', 'd'), ('title3', '2020/4/25'), ('title4', '$')])

9、写入csv文件以字典的类型写入

import csv

fieldnames = ('Title 1', 'Title 2', 'Title 3', 'Title 4')
headers = {
    n: n
    for n in fieldnames
}
unicode_chars = 'å∫ç'

with open('test_write.csv', 'w', encoding='utf-8', newline='') as wf:
    # 设置写入的列标题
    writer = csv.DictWriter(wf, fieldnames=fieldnames)
    writer.writeheader()

    for i in range(3):
        writer.writerow({
            'Title 1': i + 1,
            'Title 2': chr(ord('a') + 1),
            'Title 3': '08/{:02d}/07'.format(i + 1),
            'Title 4': unicode_chars[i]
        })
csv_dictwriter.py

 

 运行效果

test_write.csv

Title 1,Title 2,Title 3,Title 4
1,b,08/01/072,b,08/02/07,∫
3,b,08/03/07,ç
posted @ 2020-04-22 17:05  小粉优化大师  阅读(681)  评论(2编辑  收藏  举报