python基础---读写文件

手动写入文件:

# Manual write: open in write mode, write the text, then close explicitly.
out_file = open('de8ug.txt', mode='w')
repeated = 'de8ug ' * 8  # the word followed by a space, eight times
out_file.write(repeated)
out_file.close()  # without a with-block the handle must be closed by hand

手动读取文件:默认打开模式为只读 'r'

# Manual read: the mode is omitted, so open() uses its default, 'r'.
in_file = open('de8ug.txt')
in_file.read()  # returns the whole file as one string (the result is not stored here)
in_file.close()

自动写入文件:

# The with-block closes the file automatically when it ends.
with open('de8ug-1.txt', mode='w') as writer:
    writer.write('de8ug')

自动读取文件内容,并以追加模式写入:

# Read the whole file back and print it; the with-block closes the file.
with open('de8ug-1.txt') as reader:
    data = reader.read()
    print(data)

# Mode 'a' opens the file for appending.
with open('de8ug-1.txt', mode='a') as appender:
    appender.write('de8ug')  # added after the existing content
    print(appender.tell())  # print the current stream position
    appender.seek(0)  # move the stream position back to offset 0

 

 fnmatch模块

#查找匹配相应的后缀名的文件

import fnmatch
import os  # os.listdir() is used below; without this import the loop raises NameError

# Print every entry of the current directory whose name matches one of the
# two patterns. '*' matches any run of characters, '?' matches exactly one.
for name in os.listdir():
    if fnmatch.fnmatch(name, '*.txt') or fnmatch.fnmatch(name, '?.html'):
        print(name)

 

glob模块

单纯匹配某种命名规则的文件

import glob

# glob patterns are shell-style wildcards (not regular expressions);
# character ranges such as '[0-9].txt' are also supported.
txt_files = glob.glob('*.txt')
for path in txt_files:
    print(path)

 

pickle模块

# 存储python的数据结构,将列表,字典等内存中的数据序列化到磁盘,后缀名推荐pkl

import pickle

# Serialize a dict to disk, then load it back and print it.
data = dict(name='de8ug', age='18')
with open('data.pkl', mode='wb') as sink:  # 'wb': write in binary mode
    pickle.dump(data, sink)

with open('data.pkl', mode='rb') as source:  # 'rb': read in binary mode
    data = pickle.load(source)
    print(data)

 

io模块

# 虚拟文件,临时文件,不需要真的保存文件到磁盘

import io

# An in-memory text buffer: behaves like a file but nothing is written to disk.
buffer = io.StringIO()
buffer.write('hello python')
print('python', file=buffer)  # print() can target any file-like object

# Grab the accumulated text before the buffer is closed.
content = buffer.getvalue()
print(content)

buffer.close()  # release the buffer; getvalue() is no longer usable afterwards

 

shelve模块

#用类似字典的方式存储任意的python对象

import shelve

# Store a value under a string key, dict-style; the with-block closes the shelf.
with shelve.open('22.she') as shelf:
    shelf['chicken'] = 'eat chicken'
# NOTE: the files created depend on the dbm backend in use; with dbm.dumb
# they are 22.she.bak, 22.she.dat and 22.she.dir.

# Reopen the shelf and read the stored value back.
with shelve.open('22.she') as shelf:
    print(shelf['chicken'])

 

实例应用:简单的提取log日志文件的IP地址和状态码

# 只适合小的文件,大文件无法装入内存

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# 统计ip和状态码
# log_parse.py
# author: guo


import os
import re
import pickle


class Log_parser:
    """Count leading IP addresses and status codes in an access log.

    The log lines are read once at construction time and kept in
    ``self.data``. Each counting method prints its result, stores it as a
    pickle file in the current working directory, and returns it.
    """

    # Matches a dotted-quad IPv4 address; applied at the start of each line.
    _RE_IP = re.compile(r'((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)')
    # Position of the status code once a line is split on single spaces.
    _STATUS_FIELD = 8
    # Location used when the caller does not supply a log path.
    _DEFAULT_BASE_DIR = 'C:\\study\\jupyter\\Module_3\\3-2_处理任意格式的文本文件'

    def __init__(self, log_path=None):
        """Load the log lines.

        Args:
            log_path: Path of the log file to read. When omitted, the
                built-in default location is used.
        """
        self.data = self.load_data(log_path)

    @staticmethod
    def _save_counts(counts, filename):
        """Pickle ``counts`` into ``filename`` in the working directory."""
        with open(filename, 'wb') as f:
            pickle.dump(counts, f)

    def count_ip(self):
        """Count how often each IP address starts a log line.

        Lines that do not begin with an IPv4 address are ignored. The result
        is saved to ``ip.pkl`` and printed.

        Returns:
            dict mapping IP address string to its number of occurrences.
        """
        ip_dic = {}
        for line in self.data:
            match = self._RE_IP.match(line)
            if match:
                ip = match.group()
                ip_dic[ip] = ip_dic.get(ip, 0) + 1
        self._save_counts(ip_dic, 'ip.pkl')
        print(f'ip地址登录次数统计如下:{ip_dic}')
        return ip_dic

    def count_status(self):
        """Count how often each status code occurs.

        The status code is taken as the ninth space-separated field of a
        line. Lines with fewer fields (blank or malformed lines) are skipped
        instead of raising ``IndexError``. The result is saved to
        ``status.pkl`` and printed.

        Returns:
            dict mapping status code string to its number of occurrences.
        """
        status_dic = {}
        for line in self.data:
            fields = line.split(' ')
            if len(fields) <= self._STATUS_FIELD:
                continue  # not enough fields to contain a status code
            status_get = fields[self._STATUS_FIELD]
            status_dic[status_get] = status_dic.get(status_get, 0) + 1
        self._save_counts(status_dic, 'status.pkl')
        print(f'状态码及个数如下:{status_dic}')
        return status_dic

    def load_data(self, log_path=None):
        """Read the log file and return its lines as a list.

        The whole file is held in memory, so this suits small logs only.

        Args:
            log_path: Path of the log file. When omitted, ``access.log``
                inside the default base directory is read.

        Returns:
            list of lines, each keeping its trailing newline.
        """
        if log_path is None:
            log_path = os.path.join(self._DEFAULT_BASE_DIR, 'access.log')
        # The with-block closes the file even if reading fails.
        with open(log_path) as log_file:
            return log_file.readlines()


def main():
    """Load the log, then run the IP report followed by the status report."""
    parser = Log_parser()
    for report in (parser.count_ip, parser.count_status):
        report()

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

程序执行结果如下图所示:

 

posted on 2018-08-03 16:04  guoqian205  阅读(173)  评论(0)    收藏  举报

导航