python基础---读写文件
手动写入文件:
# Manual write: open the file, write to it, then close it explicitly.
f = open('de8ug.txt', 'w')
try:
    f.write('de8ug ' * 8)  # the text 'de8ug ' repeated eight times
finally:
    # Close even when write() raises, so the handle is never leaked.
    f.close()
手动读取文件:不指定模式时,默认以只读文本模式 'r' 打开
# Manual read: when no mode is given, open() defaults to 'r' (read text).
f = open('de8ug.txt')
try:
    content = f.read()  # keep the text; a bare f.read() would throw it away
finally:
    # Close even when read() raises, so the handle is never leaked.
    f.close()
自动写入文件:
# Context-manager write: the file is closed automatically when the block exits.
with open('de8ug-1.txt', 'w') as out_file:
    out_file.write('de8ug')
自动读取文件内容:
# Context-manager read: load the whole file as text, then print it.
with open('de8ug-1.txt') as f:
    data = f.read()
    print(data)
# Append mode ('a'): the file is created if missing and new text goes at the end.
with open('de8ug-1.txt', 'a') as f:
    f.write('de8ug')  # appended after any existing content
    print(f.tell())   # show the current stream position
    f.seek(0)         # move the stream position back to 0
fnmatch模块
#查找匹配相应的后缀名的文件
# Print the entries of the current directory whose names match a wildcard.
import fnmatch
import os  # provides os.listdir()

for name in os.listdir():
    if fnmatch.fnmatch(name, '*.txt'):  # '*' matches any run of characters
        print(name)
    elif fnmatch.fnmatch(name, '?.html'):  # '?' matches exactly one character
        print(name)
glob模块
按 shell 风格的通配符(不是正则表达式)匹配符合某种命名规则的文件
import glob

# glob patterns are shell-style wildcards (*, ?, [0-9]), not regular
# expressions; e.g. '[0-9].txt' matches one digit followed by '.txt'.
for i in glob.glob('*.txt'):
    print(i)
pickle模块
# 存储python的数据结构,将列表,字典等内存中的数据序列化到磁盘,后缀名推荐pkl
import pickle

# Serialize an in-memory structure to disk, then load it back.
data = {'name': 'de8ug', 'age': '18'}

with open('data.pkl', 'wb') as f:  # 'wb': pickle writes bytes
    pickle.dump(data, f)

# NOTE: only unpickle files you trust; loading a pickle can run arbitrary code.
with open('data.pkl', 'rb') as f:  # 'rb': pickle reads bytes
    data = pickle.load(f)
    print(data)
io模块
# 虚拟文件,临时文件,不需要真的保存文件到磁盘
import io

# In-memory text stream: used like a file, but nothing is written to disk.
output = io.StringIO()
output.write('hello python')
print('python', file=output)  # print() adds a trailing newline
content = output.getvalue()   # the entire buffer as one string
print(content)
output.close()  # close the stream and discard its buffer
shelve模块
#用类似字典的方式存储任意的python对象
import shelve

# A shelf is a persistent, dict-like store whose values are pickled objects.
with shelve.open('22.she') as so:
    so['chicken'] = 'eat chicken'
# The files created on disk depend on the dbm backend in use; with dbm.dumb
# they are 22.she.bak, 22.she.dat and 22.she.dir.

with shelve.open('22.she') as so:
    print(so['chicken'])
实例应用:简单的提取log日志文件的IP地址和状态码
# 只适合小的文件,大文件无法装入内存
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Count client IP addresses and HTTP status codes in an access log.
# log_parse.py
# author: guo
import os
import re
import pickle
from collections import Counter


class Log_parser:
    """Tally client IPs and status codes from an access log held in memory.

    The whole log is read into a list of lines at construction time, so this
    is only suitable for files that fit in memory. Each count is also pickled
    to a file in the current working directory ('ip.pkl' / 'status.pkl').
    """

    # Directory searched for 'access.log' when no explicit path is given.
    DEFAULT_BASE_DIR = 'C:\\study\\jupyter\\Module_3\\3-2_处理任意格式的文本文件'

    # Dotted-quad IPv4 address. It is applied with match(), so the address
    # has to appear at the very start of the line to be counted.
    IP_PATTERN = re.compile(
        r'((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)'
    )

    # Zero-based position of the status code after splitting on single spaces.
    STATUS_FIELD = 8

    def __init__(self, log_path=None):
        """Load the log lines into ``self.data``.

        Args:
            log_path: Path of the log file to parse. When omitted, the
                'access.log' file under ``DEFAULT_BASE_DIR`` is used.
        """
        self.log_path = log_path
        self.data = self.load_data()

    def _save_and_report(self, counts, pkl_path, label):
        """Pickle ``counts`` to ``pkl_path``, read it back and print it.

        Args:
            counts: Plain dict mapping a key to its number of occurrences.
            pkl_path: File the dict is pickled to.
            label: Text printed in front of the reloaded dict.

        Returns:
            The dict as reloaded from disk, or ``{}`` when reading it back
            fails (the error is printed instead of raised).
        """
        with open(pkl_path, 'wb') as f:
            pickle.dump(counts, f)
        try:
            with open(pkl_path, 'rb') as f:
                data = pickle.load(f)
        except (OSError, EOFError, pickle.PickleError) as e:
            print(e)
            return {}
        print(f'{label}{data}')
        return data

    def count_ip(self):
        """Count how many lines start with each IP address.

        Lines that do not start with an IPv4 address are ignored.

        Returns:
            Dict mapping IP string to occurrence count, or ``{}`` when the
            pickled result cannot be read back.
        """
        matches = (self.IP_PATTERN.match(line) for line in self.data)
        # dict(...) keeps first-seen order and the plain-dict printed form.
        ip_dic = dict(Counter(m.group() for m in matches if m))
        return self._save_and_report(ip_dic, 'ip.pkl', 'ip地址登录次数统计如下:')

    def count_status(self):
        """Count occurrences of each status code (space-separated field 8).

        Lines with too few fields (for example blank lines) are skipped
        rather than raising IndexError.

        Returns:
            Dict mapping status string to occurrence count, or ``{}`` when
            the pickled result cannot be read back.
        """
        status_counter = Counter()
        for line in self.data:
            # Drop the line ending so a status in the last field stays clean.
            fields = line.rstrip('\r\n').split(' ')
            if len(fields) > self.STATUS_FIELD:
                status_counter[fields[self.STATUS_FIELD]] += 1
        status_dic = dict(status_counter)
        return self._save_and_report(status_dic, 'status.pkl', '状态码及个数如下:')

    def load_data(self):
        """Read the log file and return its lines, line endings included."""
        # getattr keeps this usable on instances created without __init__.
        log_path = getattr(self, 'log_path', None)
        if log_path is None:
            log_path = os.path.join(self.DEFAULT_BASE_DIR, 'access.log')
        with open(log_path) as log_file:
            return log_file.readlines()


def main():
    """Parse the default log and print both the IP and status-code counts."""
    log_parse = Log_parser()
    log_parse.count_ip()
    log_parse.count_status()


if __name__ == '__main__':
    main()
程序执行结果如下截图:
posted on 2018-08-03 16:04 guoqian205 阅读(173) 评论(0) 收藏 举报