Python正则表达式,统计分析nginx访问日志

目标:

  1.正则表达式

  2.oop编程,统计nginx访问日志中不同IP地址出现的次数并排序

 

1.正则表达式

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import re

# Walk-through of the core `re` entry points: match, search, split,
# findall and finditer.
#
# Every print call takes exactly one parenthesised argument, so the script
# produces the same output under Python 2 and Python 3.

# --- match: only succeeds at the very start of the string ---
# Approach 1: compile the pattern first, then call its match() method.
# re.I makes the comparison case-insensitive, so 'hello' matches 'Hello'.
hello_pattern = re.compile(r'hello', re.I)

hello_match = hello_pattern.match('Hello World')

if hello_match:
    print(hello_match.group())

# Approach 2: the module-level helper, no explicit compile step.
m = re.match(r'hello', 'hello world.')

# match() returns None when nothing matches, so guard before using .group().
if m:
    print(m.group())

# --- search: scans the whole string for the first occurrence ---
world_pattern = re.compile(r'World')

world_match = world_pattern.search('Hello, hello World.')

if world_match:
    print(world_match.group())


# --- split: cut the string at every run of digits ---
digits_pattern = re.compile(r'\d+')
# The input ends with a digit, so the last element is an empty string.
parts = digits_pattern.split('one1two2three3')
print(parts)
for part in parts:
    print(part)

# --- findall: every non-overlapping match, returned as a list of strings ---
numbers = digits_pattern.findall('one1two2three3')
print(numbers)


# --- finditer: same matches as findall, yielded lazily as match objects ---
for digit_match in digits_pattern.finditer('one1two2three3'):
    print(digit_match.group())

•运行代码,测试效果

 

2.oop编程,统计nginx访问日志中不同IP地址出现的次数并排序

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

class CountPatt(object):
    """Tally how often a regular-expression match occurs across a file's lines.

    Each line contributes at most one count: the text of the first place the
    pattern matches on that line (``re.search``).
    """

    def __init__(self, patt):
        """Compile ``patt`` and start with an empty tally.

        :param patt: pattern passed straight to ``re.compile``.
        """
        self.patt = re.compile(patt)
        # Maps matched text -> number of lines on which it was the first match.
        self.result = {}

    def count_patt(self, fname):
        """Scan ``fname`` line by line and add its matches to the tally.

        The tally is never reset, so calling this on several files
        accumulates counts across all of them.

        :param fname: path of the text file to read.
        :returns: the running tally dict (the same object as ``self.result``).
        :raises IOError/OSError: if the file cannot be opened.
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    self.result[key] = self.result.get(key, 0) + 1

        return self.result

    def sort(self):
        """Return the tally as ``(text, count)`` pairs, highest count first.

        Entries with equal counts keep the order in which the dict yields
        them: ``sorted`` is stable, and ``reverse=True`` preserves that
        stability. ``self.result`` itself is left untouched.

        :returns: a new list of ``(matched_text, count)`` tuples.
        """
        # sorted() accepts the items view directly, so this works on both
        # Python 2 (list) and Python 3 (view) without xrange or indexing.
        return sorted(self.result.items(),
                      key=lambda item: item[1],
                      reverse=True)


if __name__ == "__main__":
    # Script entry point: count the leading IP address of every line in the
    # access log, then show the raw tally followed by the ranked list.
    httpd_log = '/tmp/access.log'
    # Start of line, three "digits." groups, then a final run of digits.
    ip_pattern = r'^(\d+\.){3}\d+'
    # Alternative pattern for counting browsers instead; not used below.
    # Pass it to CountPatt in place of ip_pattern to try it.
    browser_pattern = r'Chrome|Safari|Firefox'
    a = CountPatt(ip_pattern)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(a.count_patt(httpd_log))
    print(a.sort())

•运行代码,测试效果

handetiandeMacBook-Pro:test xkops$ python test2.py
{'192.168.207.21': 25, '192.168.80.165': 20, '192.168.207.1': 46, '127.0.0.1': 10}
[('192.168.207.1', 46), ('192.168.207.21', 25), ('192.168.80.165', 20), ('127.0.0.1', 10)]

 

posted @ 2017-01-16 15:57  韩德田  阅读(2120)  评论(0)  编辑  收藏  举报