python实现tail(考虑到几种特殊情况)

1. 网上有很多种使用python实现tail的方法,大体思想都一样,典型的一种如下:

#!/usr/bin/env python
#!-*- coding: utf-8 -*-

################################################################################
#
# Copyright (c) 2015 XX.com, Inc. All Rights Reserved
#
################################################################################

################################################################################
# This module provide ...
# Third libs those depend on:
################################################################################

"""
Compiler Python 2.7.10
Authors: XX(XX@baidu.com)
Date: 2015-09-09 12:57:41
Todo: nothing $XX$2015-09-09
"""

"""SYS LIBS
"""


import os
import re
import sys
import time

import traceback

"""THIRD LIBS
"""

try:
    # Placeholder: import third-party libraries here.
    pass
except ImportError as e:
    # A missing third-party library is fatal: report it and exit immediately.
    # The parenthesised form of print behaves the same on Python 2 and 3.
    print(e)
    os._exit(-1)

"""CUSTOM libs
Strongly recommend using abs path when using custmo libs.
"""

# An empty `__all__` makes `from <module> import *` export nothing.
__all__ = []

# Python 2 only: make UTF-8 the codec used for implicit str/unicode conversion.
# On Python 3 `reload` is not a builtin and `sys.setdefaultencoding` does not
# exist, so the hack is skipped there instead of raising at import time.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')


def send_error(msg):
    """Report an error message.

    The message is only printed to standard output; no email delivery is
    implemented in this function.

    Arguments:
        msg - The message to report.
    """

    # Parenthesised print is valid on both Python 2 and Python 3.
    print(msg)


#********************************************************
#* Global defines start.                                *
#********************************************************

#********************************************************
#* Global defines end.                                  *
#********************************************************


class Tail(object):
    """Minimal ``tail -f`` style follower for a single file.

    The file is opened once when following starts, so this version does not
    notice when the file is truncated or replaced afterwards.

    Example:
        import tail

        # Create a tail instance
        t = tail.Tail('file-to-be-followed')

        # Register a callback that receives every new line.
        # Without a callback, new lines are written to standard out.
        t.register_callback(callback_function)

        # Follow the file, sleeping 5 seconds between iterations.
        # When no sleep time is given, 0.01 seconds is used.
        t.follow(s=5)
    """

    def __init__(self, tailed_file):
        """Validate the path and install the default callback.

        Arguments:
            tailed_file - Path of the file to be followed.

        Raises:
            TailError - If the path is missing, unreadable or a directory.
        """
        self.check_file_validity(tailed_file)
        self.callback = sys.stdout.write
        self.tailed_file = tailed_file

    def follow(self, s=0.01):
        """Poll the file forever and hand each newly read line to the callback.

        Reading starts at the current end of the file. Whatever ``readline``
        returns is forwarded unchanged, including a final line that does not
        end with a newline yet. This method never returns normally.

        Arguments:
            s - Seconds to sleep after every iteration; defaults to 0.01.
        """
        with open(self.tailed_file) as stream:
            # Skip everything that is already in the file.
            stream.seek(0, os.SEEK_END)
            while True:
                offset = stream.tell()
                text = stream.readline()
                if text:
                    self.callback(text)
                else:
                    # Nothing new: return to the remembered offset before retrying.
                    stream.seek(offset)
                time.sleep(s)

    def register_callback(self, func):
        """Use ``func`` instead of the current callback for every new line."""
        self.callback = func

    def check_file_validity(self, file_):
        """Raise TailError unless ``file_`` exists, is readable and is not a directory."""
        if not os.access(file_, os.F_OK):
            problem = "File '%s' does not exist"
        elif not os.access(file_, os.R_OK):
            problem = "File '%s' not readable"
        elif os.path.isdir(file_):
            problem = "File '%s' is a directory"
        else:
            return
        raise TailError(problem % (file_))


class TailError(Exception):
    """Error raised when a file cannot be tailed.

    Attributes:
        message - The human-readable description passed to the constructor.
    """

    def __init__(self, msg):
        """Store the message and initialise the base exception with it.

        Passing ``msg`` to ``Exception.__init__`` populates ``args``, so
        ``repr()``, tracebacks and pickling carry the message too.
        """
        super(TailError, self).__init__(msg)
        self.message = msg

    def __str__(self):
        """Return the message given at construction time."""
        return self.message

# Script entry point. This version defines no command-line behaviour;
# the guard is only a placeholder.
if __name__ == '__main__':
    pass

""" vim: set ts=4 sw=4 sts=4 tw=100 et: """

 

2. 但是考虑到以下几种情况,上面的方法会失效(有的情况下 shell 中的 tail 命令甚至也会失效):

    (1) 正在tail的文件被清空,例如 echo "" > test.log (这种情况下tail依然可以打印,上面程序无法打印)

    (2) 日志被logrotate以copy形式切割时,这种情况下tail依然可以打印,上面程序无法打印

    (3) 日志被logrotate以新建文件形式切割时,shell 的 tail 命令和上面的程序这两种方法都会失效。

 

我的应用场景是需要兼容上面的情况的,由于时间匆忙,简单地改良了一下,可以兼容上面的两种情况,同时可以避免由于logrotate切割导致的日志丢失,代码如下:

#!/usr/bin/env python

#!-*- coding: utf-8 -*-

################################################################################
#
# Copyright (c) 2015 XX.com, Inc. All Rights Reserved
#
################################################################################

################################################################################
# This module provide ...
# Third libs those depend on:
################################################################################

"""
Compiler Python 2.7.10
Authors: XX(XX@baidu.com)
Date: 2015-09-09 12:57:41
Todo: nothing $XX$2015-09-09
"""

"""SYS LIBS
"""


import os
import re
import sys
import time

import traceback

"""THIRD LIBS
"""

try:
    # Placeholder: import third-party libraries here.
    pass
except ImportError as e:
    # A missing third-party library is fatal: report it and exit immediately.
    # The parenthesised form of print behaves the same on Python 2 and 3.
    print(e)
    os._exit(-1)

"""CUSTOM libs
Strongly recommend using abs path when using custmo libs.
"""

# An empty `__all__` makes `from <module> import *` export nothing.
__all__ = []

# Python 2 only: make UTF-8 the codec used for implicit str/unicode conversion.
# On Python 3 `reload` is not a builtin and `sys.setdefaultencoding` does not
# exist, so the hack is skipped there instead of raising at import time.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')


def send_error(msg):
    """Report an error message.

    The message is only printed to standard output; no email delivery is
    implemented in this function.

    Arguments:
        msg - The message to report.
    """

    # Parenthesised print is valid on both Python 2 and Python 3.
    print(msg)


#********************************************************
#* Global defines start.                                *
#********************************************************

#********************************************************
#* Global defines end.                                  *
#********************************************************


class Tail(object):
    """Follow a file like ``tail -f`` and keep working across truncation and rotation.

    The followed path is re-checked on every iteration. When the file has
    shrunk, has been replaced by a different file, or is briefly missing, it
    is reopened and read from its beginning.

    Example:
        import tail

        # Create a tail instance
        t = tail.Tail('file-to-be-followed')

        # Register a callback that receives every new complete line.
        # Without a callback, new lines are written to standard out.
        t.register_callback(callback_function)

        # Follow the file, sleeping 5 seconds whenever there is nothing new.
        # When no sleep time is given, 0.01 seconds is used.
        t.follow(s=5)
    """

    # How often a failed reopen is retried, and the pause between attempts.
    MAX_REOPEN_TRIES = 10
    REOPEN_RETRY_DELAY = 0.1

    def __init__(self, tailed_file):
        """Validate the path, open it and position at its current end.

        Arguments:
            tailed_file - Path of the file to be followed.

        Raises:
            TailError - If the path is missing, unreadable or a directory.
        """
        self.check_file_validity(tailed_file)
        self.tailed_file = tailed_file
        self.callback = sys.stdout.write

        self.try_count = 0

        self.file_ = open(self.tailed_file, "r")
        # Take the size from the handle so it describes the file actually opened.
        self.size = os.fstat(self.file_.fileno()).st_size

        # Go to the end of file: only data appended from now on is reported.
        self.file_.seek(0, 2)

    def reload_tailed_file(self):
        """Reopen the followed path and start reading it from the beginning.

        Used after the file was emptied (``echo "" > file``) or replaced by
        logrotate. The previous handle is closed once the new one is in place.

        Returns:
            True when the file was reopened, False when it could not be opened.
        """
        try:
            new_file = open(self.tailed_file, "r")
        except (IOError, OSError):
            return False

        try:
            new_size = os.fstat(new_file.fileno()).st_size
        except OSError:
            new_file.close()
            return False

        old_file = getattr(self, "file_", None)
        # A freshly opened file is already positioned at its head.
        self.file_ = new_file
        self.size = new_size

        if old_file is not None:
            try:
                old_file.close()
            except (IOError, OSError):
                # Failing to close the stale handle must not stop the tail.
                pass
        return True

    def follow(self, s=0.01):
        """Poll the file forever and pass each new complete line to the callback.

        A trailing line that does not end with a newline yet is held back until
        the writer finishes it, so a line is never delivered in two pieces.
        This method never returns normally.

        Arguments:
            s - Seconds to sleep when no complete new line is available;
                defaults to 0.01.

        Raises:
            TailError - If the file has to be reopened and every attempt fails.
        """
        while True:
            if self._needs_reload():
                self._reopen_with_retry()

            line = self._read_complete_line()
            if line is None:
                # Idle: wait before polling again. Pending lines are drained
                # without sleeping so a burst is not throttled by ``s``.
                time.sleep(s)
            else:
                self.callback(line)

    def _needs_reload(self):
        """Return True when the path no longer matches the open handle."""
        try:
            on_disk = os.stat(self.tailed_file)
        except OSError:
            # The path can be missing for a moment while logrotate swaps files.
            return True

        if on_disk.st_size < self.size:
            # The file shrank: it was truncated or replaced by a smaller one.
            return True

        opened = os.fstat(self.file_.fileno())
        # Inode 0 means the platform reports no usable file identity.
        if on_disk.st_ino and opened.st_ino and on_disk.st_ino != opened.st_ino:
            # The path now names a different file than the one held open.
            return True

        self.size = on_disk.st_size
        return False

    def _reopen_with_retry(self):
        """Reopen the file, retrying a bounded number of times before giving up."""
        self.try_count = 0
        while self.try_count < self.MAX_REOPEN_TRIES:
            if self.reload_tailed_file():
                self.try_count = 0
                return
            self.try_count += 1
            time.sleep(self.REOPEN_RETRY_DELAY)

        raise TailError("Open %s failed after try %d times"
                        % (self.tailed_file, self.MAX_REOPEN_TRIES))

    def _read_complete_line(self):
        """Return the next newline-terminated line, or None if there is none yet."""
        position = self.file_.tell()
        line = self.file_.readline()
        if line.endswith("\n"):
            return line

        # Nothing new, or only the first part of a line still being written:
        # rewind so the whole line is read again once it is complete.
        self.file_.seek(position)
        return None

    def register_callback(self, func):
        """Use ``func`` instead of the current callback for every new line."""
        self.callback = func

    def check_file_validity(self, file_):
        """Raise TailError unless ``file_`` exists, is readable and is not a directory."""
        if not os.access(file_, os.F_OK):
            raise TailError("File '%s' does not exist" % (file_))
        if not os.access(file_, os.R_OK):
            raise TailError("File '%s' not readable" % (file_))
        if os.path.isdir(file_):
            raise TailError("File '%s' is a directory" % (file_))


class TailError(Exception):
    """Error raised when a file cannot be tailed.

    Attributes:
        message - The human-readable description passed to the constructor.
    """

    def __init__(self, msg):
        """Store the message and initialise the base exception with it."""
        # Passing msg on populates ``args`` for repr(), tracebacks and pickling.
        super(TailError, self).__init__(msg)
        self.message = msg

    def __str__(self):
        """Return the message given at construction time."""
        return self.message


if __name__ == '__main__':
    t = Tail("/home/syslog/switch.log")

    def print_msg(msg):
        # Parenthesised print is valid on both Python 2 and Python 3.
        print(msg)

    t.register_callback(print_msg)
    t.follow()

""" vim: set ts=4 sw=4 sts=4 tw=100 et: """

上面程序通过简单的测试,可以解决以上几种情况,由于比较匆忙,没有仔细考虑,也希望大家有什么想法尽量贡献哈。

上述代码中对行尾是否为 \n 的判断(`line.endswith("\n")`,原文以红色标出)是后来补充的 bug 修复:在使用过程中,发现有的一行内容被截断成两行了,加上这段判断可以修复该 bug。

该 bug 的原因应该是:readline 读到文件末尾时,即使最后一行还没有写完(尚未以 \n 结尾),也会把已有的内容直接返回,于是同一行会被分成两次读出。有兴趣的同学可以测试验证下。

 

posted @ 2015-10-14 19:54  DreamerHui  阅读(12839)  评论(0)  编辑  收藏  举报