Handling large files in Python with multiple threads

In a multithreaded setup, a queue is the usual way to arbitrate access to a shared resource between threads (a sketch of that pattern appears at the end of this post).

But when the file is very large, reading everything into memory up front simply uses too much memory.

So here I hand-rolled a small wrapper that lets multiple threads pull lines from a single file iterator via f.__next__() (i.e. next(f)), one line at a time.

# -*- coding:utf-8 -*-

import threading


class Geturl(object):
    """Wraps an open file so that multiple threads can pull lines from it safely."""

    def __init__(self, open_file):
        self.open_file = open_file
        self.num = 0
        self.__mutex = threading.RLock()      # guards the shared file iterator
        self.f = open(self.open_file, 'r')
        self.finished = False                 # set once the file has been exhausted
        self.kafka_mutex = threading.RLock()  # separate lock, unused in this snippet

    def _line(self):
        # Only one thread at a time may advance the shared file iterator.
        with self.__mutex:
            if self.finished:
                return StopIteration
            try:
                line = self.f.__next__()
            except StopIteration:
                # End of file: close it and hand back a sentinel instead of raising,
                # so later callers never touch a closed file object.
                line = StopIteration
                self.finished = True
                self.f.close()
            '''
            if self.num % 1000 == 0:
                print(self.num)
            self.num += 1
            '''
            return line

    def get_line(self):
        return self._line()


def _deal(deal_file):
    while True:
        try:
            item = deal_file.get_line()
            if item is StopIteration:
                # Sentinel from Geturl: the whole file has been consumed.
                print("all task has done!")
                break
            # process the line here
        except Exception as e:
            print("error:", e)


if __name__ == "__main__":
    filename = 'Bigfile'
    geturl = Geturl(filename)
    _deal(geturl)
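
The __main__ block above drains the file from a single thread. Below is a minimal sketch of the multithreaded use the wrapper is meant for, reusing Geturl and _deal from above; the worker count of 4 and the run_workers name are my own placeholders, not part of the original post.

def run_workers(filename, workers=4):
    # Share one Geturl instance across several threads; the lock inside
    # _line makes the concurrent next() calls on the file safe.
    geturl = Geturl(filename)
    threads = [threading.Thread(target=_deal, args=(geturl,)) for _ in range(workers)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

# run_workers('Bigfile')

A plain threading.Lock would also work here, since _line never re-acquires the lock it already holds; the RLock simply keeps the original choice.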
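
For comparison, here is a rough sketch of the queue-based pattern mentioned at the top of the post: one reader feeds lines into a bounded queue.Queue and several worker threads consume them, so memory stays bounded even for a huge file. The maxsize of 1000, the worker count, and the None end-of-input sentinel are all assumptions of mine, not details from the original post.

import queue
import threading

def reader(filename, q, workers):
    with open(filename, 'r') as f:
        for line in f:
            q.put(line)    # blocks when the queue is full, bounding memory use
    for _ in range(workers):
        q.put(None)        # one sentinel per worker to signal end of input

def worker(q):
    while True:
        line = q.get()
        if line is None:
            break
        # process the line here

def run_queue_version(filename, workers=4):
    q = queue.Queue(maxsize=1000)
    threads = [threading.Thread(target=worker, args=(q,)) for _ in range(workers)]
    for t in threads:
        t.start()
    reader(filename, q, workers)
    for t in threads:
        t.join()

# run_queue_version('Bigfile')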