19.多进程

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# System entry point: watches the chat log files for changes.
"""Code by (author not recorded); adds multi-process handling."""


from notify_base import NotifyBase
from global_common import parse_mgame_log
import os
from mylog import logger
from fasttext_utils import get_result, get_env_fasttext
import re
import json
from xxx_utils import *
from thread_base import ThreadPool
import signal
import itertools
from multiprocessing import Process, Queue
import time



# Python 2 only: force the interpreter-wide default string encoding to UTF-8.
# NOTE(review): `sys` is not imported explicitly among the visible imports;
# presumably it arrives through `from xxx_utils import *` -- verify.
reload(sys)
sys.setdefaultencoding('utf8')


# Fallback regex for lines whose payload is not valid JSON: captures the
# `content` and `role_name` values as named groups.
pattern = re.compile(r'^.*content":\s*"(?P<content>.*?)",.*role_name":\s*"(?P<role_name>.*?)",.*')
# Alternation of marker substrings; manage_chat() skips any log line in which
# one of them is found.
PATTERN_STR = ur'#PHOTO#|needParse=|作为见证两人之间情谊的心意|邀请你和我一起|赠予了你所祈求的'
# Compiled from the UTF-8 encoded bytes of PATTERN_STR, so it is searched
# against byte strings.
pattern_filter = re.compile(PATTERN_STR.encode('utf-8'))

# Substitution callback: replace a whole match with its first captured group.
def handle(matchobj):
    """Return the text of capture group 1 of ``matchobj``.

    Used as the ``repl`` callable passed to ``re.sub``.
    """
    first_group = matchobj.group(1)
    return first_group

# Second-chance handling for lines the primary parser could not parse.
def retry_parse(line, log, fasttext_type):
    """Classify a chat line using the regex fallback.

    The timestamp is the text between the first ``[`` and the first ``]`` of
    ``line``.  The chat text is obtained by substituting the module-level
    ``pattern`` match with its first group; if the result contains the
    substring ``comment`` it is decoded as JSON and its ``comment`` value is
    used instead.  Lines whose label is not the "normal" class are written
    out through ``save_filter_chat``.

    Args:
        line: The log line as prepared by the caller.
        log: The raw log line; only used for the failure record.
        fasttext_type: Passed through as the first argument of ``get_result``.

    Returns:
        None.  Any ``Exception`` raised while processing is recorded through
        ``save_failed_chat`` instead of propagating.
    """
    try:
        index_1 = line.find('[')
        index_2 = line.find(']')
        content = re.sub(pattern, handle, line)
        if 'comment' in content:
            content = json.loads(content, strict=False)['comment']
        timestamp = line[index_1 + 1:index_2]
        sentence, label, prob, use = get_result(fasttext_type, content)
        logger.info('-xxx言论分类标签结果-%s-%s-%s-%s-%s' % (sentence, label, prob, timestamp, use))
        extra = "\"label\":\"%s\",\"prob\":\"%s\"," % (label.encode('utf-8'), prob)
        clog = line.replace('[Chat],{', '[Filter_Chat],{%s' % extra)
        if label != '正常类':
            # The date part of the timestamp, with dashes removed, is passed
            # as the second argument.
            save_filter_chat(clog, timestamp[0:10].replace('-', ''))
    except Exception as exc:
        # Only ordinary errors are handled here; KeyboardInterrupt and
        # SystemExit are allowed to propagate so the process can be stopped.
        save_failed_chat('None error: %s' % log.strip('\n'))
        # Keep the actual cause visible; the failure record above omits it.
        logger.info('retry_parse failed: %r' % (exc,))

# Process one chat log line.
def manage_chat(log, fasttext_type):
    """Classify a single chat log line and persist it when it is flagged.

    Lines matching ``pattern_filter`` are ignored.  Otherwise the line is
    trimmed to start at its first ``[`` and handed to ``parse_mgame_log``;
    when that returns ``None`` the work is delegated to ``retry_parse``.

    Args:
        log: The raw log line.
        fasttext_type: Passed through as the first argument of ``get_result``.
    """
    if pattern_filter.search(log) is not None:
        return

    stripped = log.strip('\n')
    # NOTE(review): when no '[' is present, find() yields -1 and this slice
    # keeps only the last character -- confirm that is acceptable.
    line = stripped[stripped.find('['):]

    parsed = parse_mgame_log(r'%s' % line)
    if parsed is None:
        retry_parse(line, log, fasttext_type)
        return

    sentence, label, prob, use = get_result(fasttext_type, parsed['content'])
    info = '-xxx言论分类标签结果-%s-%s-%s-%s-%s' % (
        sentence, label, prob, parsed['time'], use)
    extra = '"label":"%s","prob":"%s",' % (label, prob)
    clog = line.replace('[Chat],{', '[Filter_Chat],{%s' % extra)
    flagged = label != '正常类'
    if flagged:
        day = parsed['time'][0:10].replace('-', '')
        save_filter_chat(clog, day)
    logger.info('------%s------' % info)


class NotifyObj(NotifyBase):
    """NotifyBase subclass that spreads chat batches over two queues."""

    def __init__(self):
        super(NotifyObj, self).__init__()
        # Toggles between 0 and 1 on every inner_chat_f() call.
        self._flag = 0

    def get_batch_size(self):
        """Return the batch size, fixed at 600."""
        return 600

    def get_log_dir(self):
        """Return the realtime log directory for ``common.PRODUCT``."""
        # NOTE(review): `common` is not imported explicitly in the visible
        # imports; presumably provided by a star import -- verify.
        template = '/home/mg_dc/logs/%s/logs/_REALTIME_/'
        return template % common.PRODUCT

    def inner_chat_f(self, chat_arr):
        """Put ``chat_arr`` on the even or odd queue, alternating per call."""
        self._flag = (self._flag + 1) % 2
        target = q_even if self._flag == 0 else q_odd
        target.put(chat_arr)


def _drain_chat_queue(queue, banner):
    """Consume chat batches from ``queue`` forever and classify each line.

    Shared body of ``get_chat_odd`` and ``get_chat_even``.

    Args:
        queue: Queue yielding batches (sequences of log lines).
        banner: Message logged before each processed batch.
    """
    env_fasttext = get_env_fasttext()
    while True:
        chat_arr = queue.get(True)  # blocks until a batch is available
        if not chat_arr or len(chat_arr) < 100:
            # NOTE(review): a batch with fewer than 100 lines is not
            # processed and is not put back on the queue -- confirm that
            # dropping it is intended.
            time.sleep(10)
            continue
        logger.info(banner)
        for log in chat_arr:
            manage_chat(log, env_fasttext)


def get_chat_odd(q_odd):
    """Worker loop for the odd queue; never returns."""
    _drain_chat_queue(q_odd, '------奇数进程处理------')


def get_chat_even(q_even):
    """Worker loop for the even queue; never returns."""
    _drain_chat_queue(q_even, '------偶数进程处理------')

# One queue per worker; NotifyObj.inner_chat_f() feeds them alternately.
q_odd, q_even = Queue(), Queue()

# The worker processes are only constructed here; they are started in the
# __main__ guard.
p_odd = Process(target=get_chat_odd, args=(q_odd,))
p_even = Process(target=get_chat_even, args=(q_even,))


def signal_handler(sig, frame):
    """Signal callback: wait ten seconds, then terminate both workers.

    Args:
        sig: Signal number (unused).
        frame: Current stack frame (unused).
    """
    # NOTE(review): nothing here stops the calling process itself -- confirm
    # whether it is expected to keep running once the workers are gone.
    time.sleep(10)
    for worker in (p_odd, p_even):
        worker.terminate()


# System entry point.
def main():
    """Create a ``NotifyObj`` and run its ``main()`` method."""
    NotifyObj().main()


if __name__ == '__main__':
    # Launch both workers first, then install the SIGTERM handler in this
    # process, and finally enter the monitoring loop.
    for worker in (p_odd, p_even):
        worker.start()
    signal.signal(signal.SIGTERM, signal_handler)
    main()

  

posted @ 2018-08-15 16:09  桃源仙居  阅读(158)  评论(0)    收藏  举报