19.多进程
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# System entry point: watches the chat log files for changes.
"""Entry point of the chat-log monitor; adds multi-process handling."""
from notify_base import NotifyBase
from global_common import parse_mgame_log
import os
from mylog import logger
from fasttext_utils import get_result, get_env_fasttext
import re
import json
from xxx_utils import *
from thread_base import ThreadPool
import signal
import itertools
from multiprocessing import Process, Queue
import time
# Python 2 only: reload sys so that setdefaultencoding is available again,
# then make UTF-8 the implicit codec for str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')
# Fallback pattern for log lines that are not valid JSON: captures the values
# of the "content" and "role_name" fields as named groups.
pattern = re.compile(r'^.*content":\s*"(?P<content>.*?)",.*role_name":\s*"(?P<role_name>.*?)",.*')
# Alternation of marker strings; manage_chat() skips any log line in which
# one of them is found.
PATTERN_STR = ur'#PHOTO#|needParse=|作为见证两人之间情谊的心意|邀请你和我一起|赠予了你所祈求的'
# The unicode pattern is encoded to UTF-8 bytes before compiling
# (presumably because the log lines are byte strings -- TODO confirm).
pattern_filter = re.compile(PATTERN_STR.encode('utf-8'))
# Replacement callback: swap the matched text for its first captured group.
def handle(matchobj):
    """Return the text captured by group 1 of ``matchobj``.

    ``retry_parse`` passes this function as the replacement callable when
    substituting with the module-level ``pattern``, whose first group is
    ``content``.
    """
    first_group = matchobj.group(1)
    return first_group
# Second-pass handling for lines that failed the primary parse.
def retry_parse(line, log, fasttext_type):
    """Classify a chat line that ``parse_mgame_log`` could not parse.

    The timestamp is sliced from between the first ``[`` and the first ``]``
    of ``line``. The chat text is obtained by replacing the module-level
    ``pattern`` match with its first group; if the result still contains the
    text ``comment`` it is decoded as JSON and its ``comment`` field is used
    instead. The text is classified with ``get_result`` and, unless the
    label is ``正常类``, the line is rewritten as a ``[Filter_Chat]`` record
    carrying the label and probability and handed to ``save_filter_chat``.

    Args:
        line: The log line to analyse.
        log: The raw log line; only used for the failure record.
        fasttext_type: Passed unchanged as the first argument of
            ``get_result``.

    Returns:
        None. Errors are not raised: any ``Exception`` is recorded through
        ``save_failed_chat``.
    """
    try:
        time_start = line.find('[')
        time_end = line.find(']')
        content = pattern.sub(handle, line)
        if 'comment' in content:
            content = json.loads(content, strict=False)['comment']
        timestamp = line[time_start + 1:time_end]
        sentence, label, prob, use = get_result(fasttext_type, content)
        logger.info('-xxx言论分类标签结果-%s-%s-%s-%s-%s' % (sentence, label, prob, timestamp, use))
        extra = "\"label\":\"%s\",\"prob\":\"%s\"," % (label.encode('utf-8'), prob)
        clog = line.replace('[Chat],{', '[Filter_Chat],{%s' % extra)
        # Lines labelled as normal are not archived (the original
        # save_normal_chat call was disabled).
        if label != '正常类':
            # First 10 characters of the timestamp with dashes removed
            # (presumably YYYY-MM-DD -> YYYYMMDD -- TODO confirm format).
            save_filter_chat(clog, timestamp[0:10].replace('-', ''))
    except Exception:
        # Deliberately broad, but no longer a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate out of the worker.
        save_failed_chat('None error: %s' % log.strip('\n'))
# Handle a single chat log line.
def manage_chat(log, fasttext_type):
    """Classify one raw chat log line and archive it when it is not normal.

    Lines matching ``pattern_filter`` are skipped. Otherwise the line is cut
    to start at its first ``[`` and parsed with ``parse_mgame_log``; when
    that returns ``None`` the work is delegated to ``retry_parse``. A parsed
    line is classified with ``get_result``; unless the label is ``正常类``
    the line is rewritten as a ``[Filter_Chat]`` record and passed to
    ``save_filter_chat``. The classification result is logged in all cases.

    Args:
        log: The raw log line.
        fasttext_type: Passed unchanged as the first argument of
            ``get_result``.
    """
    if pattern_filter.search(log) is not None:
        return

    stripped = log.strip('\n')
    line = stripped[stripped.find('['):]

    parsed = parse_mgame_log(r'%s' % line)
    if parsed is None:
        retry_parse(line, log, fasttext_type)
        return

    sentence, label, prob, use = get_result(fasttext_type, parsed['content'])
    info = '-xxx言论分类标签结果-%s-%s-%s-%s-%s' % (sentence, label, prob, parsed['time'], use)
    extra = "\"label\":\"%s\",\"prob\":\"%s\"," % (label, prob)
    clog = line.replace('[Chat],{', '[Filter_Chat],{%s' % extra)
    is_normal = (label == '正常类')
    if not is_normal:
        # First 10 characters of the timestamp with dashes removed.
        day_key = parsed['time'][0:10].replace('-', '')
        save_filter_chat(clog, day_key)
    logger.info('------%s------' % info)
class NotifyObj(NotifyBase):
    """``NotifyBase`` subclass that hands chat batches to the two worker
    queues in alternation."""

    def __init__(self):
        super(NotifyObj, self).__init__()
        # Flips between 0 and 1 on every batch; selects the target queue.
        self._flag = 0

    def get_batch_size(self):
        """Return the fixed batch size, 600."""
        return 600

    def get_log_dir(self):
        """Return the realtime log directory for ``common.PRODUCT``.

        NOTE(review): ``common`` is not imported by name in this file; it
        presumably arrives through ``from xxx_utils import *`` -- confirm.
        """
        return '/home/mg_dc/logs/%s/logs/_REALTIME_/' % common.PRODUCT

    def inner_chat_f(self, chat_arr):
        """Put ``chat_arr`` on ``q_odd`` or ``q_even``, alternating per call.

        The flag is toggled first, so the first batch goes to ``q_odd``.
        """
        self._flag = (self._flag + 1) % 2
        target_queue = q_odd if self._flag else q_even
        target_queue.put(chat_arr)
def _consume_chat_batches(queue, banner):
    """Worker loop shared by ``get_chat_odd`` and ``get_chat_even``.

    Obtains the classifier environment once via ``get_env_fasttext`` and
    then loops forever: block on ``queue`` for the next batch, log
    ``banner`` and run ``manage_chat`` on every line of the batch.

    Args:
        queue: Queue from which batches (sequences of log lines) are read.
        banner: Message logged before each processed batch.
    """
    env_fasttext = get_env_fasttext()
    while True:
        chat_arr = queue.get(True)
        if not chat_arr or len(chat_arr) < 100:
            # NOTE(review): a batch that is empty or shorter than 100 lines
            # is not processed at all -- the worker only sleeps and moves
            # on. Behaviour kept from the original; confirm that dropping
            # small batches is intended.
            time.sleep(10)
            continue
        logger.info(banner)
        for log in chat_arr:
            manage_chat(log, env_fasttext)


def get_chat_odd(q_odd,):
    """Process target of the "odd" worker: consume batches from ``q_odd``."""
    _consume_chat_batches(q_odd, '------奇数进程处理------')


def get_chat_even(q_even):
    """Process target of the "even" worker: consume batches from ``q_even``."""
    _consume_chat_batches(q_even, '------偶数进程处理------')
# One queue per worker process; NotifyObj.inner_chat_f alternates between them.
q_odd = Queue()
q_even = Queue()
# The worker processes are only constructed here; they are started in the
# __main__ block below.
p_odd = Process(target=get_chat_odd, args=(q_odd,))
p_even = Process(target=get_chat_even, args=(q_even,))
def signal_handler(sig, frame):
    """SIGTERM handler: stop both worker processes, then exit.

    The original handler terminated the workers and returned, which left
    the main process running and still feeding queues that no longer had a
    consumer. The handler now also reaps the workers and ends the main
    process.

    Args:
        sig: Signal number (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        SystemExit: always, with exit status 0, once the workers are stopped.
    """
    # Grace period kept from the original before the workers are killed.
    time.sleep(10)
    workers = (p_odd, p_even)
    for worker in workers:
        worker.terminate()
    for worker in workers:
        # Bounded wait so a stuck worker cannot hang the shutdown.
        worker.join(5)
    for pending in (q_odd, q_even):
        # The consumers are gone; do not let interpreter exit block on
        # flushing batches that are still buffered in the queues.
        pending.cancel_join_thread()
    raise SystemExit(0)
# Program entry point.
def main():
    """Create a ``NotifyObj`` and invoke its ``main()`` method."""
    NotifyObj().main()
if __name__ == '__main__':
    # Start both workers before the SIGTERM handler is installed. They are
    # not joined here (the join calls were disabled in the original).
    for worker in (p_odd, p_even):
        worker.start()
    signal.signal(signal.SIGTERM, signal_handler)
    main()
http://www.cnblogs.com/makexu/

浙公网安备 33010602011771号