python操作kafka

import sys
import time
import json
import redis
import pymysql
import datetime

from kafka import KafkaProducer


# 把格式化时间转换成时间戳
def str_to_timestamp(str_time=None, format='%Y-%m-%d %H:%M:%S'):
    if str_time:
        time_tuple = time.strptime(str_time, format)  # 把格式化好的时间转换成元祖
        result = time.mktime(time_tuple)  # 把时间元祖转换成时间戳
        return int(result)
    return int(time.time())


# 把时间戳转换成格式化
def timestamp_to_str(timestamp=None, format='%Y-%m-%d'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# 把时间戳转换成格式化
# 年
def timestamp_to_str_year(timestamp=None, format='%Y'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# 把时间戳转换成格式化
# 天
def timestamp_to_str_day(timestamp=None, format='%Y%m%d'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# 把时间戳转换成格式化
# 月
def timestamp_to_str_today_yue(timestamp=None, format='%Y%m'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# redis主库
class Redis(object):
    conn = None

    def __init__(self):
        # poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=14, password='root1234@A')
        # 本地测试
        # poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
        self.conn = redis.Redis(connection_pool=poll)


# mysql链接
class LogMysql(object):
    conn = None
    cursor = None

    def __init__(self):
        self.conn = pymysql.connect(host='rm-2zezqp8sll2swzwby.mysql.rds.aliyuncs.com', user='datacenter',
                                    password='kbs11zx@',
                                    database='log', charset='utf8')

        # 本地测试
        # self.conn = pymysql.connect(host='192.168.10.5', user='root',
        #                             password='root',
        #                             database='unionlog', charset='utf8')
        self.cursor = self.conn.cursor()

    # 为了方便使用一般会选择将查询结果加上字段名称以字典组的方式返回查询结果
    def dict_fetchall(self):
        "Return all rows from a cursor as a dict"
        # 获取查询字段
        columns = [col[0] for col in self.cursor.description]
        # print(columns)
        return [dict(zip(columns, row)) for row in self.cursor.fetchall()]
    

    def get_table_list(self):
        # 判断表是否存在
        self.cursor.execute("SHOW TABLES")
        res = self.cursor.fetchall()
        table_list = []
        for i in res:
            table_list.append(i[0])
        # print("table_list", table_list)
        return table_list


from kafka import KafkaClient
from kafka.producer import SimpleProducer


def send_data_2_kafka(topic, datas):
    '''
        向kafka解析队列发送数据
    '''
    PARTNUM = 100
    TOPICNAME = topic
    KAFKABROKER = ["192.168.2.110:9092"]
    client = KafkaClient(hosts=KAFKABROKER, timeout=30)
    producer = SimpleProducer(client, async_send=False)
    curcount = int(len(datas)//PARTNUM)
    for i in range(curcount):
        start = i*PARTNUM
        if i != curcount - 1:
            end = (i+1)*PARTNUM
            curdata = datas[start:end]
            future = producer.send_messages(TOPICNAME, *curdata)
            print(future)
        else:
            curdata = datas[start:]
            future = producer.send_messages(TOPICNAME, *curdata)
            print(future)
        
    producer.stop()
    client.close()


def kafka_send_date(topic, date):
    # kafka生产者链接
    producer = KafkaProducer(bootstrap_servers='192.168.2.110:9092')
    # future = producer.send(topic, json.dumps(date).encode())
    # future = producer.send(topic, str(date).replace("'", '"').encode('utf-8'))
    r = bytes('{}'.format(date), 'utf-8')
    future = producer.send(topic, r)
    record_metadata = future.get(timeout=10)
    print(record_metadata, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


# 第一步kafka数据消息
def work_kafka(date):
    data = {
        "ct": "rewash",
        "visitTime": "%s" % date,
    }
    # 数据存储Topic为rewash
    kafka_send_date("rewash", data)


# 第二步将keyword存入数据库
def work_redis(date):
    one, two, three = date.split('-')
    # year = one
    # month = one + two
    day = one + two + three
    # print(year, month, day)

    now_day = ''.join(date.split('-'))
    # now_yestoday = timestamp_to_str(int(str_to_timestamp(date) - 24 *60 *60))
    r_client = Redis()
    m_client = LogMysql()
    key = 'log.hash.keywordlogdetails.%s' % now_day
    sql = """SELECT * from keywordlogdetails{0} WHERE VisitTime like '{1}{2}'""" .format(day, date, "%")
    # sql = """SELECT * from logdetailskeyword"""
    m_client.cursor.execute(sql)
    res = m_client.dict_fetchall()
    # print(res)
    for item in res:
        # print(item)
        pid = item.get('Pid', '')
        keyword = item.get('Keyword', '')
        print(pid, keyword)
        hash_key = 'keyword.%s' % pid
        hash_value = json.dumps(keyword)
        r_client.conn.hset(key, hash_key, hash_value)
        # time.sleep(100)
    r_client.conn.close()
    m_client.cursor.close()
    m_client.conn.close()
    pass


def work_kafka_mysql(date):
    one, two, three = date.split('-')
    year = one
    month = one + two
    day = one + two + three
    # print(year, month, day)

    # 数据库(mysql)链接
    m_client = LogMysql()
    # kafka生产者链接
    # producer = KafkaProducer(bootstrap_servers='192.168.10.10:9092')
    # kafka Topic
    topic = "rewash"

    # 数据列表
    view_list = []
    click_list = []

    # view
    sql_view = r"""SELECT * from viewlogdetails{0} order by VisitTime""".format(day)
    m_client.cursor.execute(sql_view)
    res_view = m_client.dict_fetchall()
    for item in res_view:
        rel_data = {}
        now_date = item.get('VisitTime', '')
        if now_date:
            item.update({"VisitTime": now_date.strftime("%Y-%m-%d %H:%M:%S")})
        rel_data.update({"ct": "view"})
        rel_data.update({"id": item.get('Id', "")})
        rel_data.update({"uid": item.get('User', "")})
        rel_data.update({"url": item.get('Url', "")})
        rel_data.update({"domain": item.get('Domain', "")})
        rel_data.update({"device": int(item.get('Device', 0))})
        rel_data.update({"refer": item.get('Refer', '')})
        rel_data.update({"ip": item.get('Ip', "")})
        rel_data.update({"visitTime": str_to_timestamp(item.get('VisitTime', "")) * 1000})
        rel_data.update({"userAgent": item.get('UserAgent', "")})
        rel_data.update({"ccontent": item.get('Content', r"{}")})
        res = json.dumps(rel_data)
        # kafka_send_date(topic, res)
        r = bytes('{}'.format(res), 'utf-8')
        view_list.append(r)
    send_data_2_kafka(topic, view_list)


    # click
    sql_click = r"""SELECT * from clicklogdetails{0} WHERE visitTime like '{1}{2}' order by visitTime""".format(month, date, "%")
    m_client.cursor.execute(sql_click)
    res_click = m_client.dict_fetchall()
    for item in res_click:
        rel_data = {}
        now_date = item.get('visitTime', '')
        if now_date:
            item.update({"visitTime": now_date.strftime("%Y-%m-%d %H:%M:%S")})
        rel_data.update({"ct": "click"})
        rel_data.update({"id": item.get('Pid', "")})
        rel_data.update({"uid": item.get('Uid', "")})
        rel_data.update({"url": item.get('Url', "")})
        rel_data.update({"domain": item.get('Domain', "")})
        rel_data.update({"device": int(item.get('Device', 0))})
        rel_data.update({"refer": item.get('Refer', "")})
        rel_data.update({"ip": item.get('Ip', "")})
        rel_data.update({"visitTime": str_to_timestamp(item.get('visitTime', "")) * 1000})
        rel_data.update({"userAgent": item.get('UserAgent', "")})
        rel_data.update({"ctype": int(item.get('Ctype', 0))})
        rel_data.update({"key": item.get('KeyButton', "")})
        rel_data.update({"ccontent": item.get('Content', r"{}")})
        res = json.dumps(rel_data)
        # kafka_send_date(topic, res)
        r = bytes('{}'.format(res), 'utf-8')
        click_list.append(r)
    send_data_2_kafka(topic, click_list)

    # message1
    message_list = []
    # 
    sql_message_1 = r"""SELECT * from messagelogdetails{0} WHERE visitTime like '{1}{2}'""".format(year, date, "%")
    m_client.cursor.execute(sql_message_1)
    res_customer_1 = m_client.dict_fetchall()
    
    for item in res_customer_1:
        rel_data = {}
        now_date = item.get('visitTime', '')
        if now_date:
            item.update({"visitTime": now_date.strftime("%Y-%m-%d %H:%M:%S")})
        rel_data.update({"ct": "message"})
        rel_data.update({"id": item.get('Pid', '')})
        rel_data.update({"uid": item.get('Uid', '')})
        rel_data.update({"url": item.get('Url', '')})
        rel_data.update({"domain": item.get('Domain', '')})
        rel_data.update({"device": int(item.get('Device', 0))})
        rel_data.update({"tel": item.get('Tel', '')})
        rel_data.update({"telEncryption": item.get('TelEncryption', '')})
        rel_data.update({"ip": item.get('Ip', '')})
        rel_data.update({"visitTime": str_to_timestamp(item.get('visitTime', '')) * 1000})
        rel_data.update({"content": item.get('Content', r"{}")})
        rel_data.update({"ccontent": item.get('Ccontent', r"{}")})
        res = json.dumps(rel_data)
        # kafka_send_date(topic, res)
        r = bytes('{}'.format(res), 'utf-8')
        message_list.append(r)

    
    # message2
    sql_message_2 = r"""SELECT * from messagelogdetailsbad{0} WHERE visitTime like '{1}{2}'""".format(year, date, "%")
    m_client.cursor.execute(sql_message_2)
    res_customer_2 = m_client.dict_fetchall()
    
    for item in res_customer_2:
        rel_data = {}
        now_date = item.get('visitTime', '')
        if now_date:
            item.update({"visitTime": now_date.strftime("%Y-%m-%d %H:%M:%S")})
        rel_data.update({"ct": "message"})
        rel_data.update({"id": item.get('Pid', '')})
        rel_data.update({"uid": item.get('Uid', '')})
        rel_data.update({"url": item.get('Url', '')})
        rel_data.update({"domain": item.get('Domain', '')})
        rel_data.update({"device": int(item.get('Device', 0))})
        rel_data.update({"tel": item.get('Tel', '')})
        rel_data.update({"telEncryption": item.get('TelEncryption', '')})
        rel_data.update({"ip": item.get('Ip', '')})
        rel_data.update({"visitTime": str_to_timestamp(item.get('visitTime', '')) * 1000})
        rel_data.update({"content": item.get('Content', r"{}")})
        rel_data.update({"ccontent": item.get('Ccontent', r"{}")})
        res = json.dumps(rel_data)
        # kafka_send_date(topic, res)
        r = bytes('{}'.format(res), 'utf-8')
        message_list.append(r)
    send_data_2_kafka(topic, message_list)

def work_get_table(table, base_table):
    # 1、访问明细表
    #     基础表名:LogDetailsPages
    #     建表规则:基础表名+日期(天),如LogDetailsPages20191115
    #     建表语句:create table LogDetailsPages日期 like LogDetailsPages

    # 2、点击明细表
    #     基础表名:LogDetailsPageClick
    #     建表规则:基础表名+日期(月),如LogDetailsPageClick201911
    #     建表语句:create table LogDetailsPageClick日期 like LogDetailsPageClick

    # 3、留言表
    #     基础表名:LogDetailsLeavingTel
    #     建表规则:基础表名+日期(年),如LogDetailsLeavingTel2019
    #     建表语句:create table LogDetailsLeavingTel日期 like LogDetailsLeavingTel
    # 判断表是否存在
    log_mysql = LogMysql()
    log_mysql.cursor.execute("SHOW TABLES")
    res = log_mysql.cursor.fetchall()
    table_list = []
    for i in res:
        table_list.append(i[0])
    if table in table_list:
        return True
    else:
        try:
            sql = """create table %s like %s""" % (table, base_table)
            print('------start------')
            log_mysql.cursor.execute(sql)
            log_mysql.cursor.close()
            log_mysql.conn.close()
            print('------end-------')
        except Exception as e:
            print(e)
            pass
        return False


def create_tables(date):
    one, two, three = date.split('-')
    # print(one, two, three)
    year = one
    month = one + two
    day = one + two + three
    # 留言板+营销模板展现量
    LogDetailsMkMg = "LogDetailsMkMg".lower() + day
    # 访问明细
    LogDetailsPages = "LogDetailsPages".lower() + day
    # 点击明细
    LogDetailsPageClick = "LogDetailsPageClick".lower() + month
    # 留言表 
    LogDetailsLeavingTel = "LogDetailsLeavingTel".lower() + year
    # 创建表
    work_get_table(LogDetailsMkMg, "LogDetailsMkMg".lower())
    work_get_table(LogDetailsPages, "LogDetailsPages".lower())
    work_get_table(LogDetailsPageClick, "LogDetailsPageClick".lower())
    work_get_table(LogDetailsLeavingTel, "LogDetailsLeavingTel".lower())


def main():
    print('====================================开始====================================')
    # date = str(sys.argv[1])
    date = "2019-11-18"

    # 判断表是否存在-不存在创建表
    create_tables(date)
    
    print('==========第一步 往kafka放数据 开始==========')
    work_kafka(date)
    print('==========第一步 往kafka放数据 结束==========')

    print('==========第二步 往redis放keyword数据 开始==========')
    work_redis(date)
    print('==========第二步 往redis放keyword数据 结束==========')

    print('==========第三步 往kafka放view、click、customer数据 开始==========')
    work_kafka_mysql(date)
    print('==========第三步 往kafka放view、click、customer数据 结束==========')

    print('====================================结束====================================')


def test_main():
    pass


if __name__ == '__main__':
    # date = str(sys.argv[1])
    # main()
    # test_main()
    # work_redis(date)
    pass
kafka赛数据
# —*— coding: utf-8 _*_
import uuid
import ast
import time
import json
import copy
# url解码与提取参数
from urllib import parse

import redis
import requests
import pymysql
from kafka import KafkaConsumer

import logging
from logging import handlers
import random


# 日志记录
class Logger(object):
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }  # 日志级别关系映射

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        self.logger = logging.getLogger(filename)
        format_str = logging.Formatter(fmt)  # 设置日志格式
        self.logger.setLevel(self.level_relations.get(level))  # 设置日志级别
        sh = logging.StreamHandler()  # 往屏幕上输出
        sh.setFormatter(format_str)  # 设置屏幕上显示的格式
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount,
                                               encoding='utf-8')  # 往文件里写入#指定间隔时间自动生成文件的处理器
        # 实例化TimedRotatingFileHandler
        # interval是时间间隔,backupCount是备份文件的个数,如果超过这个个数,就会自动删除,when是间隔的时间单位,单位有以下几种:
        # S 秒
        # M 分
        # H 小时、
        # D 天、
        # W 每星期(interval==0时代表星期一)
        # midnight 每天凌晨
        th.setFormatter(format_str)  # 设置文件里写入的格式
        self.logger.addHandler(sh)  # 把对象加到logger里
        self.logger.addHandler(th)


file = 'all'
try:
    file = __file__.split("/")[-1]
except Exception as e:
    pass
log = Logger('%s.log' % file)
logger = log.logger

# 把时间戳转换成格式化
def timestamp_to_str(timestamp=None, format='%Y-%m-%d %H:%M:%S'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)

def timestamp_to_str_data(timestamp=None, format='%Y-%m-%d'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# 把时间戳转换成格式化
def timestamp_to_str_(timestamp=None, format='%H'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


# 把时间戳转换成格式化
def timestamp_to_str_today(timestamp=None, format='%Y%m%d'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


def timestamp_to_str_today_yue(timestamp=None, format='%Y%m'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)


def timestamp_to_str_today_year(timestamp=None, format='%Y'):
    if timestamp:
        time_tuple = time.localtime(timestamp)  # 把时间戳转换成时间元祖
        result = time.strftime(format, time_tuple)  # 把时间元祖转换成格式化好的时间
        return result
    else:
        return time.strptime(format)
    

# UA
ua_dict = {

    "www.google.com/bot": "谷歌爬虫",
    "Baiduspider": "百度爬虫",
    "MQQBrowser": "QQ浏览器",
    "UCBrowser": "UC浏览器",
    "MiuiBrowser": "小米浏览器",
    "iqiyi": "爱奇艺",
    "BiliApp": "BiliApp",
    "SogouMobileBrowser": "Sogou浏览器",
    "SogouSearch": "Sogou浏览器NoHead",
    "baiduboxapp": "百度app",
    "HuaweiBrowser": "华为浏览器",
    "VivoBrowser": "vivo浏览器",
    "OppoBrowser": "oppo浏览器",
    "HeyTapBrowser": "HeyTap浏览器",
    "EUI Browser": "EUI浏览器",
    "SamsungBrowser": "三星浏览器",
    "MicroMessenger": "微信内置浏览器",
    "Quark": "夸克浏览器",
    "QHBrowser": "360浏览器",
    "baidubrowser": "百度浏览器",

    "Chrome": "谷歌浏览器",
    "Safari": "Safari浏览器",
}
Android_ua_dict = copy.deepcopy(ua_dict)
Android_ua_dict.pop('Safari')

iphone = list(ua_dict.keys())

Android = list(Android_ua_dict.keys())

print(iphone, Android)

# Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0 Mobile/15D100 Safari/604.1

def ua_browser(user_agent):
    browser = '其他'
    user_agent = str(user_agent).lower()

    if user_agent.find('Mobile'.lower()) >= 0:
        # wap
        # android
        if user_agent.find('android') >= 0:
            for a_item in Android:
                if user_agent.find(a_item.lower()) >= 0:
                    browser = ua_dict.get(a_item, '其他')
                    break
            pass
        # iphone
        elif user_agent.find('iphone') >= 0:
            for i_item in iphone:
                if user_agent.find(i_item.lower()) >= 0:
                    browser = ua_dict.get(i_item, '其他')
                    break
            pass
        else:
            for a1_item in Android:
                if user_agent.find(a1_item.lower()) >= 0:
                    browser = ua_dict.get(a1_item, '其他')
                    break
            pass
        pass
    else:
        browser = 'pc'
        # pc
        pass
    return browser


# redis主库
class Redis_5_219_6379_9(object):
    conn = None
    def __init__(self):
        # poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=9, password='root1234@A')
        # 本地测试
        # poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
        self.conn = redis.Redis(connection_pool=poll)


class Redis_5_219_6379_11(object):
    conn = None
    def __init__(self):
        # poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=11, password='root1234@A')
        # 本地测试
        # poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
        self.conn = redis.Redis(connection_pool=poll)


class Redis_5_219_6379_14(object):
    conn = None
    def __init__(self):
        # poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
        poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=14, password='root1234@A')
        # 本地测试
        # poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
        self.conn = redis.Redis(connection_pool=poll)


# log 数据库连接
class LogMysql(object):
    conn = None
    cursor = None

    def __init__(self):
        self.conn = pymysql.connect(host='rm-2zezqp8sll2swzwby.mysql.rds.aliyuncs.com', user='datacenter',
                            password='kbs11zx@',
                            database='log', charset='utf8')
        self.cursor = self.conn.cursor()

    # 为了方便使用一般会选择将查询结果加上字段名称以字典组的方式返回查询结果
    def dict_fetchall(self):
        "Return all rows from a cursor as a dict"
        # 获取查询字段
        columns = [col[0] for col in self.cursor.description]
        print(columns)
        return [dict(zip(columns, row)) for row in self.cursor.fetchall()]
    

    # 获取表列表
    def get_table_list(self):
        # 判断表是否存在
        self.cursor.execute("SHOW TABLES")
        res = self.cursor.fetchall()
        table_list = []
        for i in res:
            table_list.append(i[0])
        # print("table_list", table_list)
        return table_list
    

def get_table_info(log_mysql, table):
    # 判断表是否存在
    log_mysql.cursor.execute("SHOW TABLES")
    res = log_mysql.cursor.fetchall()
    table_list = []
    for i in res:
        table_list.append(i[0])
    if table in table_list:
        return True
    else:
        # 创建表
        sql_click = """create table %s like wulogdetailspageclick""" % (table)
        try:

            print('------view-start------')
            log_mysql.cursor.execute(sql_click)
            print('------view-end-------')
        except Exception as e:
            print(e)
            pass
        return False


def get_table_info_message(log_mysql, table):
    # 判断表是否存在
    log_mysql.cursor.execute("SHOW TABLES")
    res = log_mysql.cursor.fetchall()
    table_list = []
    for i in res:
        table_list.append(i[0])
    if table in table_list:
        return True
    else:
        # 创建表
        sql_click = """create table %s like wulogdetailsleavingtel""" % (table)
        try:

            print('------view-start------')
            log_mysql.cursor.execute(sql_click)
            print('------view-end-------')
        except Exception as e:
            print(e)
            pass
        return False


def get_table_info_view(log_mysql, table):
    # 判断表是否存在
    log_mysql.cursor.execute("SHOW TABLES")
    res = log_mysql.cursor.fetchall()
    table_list = []
    for i in res:
        table_list.append(i[0])
    if table in table_list:
        return True
    else:
        # 创建表
        sql_click = """create table %s like wulogdetailspages""" % (table)
        try:

            print('------view-start------')
            log_mysql.cursor.execute(sql_click)
            print('------view-end-------')
        except Exception as e:
            print(e)
            pass
        return False

def get_site_name():
    res_dict = {}
    m_client = LogMysql()
    sql = """select domain, site from siteinfo"""
    m_client.cursor.execute(sql)
    res = m_client.dict_fetchall()
    for item in res:
        res_dict.update({item.get('domain'): item.get('site')})
    m_client.cursor.close()
    m_client.conn.close()
    return res_dict

site_name_dict = get_site_name()

def get_sitetype(domain, url):
    sem_site_list = ['beimu.xigsa.cn', 'beimu.qidian36.com']
    if domain in sem_site_list:
        return 2
    else:
        if url.find('beimu.com/tg') >= 0 or url.find('cglxfw.com') >=0:
            return 2
    return 1


def get_Province_City(ip):
    url = 'http://39.106.167.115:8000/api/data/ipadress?ip=%s' % ip
    Province = City = ''
    r_client = Redis_5_219_6379_9()
    key = 'Redis_5_219_6379_9'
    item = '.'.join(ip.split('.')[:-1]) + '.0'
    res = r_client.conn.hget(key, item)
    if res:
        res = json.loads(res)
        print('city', res)
        Province = res.get('province', '')
        City = res.get('city', '')
        return Province, City
    else:
        try:
            res = requests.get(url=url)
            json_data = res.json()
            print('city------get', json_data)
            code = int(json_data.get('code'))
            if code == 0:
                Province = json_data.get('province', '')
                City = json_data.get('city', '')
                return Province, City
        except Exception as e:
            pass
    return Province, City


def get_word(pid):
    word = ''
    r_client = Redis_5_219_6379_14()
    key = 'log.hash.keywordlogdetails.%s' % timestamp_to_str_today(time.time())
    item = 'keyword.%s' % pid
    res = r_client.conn.hget(key, item)
    if res:
        # .decode('utf-8')
        word = json.loads(res)
        print('word-----------------%s' % word)
        if word:
            return word
    return word


def data_to_redis(key, data):
    r_client = Redis_5_219_6379_11()
    data = json.dumps(data)
    r_client.conn.lpush(key, data)
    logger.info('======+++++=====:插入redis成功')



def get_pagetype_topicid_title(url, device):
    pagetype = topicid  = 0
    title = topicname = ''
    request_url = 'http://192.168.8.191:18111/change/data'
    data = {"url": url, "device": int(device)}
    try:
        res = requests.post(url=request_url, json=data)
        res_data = res.json()
        print('-------post-----', res_data)
        tagId = res_data.get("tagId", 0)
        tagName = res_data.get("tagName", '')
        Title = res_data.get("Title", '')
        catId = res_data.get("catid", 0)
        return tagId, tagName, Title, catId
    except Exception as e:
        print('---post--error----%s' % str(e))
        return topicid, topicname, title, pagetype


# 访问明细
class View(object):
    Id = '' # 唯一标识 UUID的md5加密
    User = '' # 当前用户标识,记录设备的用户信息
    Url = '' # 访问的网页地址
    Domain = '' # 网站域名
    Device = '' # 设备
    Ip = '' # 客户端IP地址
    VisitTime = '' # 访问时间
    Hour = '' # 访问时间小时
    UserAgent = '' # 浏览器UserAgent
    Refre = '' # 来源
    BaiduSi = '' # 百度统计用户身份
    BaiduFirstTime = '' # 百度访问时间
    BaiduTime = '' # 百度访问时间
    Content = '' # 内容
    CreatedAt = '' # 访问时间戳
    Refre = ''


# 访问明细
class ViewDetails(object):
    PId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '访问页面的实时Id标识,在页面中生成',
    UId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
    Url = '' # varchar(2000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '访问的网页地址',
    SiteName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
    Domain = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '网站域名',
    Device = '' # tinyint(4) NULL DEFAULT NULL COMMENT '设备 1PC 2WAP',
    Ip = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '客户端IP地址',
    Province = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
    City = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
    VisitType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '访问类型 1正常 2蜘蛛 3抓取 0其他',
    VisitTime = '' # datetime(0) NULL DEFAULT NULL COMMENT '访问时间',
    Source = '' # tinyint(4) NULL DEFAULT NULL COMMENT '流量来源 1百度 2直接访问 3其他',
    PageType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '页面类型 路由表的分类',
    SiteType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
    TopicId = '' # int(11) NULL DEFAULT NULL COMMENT '话题标识',
    Title = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Title',
    Word = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
    Date = '' # date NULL DEFAULT NULL COMMENT '日期',
    Hours = '' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
    Remark = '' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '备注',
    TopicName = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
    PersonState = '' # int(4) NULL DEFAULT NULL,
    Browser = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,


def get_refer(refer):
    """
    百度搜索引擎标识:baidu  
    谷歌搜索引擎标识:google
    微信搜索引擎标识:weixin
    Bing搜索引擎登标识:bing
    搜狗搜索引擎登标识:sogou
    有道搜索引擎登标识:youdao
    神马搜索引擎标识:sm.cn   
    360搜索引擎标识:so.com
    头条搜索引擎标识:toutiao
    """
    if refer:
        if refer.find('baidu.com') >= 0:
            # log_viwe.Source = 1
            Source = '百度'
        elif refer.find('weixin') >= 0:
            Source = '微信'
        elif refer.find('google') >= 0:
            Source = '谷歌'
        elif refer.find('bing') >= 0:
            Source = 'Bing'
        elif refer.find('sogou') >= 0:
            Source = '搜狗'
        elif refer.find('youdao') >= 0:
            Source = '有道'
        elif refer.find('sm.cn') >= 0:
            Source = '神马'
        elif refer.find('so.com') >= 0:
            Source = '360'
        elif refer.find('toutiao') >= 0:
            Source = '头条'
        else:
            # log_viwe.Source = 2
            Source = '其他'
    else:
        Source = '直接访问'
    return Source


def ct_view(res):
    log_mysql = LogMysql()
    now_time = int(res.get('visitTime', 0))
    data = timestamp_to_str_data(now_time/1000)
    data_now_time = timestamp_to_str(now_time/1000)
    hour_now_time = int(timestamp_to_str_(now_time/1000))

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    log_viwe = ViewDetails()
    uuid_str = str(uuid.uuid4())
    log_viwe.Id = ''.join(uuid_str.split('-'))
    log_viwe.PId = res.get('id', '') # 唯一标识 UUID的md5加密
    log_viwe.UId = res.get('uid', '') # 当前用户标识,记录设备的用户信息
    log_viwe.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
    log_viwe.Domain = res.get('domain', '') # 网站域名
    Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
    if Device == 1:
        log_viwe.Device = 'PC'
    elif Device == 2:
        log_viwe.Device = 'WAP'
    else:
        log_viwe.Device = '未知'
    log_viwe.SiteName = site_name_dict.get(log_viwe.Domain, '')
    
    SiteType = get_sitetype(log_viwe.Domain, log_viwe.Url)
    if SiteType == 2:
        log_viwe.SiteType = 'SEM'
    else:
        log_viwe.SiteType = 'SEO'
    

    log_viwe.Ip = res.get('ip', '') # 客户端IP地址
    log_viwe.VisitTime = data_now_time # 访问时间
    
    UserAgent = res.get('userAgent', '') # 浏览器UserAgent
    log_viwe.Browser = ua_browser(UserAgent)
    UserAgent = str(UserAgent)
    ua = UserAgent.lower() # type:str
    if ua.find('spider') >= 0:
        # log_viwe.VisitType = 2
        log_viwe.VisitType = '蜘蛛'
    else:
        # log_viwe.VisitType = 1
        log_viwe.VisitType = '正常'
    Refre = parse.unquote(res.get('refer', '')) # 来源
    log_viwe.Refre = Refre
    Refre = str(Refre)
    refer = Refre.lower()
    #if refer.find('baidu.com') >= 0:
        # log_viwe.Source = 1
        #log_viwe.Source = '百度'
    #else:
        # log_viwe.Source = 2
        #log_viwe.Source = '其他'
    log_viwe.Source = get_refer(refer)
    # log_viwe.BaiduSi = '' # 百度统计用户身份
    # log_viwe.BaiduFirstTime = '' # 百度访问时间
    # log_viwe.BaiduTime = '' # 百度访问时间
    # log_viwe.Content = res.get("ccontent", '') # 内容
    # log_viwe.CreatedAt = now_time # 访问时间戳
    topicid, topicname, title, catId = get_pagetype_topicid_title(log_viwe.Url, Device)
    log_viwe.TopicId = topicid
    log_viwe.Title = title
    log_viwe.TopicName = topicname
    log_viwe.PageType = catId
    log_viwe.Word = get_word(log_viwe.PId)

    
    log_viwe.Hours = hour_now_time # 访问时间小时
    log_viwe.Date = data

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # print("rel", res)
    # pop(key) 删除之间键的数据
    # res.pop('ct', 0)
    # print("pop", res)
    # time.sleep(10)
    data = log_viwe.__dict__
    logger.info(data)
    key = "cleaning.list.view"
    data_to_redis(key, data)
    



# wu点击明细
class ClickDetails(object):
    Id ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '唯一标识 UUID的md5加密',
    PId ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '明细标识',
    UId ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
    SiteName ='' #varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
    Domain ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '域名',
    Url ='' # varchar(3000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '页面网址',
    Device ='' # tinyint(4) NULL DEFAULT NULL COMMENT '设备',
    Ip ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'IP',
    Province ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
    City ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
    ClickTime ='' # datetime(0) NULL DEFAULT NULL COMMENT '点击时间',
    ButtonKey ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮Key',
    ButtonName ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮名称',
    ClickType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '点击类型 1留言板 2易聊 3营销数据留言板 4营销数据易聊',
    ClickContent ='' # varchar(2000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '点击内容内容',
    Source ='' # tinyint(4) NULL DEFAULT NULL COMMENT '流量来源 1百度 2直接访问 3其他',
    MarkingId ='' # int(11) NULL DEFAULT NULL COMMENT '营销Id',
    MarkingName ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '营销名称',
    LMId ='' # int(11) NULL DEFAULT NULL COMMENT '留言板Id',
    LMName ='' # char(10) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言板名称',
    PageType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '页面类型 首页、列表页 、与路由关联的分类表',
    SiteType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
    TopicId ='' # int(100) NULL DEFAULT NULL COMMENT '话题标识',
    Title ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Title',
    Word ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
    Date ='' # date NULL DEFAULT NULL COMMENT '日期',
    Hours ='' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
    Remark ='' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '备注',
    TopicName ='' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
    Type ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
    PersonState ='' # int(4) NULL DEFAULT NULL,
    Browser = ''

    


def ct_click(res):
    log_mysql = LogMysql()
    now_time = int(res.get('visitTime', 0))
    data_now_time = timestamp_to_str(now_time/1000)
    data = timestamp_to_str_data(now_time/1000)
    hour_now_time = int(timestamp_to_str_(now_time/1000))
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    log_click = ClickDetails()
    uuid_str = str(uuid.uuid4())
    log_click.Id = ''.join(uuid_str.split('-'))
    log_click.Pid = res.get('id', '') # 唯一标识 UUID的md5加密
    log_click.Uid = res.get('uid', '') # 当前用户标识,记录设备的用户信息
    log_click.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
    log_click.Domain = res.get('domain', '') # 网站域名
    log_click.SiteName = site_name_dict.get(log_click.Domain, '')
    Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
    if Device == 1:
        log_click.Device = 'PC'
    elif Device == 2:
        log_click.Device = 'WAP'
    else:
        log_click.Device = '未知'
    log_click.Ip = res.get('ip', '') # 客户端IP地址
    Province, City = get_Province_City(log_click.Ip)
    log_click.Province = Province
    log_click.City = City
    log_click.ClickTime = data_now_time

    SiteType = get_sitetype(log_click.Domain, log_click.Url)
    if SiteType == 2:
        log_click.SiteType = 'SEM'
    else:
        log_click.SiteType = 'SEO'

    # #######################
    topicid, topicname, title, catId = get_pagetype_topicid_title(log_click.Url, Device)
    log_click.Title = title
    log_click.TopicId = topicid
    log_click.TopicName = topicname
    log_click.PageType = catId
    log_click.Word = get_word(log_click.Pid)


    UserAgent = res.get('userAgent', '') # 浏览器UserAgent
    log_click.Browser = ua_browser(UserAgent)

    Refre = parse.unquote(res.get('refer', '')) # 来源
    if Refre.find('baidu') >= 0:
        # log_click.Source = 1
        log_click.Source = '百度'
    else:
        # log_click.Source = 2
        log_click.Source = '其他'
    log_click.Hours = hour_now_time # 访问时间小时
    log_click.Date = data
    ClickType = int(res.get("ctype", 1)) # 点击按钮类型
    # 1留言板 2易聊 3营销数据留言板 4营销数据易聊
    if ClickType == 1:
        log_click.ClickType = '留言板'
    elif ClickType == 2:
        log_click.ClickType = '易聊'
    elif ClickType == 3:
        log_click.ClickType = '营销数据留言板'
    elif ClickType == 4:
        log_click.ClickType = '营销数据易聊'
    log_click.Type = res.get("type", '') 
    log_click.ButtonKey = res.get("buttonKey", '') 
    Content_dict = ast.literal_eval(res.get("ccontent", {})) # 内容
    # {"m":"","no":"1","value":"获取报价","button":"获取报价","lmid":"1","mkid":"49","orgid":"1","orgname":"EF英孚"}
    no = int(Content_dict.get('no', 0))
    lmid = int(Content_dict.get('lmid', 0))
    mkid = int(Content_dict.get('mkid', 0))
    if no:
        Lmid = no
    else:
        Lmid = lmid
    log_click.LMId = Lmid
    log_click.MarkingId = mkid


    # log_click.CreatedAt = now_time # 访问时间戳
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # print("rel", res)
    # pop(key) 删除之间键的数据
    # res.pop('ct', 0)
    # print("pop", res)
    # time.sleep(10)
    data = log_click.__dict__
    logger.info(data)
    key = "cleaning.list.click"
    data_to_redis(key, data)

class Messace(object):
    Pid = '' # 唯一标识 UUID的md5加密
    Uid = '' # 当前用户标识,记录设备的用户信息
    Tel = '' # 留电
    TelEncryption = '' # 留电加密
    Type = ''  # 留点情况
    Url = '' # 访问的网页地址
    Domain = '' # 网站域名
    Device = '' # 设备
    Ip = '' # 客户端IP地址
    Area = '' # 区域
    VisitTime = '' # 访问时间
    Hour = '' # 访问时间小时
    Content = '' # 内容
    Ccontent = '' # 内容
    Ctype = '' # 点击按钮类型
    CreatedAt = '' # 访问时间戳
    buttonKey = ''

# logdetailsleavingtel
class MessageDetails(object):
    Id = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '唯一标识 UUID的md5加密',
    PId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '明细标识',
    UId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
    SiteType = '' # tinyint(255) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
    SiteName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
    Domain = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '域名',
    Url = '' # varchar(3000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '页面网址',
    Device = '' # tinyint(4) NULL DEFAULT NULL COMMENT '设备',
    CustomerSource = '' # tinyint(4) NULL DEFAULT NULL COMMENT '客户来源 1留言板 2易聊',
    Source = '' # tinyint(4) NULL DEFAULT NULL COMMENT '客户来源 1百度 2直接访问 0其他',
    Tel = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言电话',
    TelEncryption = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言电话加密',
    Content = '' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言内容',
    LeavingType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '留电情况 1留电并成为客户 2留电未成为客户 3重复留电 4测试号码',
    LeavingTime = '' # datetime(0) NULL DEFAULT NULL COMMENT '留电时间',
    IsClick = '' # tinyint(4) NULL DEFAULT NULL COMMENT '是否点击留电',
    Ip = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'IP',
    Province = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
    City = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
    Word = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
    ButtonKey = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮Key',
    ButtonName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮内容',
    MarketingId = '' # int(11) NULL DEFAULT NULL COMMENT '营销Id',
    MarketingName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '营销名称',
    LMId = '' # int(11) NULL DEFAULT NULL COMMENT '留言板Id',
    LMName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言板名称',
    TopicId = '' # int(255) NULL DEFAULT NULL COMMENT '话题标识',
    RoutingId = '' # int(11) NULL DEFAULT NULL COMMENT '路由标识',
    PageType = '' # int(255) NULL DEFAULT NULL COMMENT '页面类型',
    Date = '' # date NULL DEFAULT NULL COMMENT '日期',
    Hours = '' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
    TopicName = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
    PersonState = '' # int(4) NULL DEFAULT NULL,

def ct_message(res):
    log_mysql = LogMysql()
    now_time = int(res.get('visitTime', 0))
    data_now_time = timestamp_to_str(now_time/1000)
    data = timestamp_to_str_data(now_time/1000)
    hour_now_time = int(timestamp_to_str_(now_time/1000))
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    log_message = MessageDetails()
    uuid_str = str(uuid.uuid4())
    log_message.Id = ''.join(uuid_str.split('-'))
    log_message.PId = res.get('id', '') # 唯一标识 UUID的md5加密
    log_message.UId = res.get('uid', '') # 当前用户标识,记录设备的用户信息
    log_message.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
    log_message.Tel = res.get('tel', '') # 电话
    log_message.TelEncryption = res.get('telEncryption', '') # 电话加密
    # log_message.Type = res.get('type', "")
    log_message.Domain = res.get('domain', '') # 网站域名
    log_message.SiteName = site_name_dict.get(log_message.Domain, '')

    SiteType = get_sitetype(log_message.Domain, log_message.Url)
    if SiteType == 2:
        log_message.SiteType = 'SEM'
    else:
        log_message.SiteType = 'SEO'


    Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
    if Device == 1:
        log_message.Device = 'PC'
    elif Device == 2:
        log_message.Device = 'WAP'
    else:
        log_message.Device = '未知'
    log_message.Ip = res.get('ip', '') # 客户端IP地址
    Province, City = get_Province_City(log_message.Ip)
    log_message.Province = Province
    log_message.City = City

    topicid, topicname, title, catId = get_pagetype_topicid_title(log_message.Url, Device)
    log_message.TopicId = topicid
    log_message.TopicName = topicname
    log_message.PageType = catId
    log_message.Word = get_word(log_message.PId)

    if log_message.Word:
        # log_message.Source = 1
        log_message.Source = '百度'
    else:
        # log_message.Source = 0
        log_message.Source = "其他"
    
    log_message.LeavingTime = data_now_time # 访问时间

    # log_message.Ctype = int(res.get("ctype", 0)) # 点击按钮类型
    Ccontent_dict = ast.literal_eval(res.get("ccontent", {})) if res.get("ccontent", {})  else dict() # 内容
    no = int(Ccontent_dict.get('no', 0))
    lmid = int(Ccontent_dict.get('lmid', 0))
    mkid = int(Ccontent_dict.get('mkid', 0))
    if no:
        Lmid = no
    else:
        Lmid = lmid
    log_message.LMId = Lmid
    log_message.MarketingId = mkid

    log_message.Content = str(res.get("content", {})) # 内容
    Content_dict = ast.literal_eval(res.get("content", {})) if res.get("content", {})  else dict()
    # {"cId":333455,"message":"","msgType":2,"telStatus":1}
    CustomerSource = int(Content_dict.get('msgType', 0))
    if CustomerSource == 1:
        log_message.CustomerSource = '留言板'
    elif CustomerSource == 2:
        log_message.CustomerSource = '易聊'
    else:
        log_message.CustomerSource = '其他'
    
    LeavingType = int(Content_dict.get('telStatus', 0))
    if LeavingType == 1:
        log_message.LeavingType = '留电并成为客户'
    elif LeavingType == 2:
        log_message.LeavingType = '留电未成为客户'
    elif LeavingType == 3:
        log_message.LeavingType = '重复留电'
    elif LeavingType == 4:
        log_message.LeavingType = '测试号码'
    elif LeavingType == 5:
        log_message.LeavingType = '400'
    elif LeavingType == 6:
        log_message.LeavingType = 'QQ'
    elif LeavingType == 7:
        log_message.LeavingType = '微信'
    else:
        log_message.LeavingType = '未知'
    # 去除测试
    if log_message.LeavingType == 4:
        # log_message.PersonState = 1 
        log_message.PersonState = '测试'


    Ctype = int(res.get("ctype", 0))
    if Ctype == 1 or (Ctype == 2 and log_message.Device == 1):
        log_message.IsClick = 0
    else:
        log_message.IsClick = 1

    log_message.ButtonKey = res.get("buttonKey", '') # buttonKey
    log_message.Hours = hour_now_time # 访问时间小时
    log_message.Date = data
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # print("rel", res)
    # pop(key) 删除之间键的数据
    # res.pop('ct', 0)
    # print("pop", res)
    # time.sleep(10)
    data = log_message.__dict__
    logger.info(data)
    key = "cleaning.list.message"
    data_to_redis(key, data)


# 持久化
def work():

    # 从头开始读取,慎重使用
    # consumer = KafkaConsumer('test', bootstrap_servers=['192.168.10.10:9092'], auto_offset_reset='earliest')

    # 从当前数据开始读取 消费者
    consumer = KafkaConsumer('log', bootstrap_servers=['192.168.2.134:9092'])

    # 监听广播,读取数据
    for message in consumer:
        try:
            # print(message)
            # print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
            #                                      message.offset, message.key,
            #                                      message.value))
            # print(message.value)  # bytes
            # res = message.value  # bytes
            res = message.value.decode('utf-8')  # str
            res = ast.literal_eval(res)  # dict
            # ct 判别类型
            ct = res.get('ct', '')
            logger.info(ct)
            if ct == "click":
                print(res)
                # click
                print(ct)
                # print(res)
                ct_click(res)
                pass
            if ct == "message":
                # message
                print(res)
                print(ct)
                ct_message(res)
            if ct == "view":
                # view
                print(res)
                print(ct)
                ct_view(res)
            # time.sleep(100)
        except Exception as e:
            print(e)
            pass
        

if __name__ == "__main__":
    work()
    

    # ################################################################################################
    # sql = "SELECT * FROM logdetailsvisit WHERE VisitTime like '2019-11-11%'  LIMIT 1, 100"
    # mysql_conn = LogMysql()
    # mysql_conn.cursor.execute(sql)
    # res = mysql_conn.dict_fetchall()
    # print(res)
    # ################################################################################################
    pass
    
kafka获取数据

 

posted @ 2020-08-11 16:11  小学弟-  阅读(12)  评论(0)    收藏  举报