# —*— coding: utf-8 _*_
import uuid
import ast
import time
import json
import copy
# url解码与提取参数
from urllib import parse
import redis
import requests
import pymysql
from kafka import KafkaConsumer
import logging
from logging import handlers
import random
# 日志记录
class Logger(object):
level_relations = {
'debug': logging.DEBUG,
'info': logging.INFO,
'warning': logging.WARNING,
'error': logging.ERROR,
'crit': logging.CRITICAL
} # 日志级别关系映射
def __init__(self, filename, level='info', when='D', backCount=3,
fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
self.logger = logging.getLogger(filename)
format_str = logging.Formatter(fmt) # 设置日志格式
self.logger.setLevel(self.level_relations.get(level)) # 设置日志级别
sh = logging.StreamHandler() # 往屏幕上输出
sh.setFormatter(format_str) # 设置屏幕上显示的格式
th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount,
encoding='utf-8') # 往文件里写入#指定间隔时间自动生成文件的处理器
# 实例化TimedRotatingFileHandler
# interval是时间间隔,backupCount是备份文件的个数,如果超过这个个数,就会自动删除,when是间隔的时间单位,单位有以下几种:
# S 秒
# M 分
# H 小时、
# D 天、
# W 每星期(interval==0时代表星期一)
# midnight 每天凌晨
th.setFormatter(format_str) # 设置文件里写入的格式
self.logger.addHandler(sh) # 把对象加到logger里
self.logger.addHandler(th)
file = 'all'
try:
file = __file__.split("/")[-1]
except Exception as e:
pass
log = Logger('%s.log' % file)
logger = log.logger
# 把时间戳转换成格式化
def timestamp_to_str(timestamp=None, format='%Y-%m-%d %H:%M:%S'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
def timestamp_to_str_data(timestamp=None, format='%Y-%m-%d'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
# 把时间戳转换成格式化
def timestamp_to_str_(timestamp=None, format='%H'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
# 把时间戳转换成格式化
def timestamp_to_str_today(timestamp=None, format='%Y%m%d'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
def timestamp_to_str_today_yue(timestamp=None, format='%Y%m'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
def timestamp_to_str_today_year(timestamp=None, format='%Y'):
if timestamp:
time_tuple = time.localtime(timestamp) # 把时间戳转换成时间元祖
result = time.strftime(format, time_tuple) # 把时间元祖转换成格式化好的时间
return result
else:
return time.strptime(format)
# UA
ua_dict = {
"www.google.com/bot": "谷歌爬虫",
"Baiduspider": "百度爬虫",
"MQQBrowser": "QQ浏览器",
"UCBrowser": "UC浏览器",
"MiuiBrowser": "小米浏览器",
"iqiyi": "爱奇艺",
"BiliApp": "BiliApp",
"SogouMobileBrowser": "Sogou浏览器",
"SogouSearch": "Sogou浏览器NoHead",
"baiduboxapp": "百度app",
"HuaweiBrowser": "华为浏览器",
"VivoBrowser": "vivo浏览器",
"OppoBrowser": "oppo浏览器",
"HeyTapBrowser": "HeyTap浏览器",
"EUI Browser": "EUI浏览器",
"SamsungBrowser": "三星浏览器",
"MicroMessenger": "微信内置浏览器",
"Quark": "夸克浏览器",
"QHBrowser": "360浏览器",
"baidubrowser": "百度浏览器",
"Chrome": "谷歌浏览器",
"Safari": "Safari浏览器",
}
Android_ua_dict = copy.deepcopy(ua_dict)
Android_ua_dict.pop('Safari')
iphone = list(ua_dict.keys())
Android = list(Android_ua_dict.keys())
print(iphone, Android)
# Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0 Mobile/15D100 Safari/604.1
def ua_browser(user_agent):
browser = '其他'
user_agent = str(user_agent).lower()
if user_agent.find('Mobile'.lower()) >= 0:
# wap
# android
if user_agent.find('android') >= 0:
for a_item in Android:
if user_agent.find(a_item.lower()) >= 0:
browser = ua_dict.get(a_item, '其他')
break
pass
# iphone
elif user_agent.find('iphone') >= 0:
for i_item in iphone:
if user_agent.find(i_item.lower()) >= 0:
browser = ua_dict.get(i_item, '其他')
break
pass
else:
for a1_item in Android:
if user_agent.find(a1_item.lower()) >= 0:
browser = ua_dict.get(a1_item, '其他')
break
pass
pass
else:
browser = 'pc'
# pc
pass
return browser
# redis主库
class Redis_5_219_6379_9(object):
conn = None
def __init__(self):
# poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=9, password='root1234@A')
# 本地测试
# poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
self.conn = redis.Redis(connection_pool=poll)
class Redis_5_219_6379_11(object):
conn = None
def __init__(self):
# poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=11, password='root1234@A')
# 本地测试
# poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
self.conn = redis.Redis(connection_pool=poll)
class Redis_5_219_6379_14(object):
conn = None
def __init__(self):
# poll = redis.ConnectionPool(host='47.94.218.171', port=6379, db=9, password='root1234@A')
poll = redis.ConnectionPool(host='192.168.5.219', port=6379, db=14, password='root1234@A')
# 本地测试
# poll = redis.ConnectionPool(host='192.168.10.10', port=7000, db=14)
self.conn = redis.Redis(connection_pool=poll)
# log 数据库连接
class LogMysql(object):
conn = None
cursor = None
def __init__(self):
self.conn = pymysql.connect(host='rm-2zezqp8sll2swzwby.mysql.rds.aliyuncs.com', user='datacenter',
password='kbs11zx@',
database='log', charset='utf8')
self.cursor = self.conn.cursor()
# 为了方便使用一般会选择将查询结果加上字段名称以字典组的方式返回查询结果
def dict_fetchall(self):
"Return all rows from a cursor as a dict"
# 获取查询字段
columns = [col[0] for col in self.cursor.description]
print(columns)
return [dict(zip(columns, row)) for row in self.cursor.fetchall()]
# 获取表列表
def get_table_list(self):
# 判断表是否存在
self.cursor.execute("SHOW TABLES")
res = self.cursor.fetchall()
table_list = []
for i in res:
table_list.append(i[0])
# print("table_list", table_list)
return table_list
def get_table_info(log_mysql, table):
# 判断表是否存在
log_mysql.cursor.execute("SHOW TABLES")
res = log_mysql.cursor.fetchall()
table_list = []
for i in res:
table_list.append(i[0])
if table in table_list:
return True
else:
# 创建表
sql_click = """create table %s like wulogdetailspageclick""" % (table)
try:
print('------view-start------')
log_mysql.cursor.execute(sql_click)
print('------view-end-------')
except Exception as e:
print(e)
pass
return False
def get_table_info_message(log_mysql, table):
# 判断表是否存在
log_mysql.cursor.execute("SHOW TABLES")
res = log_mysql.cursor.fetchall()
table_list = []
for i in res:
table_list.append(i[0])
if table in table_list:
return True
else:
# 创建表
sql_click = """create table %s like wulogdetailsleavingtel""" % (table)
try:
print('------view-start------')
log_mysql.cursor.execute(sql_click)
print('------view-end-------')
except Exception as e:
print(e)
pass
return False
def get_table_info_view(log_mysql, table):
# 判断表是否存在
log_mysql.cursor.execute("SHOW TABLES")
res = log_mysql.cursor.fetchall()
table_list = []
for i in res:
table_list.append(i[0])
if table in table_list:
return True
else:
# 创建表
sql_click = """create table %s like wulogdetailspages""" % (table)
try:
print('------view-start------')
log_mysql.cursor.execute(sql_click)
print('------view-end-------')
except Exception as e:
print(e)
pass
return False
def get_site_name():
res_dict = {}
m_client = LogMysql()
sql = """select domain, site from siteinfo"""
m_client.cursor.execute(sql)
res = m_client.dict_fetchall()
for item in res:
res_dict.update({item.get('domain'): item.get('site')})
m_client.cursor.close()
m_client.conn.close()
return res_dict
site_name_dict = get_site_name()
def get_sitetype(domain, url):
sem_site_list = ['beimu.xigsa.cn', 'beimu.qidian36.com']
if domain in sem_site_list:
return 2
else:
if url.find('beimu.com/tg') >= 0 or url.find('cglxfw.com') >=0:
return 2
return 1
def get_Province_City(ip):
url = 'http://39.106.167.115:8000/api/data/ipadress?ip=%s' % ip
Province = City = ''
r_client = Redis_5_219_6379_9()
key = 'Redis_5_219_6379_9'
item = '.'.join(ip.split('.')[:-1]) + '.0'
res = r_client.conn.hget(key, item)
if res:
res = json.loads(res)
print('city', res)
Province = res.get('province', '')
City = res.get('city', '')
return Province, City
else:
try:
res = requests.get(url=url)
json_data = res.json()
print('city------get', json_data)
code = int(json_data.get('code'))
if code == 0:
Province = json_data.get('province', '')
City = json_data.get('city', '')
return Province, City
except Exception as e:
pass
return Province, City
def get_word(pid):
word = ''
r_client = Redis_5_219_6379_14()
key = 'log.hash.keywordlogdetails.%s' % timestamp_to_str_today(time.time())
item = 'keyword.%s' % pid
res = r_client.conn.hget(key, item)
if res:
# .decode('utf-8')
word = json.loads(res)
print('word-----------------%s' % word)
if word:
return word
return word
def data_to_redis(key, data):
r_client = Redis_5_219_6379_11()
data = json.dumps(data)
r_client.conn.lpush(key, data)
logger.info('======+++++=====:插入redis成功')
def get_pagetype_topicid_title(url, device):
pagetype = topicid = 0
title = topicname = ''
request_url = 'http://192.168.8.191:18111/change/data'
data = {"url": url, "device": int(device)}
try:
res = requests.post(url=request_url, json=data)
res_data = res.json()
print('-------post-----', res_data)
tagId = res_data.get("tagId", 0)
tagName = res_data.get("tagName", '')
Title = res_data.get("Title", '')
catId = res_data.get("catid", 0)
return tagId, tagName, Title, catId
except Exception as e:
print('---post--error----%s' % str(e))
return topicid, topicname, title, pagetype
# 访问明细
class View(object):
Id = '' # 唯一标识 UUID的md5加密
User = '' # 当前用户标识,记录设备的用户信息
Url = '' # 访问的网页地址
Domain = '' # 网站域名
Device = '' # 设备
Ip = '' # 客户端IP地址
VisitTime = '' # 访问时间
Hour = '' # 访问时间小时
UserAgent = '' # 浏览器UserAgent
Refre = '' # 来源
BaiduSi = '' # 百度统计用户身份
BaiduFirstTime = '' # 百度访问时间
BaiduTime = '' # 百度访问时间
Content = '' # 内容
CreatedAt = '' # 访问时间戳
Refre = ''
# 访问明细
class ViewDetails(object):
PId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '访问页面的实时Id标识,在页面中生成',
UId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
Url = '' # varchar(2000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '访问的网页地址',
SiteName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
Domain = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '网站域名',
Device = '' # tinyint(4) NULL DEFAULT NULL COMMENT '设备 1PC 2WAP',
Ip = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '客户端IP地址',
Province = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
City = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
VisitType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '访问类型 1正常 2蜘蛛 3抓取 0其他',
VisitTime = '' # datetime(0) NULL DEFAULT NULL COMMENT '访问时间',
Source = '' # tinyint(4) NULL DEFAULT NULL COMMENT '流量来源 1百度 2直接访问 3其他',
PageType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '页面类型 路由表的分类',
SiteType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
TopicId = '' # int(11) NULL DEFAULT NULL COMMENT '话题标识',
Title = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Title',
Word = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
Date = '' # date NULL DEFAULT NULL COMMENT '日期',
Hours = '' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
Remark = '' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '备注',
TopicName = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
PersonState = '' # int(4) NULL DEFAULT NULL,
Browser = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
def get_refer(refer):
"""
百度搜索引擎标识:baidu
谷歌搜索引擎标识:google
微信搜索引擎标识:weixin
Bing搜索引擎登标识:bing
搜狗搜索引擎登标识:sogou
有道搜索引擎登标识:youdao
神马搜索引擎标识:sm.cn
360搜索引擎标识:so.com
头条搜索引擎标识:toutiao
"""
if refer:
if refer.find('baidu.com') >= 0:
# log_viwe.Source = 1
Source = '百度'
elif refer.find('weixin') >= 0:
Source = '微信'
elif refer.find('google') >= 0:
Source = '谷歌'
elif refer.find('bing') >= 0:
Source = 'Bing'
elif refer.find('sogou') >= 0:
Source = '搜狗'
elif refer.find('youdao') >= 0:
Source = '有道'
elif refer.find('sm.cn') >= 0:
Source = '神马'
elif refer.find('so.com') >= 0:
Source = '360'
elif refer.find('toutiao') >= 0:
Source = '头条'
else:
# log_viwe.Source = 2
Source = '其他'
else:
Source = '直接访问'
return Source
def ct_view(res):
log_mysql = LogMysql()
now_time = int(res.get('visitTime', 0))
data = timestamp_to_str_data(now_time/1000)
data_now_time = timestamp_to_str(now_time/1000)
hour_now_time = int(timestamp_to_str_(now_time/1000))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
log_viwe = ViewDetails()
uuid_str = str(uuid.uuid4())
log_viwe.Id = ''.join(uuid_str.split('-'))
log_viwe.PId = res.get('id', '') # 唯一标识 UUID的md5加密
log_viwe.UId = res.get('uid', '') # 当前用户标识,记录设备的用户信息
log_viwe.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
log_viwe.Domain = res.get('domain', '') # 网站域名
Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
if Device == 1:
log_viwe.Device = 'PC'
elif Device == 2:
log_viwe.Device = 'WAP'
else:
log_viwe.Device = '未知'
log_viwe.SiteName = site_name_dict.get(log_viwe.Domain, '')
SiteType = get_sitetype(log_viwe.Domain, log_viwe.Url)
if SiteType == 2:
log_viwe.SiteType = 'SEM'
else:
log_viwe.SiteType = 'SEO'
log_viwe.Ip = res.get('ip', '') # 客户端IP地址
log_viwe.VisitTime = data_now_time # 访问时间
UserAgent = res.get('userAgent', '') # 浏览器UserAgent
log_viwe.Browser = ua_browser(UserAgent)
UserAgent = str(UserAgent)
ua = UserAgent.lower() # type:str
if ua.find('spider') >= 0:
# log_viwe.VisitType = 2
log_viwe.VisitType = '蜘蛛'
else:
# log_viwe.VisitType = 1
log_viwe.VisitType = '正常'
Refre = parse.unquote(res.get('refer', '')) # 来源
log_viwe.Refre = Refre
Refre = str(Refre)
refer = Refre.lower()
#if refer.find('baidu.com') >= 0:
# log_viwe.Source = 1
#log_viwe.Source = '百度'
#else:
# log_viwe.Source = 2
#log_viwe.Source = '其他'
log_viwe.Source = get_refer(refer)
# log_viwe.BaiduSi = '' # 百度统计用户身份
# log_viwe.BaiduFirstTime = '' # 百度访问时间
# log_viwe.BaiduTime = '' # 百度访问时间
# log_viwe.Content = res.get("ccontent", '') # 内容
# log_viwe.CreatedAt = now_time # 访问时间戳
topicid, topicname, title, catId = get_pagetype_topicid_title(log_viwe.Url, Device)
log_viwe.TopicId = topicid
log_viwe.Title = title
log_viwe.TopicName = topicname
log_viwe.PageType = catId
log_viwe.Word = get_word(log_viwe.PId)
log_viwe.Hours = hour_now_time # 访问时间小时
log_viwe.Date = data
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# print("rel", res)
# pop(key) 删除之间键的数据
# res.pop('ct', 0)
# print("pop", res)
# time.sleep(10)
data = log_viwe.__dict__
logger.info(data)
key = "cleaning.list.view"
data_to_redis(key, data)
# wu点击明细
class ClickDetails(object):
Id ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '唯一标识 UUID的md5加密',
PId ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '明细标识',
UId ='' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
SiteName ='' #varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
Domain ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '域名',
Url ='' # varchar(3000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '页面网址',
Device ='' # tinyint(4) NULL DEFAULT NULL COMMENT '设备',
Ip ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'IP',
Province ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
City ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
ClickTime ='' # datetime(0) NULL DEFAULT NULL COMMENT '点击时间',
ButtonKey ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮Key',
ButtonName ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮名称',
ClickType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '点击类型 1留言板 2易聊 3营销数据留言板 4营销数据易聊',
ClickContent ='' # varchar(2000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '点击内容内容',
Source ='' # tinyint(4) NULL DEFAULT NULL COMMENT '流量来源 1百度 2直接访问 3其他',
MarkingId ='' # int(11) NULL DEFAULT NULL COMMENT '营销Id',
MarkingName ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '营销名称',
LMId ='' # int(11) NULL DEFAULT NULL COMMENT '留言板Id',
LMName ='' # char(10) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言板名称',
PageType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '页面类型 首页、列表页 、与路由关联的分类表',
SiteType ='' # tinyint(4) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
TopicId ='' # int(100) NULL DEFAULT NULL COMMENT '话题标识',
Title ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Title',
Word ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
Date ='' # date NULL DEFAULT NULL COMMENT '日期',
Hours ='' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
Remark ='' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '备注',
TopicName ='' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
Type ='' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
PersonState ='' # int(4) NULL DEFAULT NULL,
Browser = ''
def ct_click(res):
log_mysql = LogMysql()
now_time = int(res.get('visitTime', 0))
data_now_time = timestamp_to_str(now_time/1000)
data = timestamp_to_str_data(now_time/1000)
hour_now_time = int(timestamp_to_str_(now_time/1000))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
log_click = ClickDetails()
uuid_str = str(uuid.uuid4())
log_click.Id = ''.join(uuid_str.split('-'))
log_click.Pid = res.get('id', '') # 唯一标识 UUID的md5加密
log_click.Uid = res.get('uid', '') # 当前用户标识,记录设备的用户信息
log_click.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
log_click.Domain = res.get('domain', '') # 网站域名
log_click.SiteName = site_name_dict.get(log_click.Domain, '')
Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
if Device == 1:
log_click.Device = 'PC'
elif Device == 2:
log_click.Device = 'WAP'
else:
log_click.Device = '未知'
log_click.Ip = res.get('ip', '') # 客户端IP地址
Province, City = get_Province_City(log_click.Ip)
log_click.Province = Province
log_click.City = City
log_click.ClickTime = data_now_time
SiteType = get_sitetype(log_click.Domain, log_click.Url)
if SiteType == 2:
log_click.SiteType = 'SEM'
else:
log_click.SiteType = 'SEO'
# #######################
topicid, topicname, title, catId = get_pagetype_topicid_title(log_click.Url, Device)
log_click.Title = title
log_click.TopicId = topicid
log_click.TopicName = topicname
log_click.PageType = catId
log_click.Word = get_word(log_click.Pid)
UserAgent = res.get('userAgent', '') # 浏览器UserAgent
log_click.Browser = ua_browser(UserAgent)
Refre = parse.unquote(res.get('refer', '')) # 来源
if Refre.find('baidu') >= 0:
# log_click.Source = 1
log_click.Source = '百度'
else:
# log_click.Source = 2
log_click.Source = '其他'
log_click.Hours = hour_now_time # 访问时间小时
log_click.Date = data
ClickType = int(res.get("ctype", 1)) # 点击按钮类型
# 1留言板 2易聊 3营销数据留言板 4营销数据易聊
if ClickType == 1:
log_click.ClickType = '留言板'
elif ClickType == 2:
log_click.ClickType = '易聊'
elif ClickType == 3:
log_click.ClickType = '营销数据留言板'
elif ClickType == 4:
log_click.ClickType = '营销数据易聊'
log_click.Type = res.get("type", '')
log_click.ButtonKey = res.get("buttonKey", '')
Content_dict = ast.literal_eval(res.get("ccontent", {})) # 内容
# {"m":"","no":"1","value":"获取报价","button":"获取报价","lmid":"1","mkid":"49","orgid":"1","orgname":"EF英孚"}
no = int(Content_dict.get('no', 0))
lmid = int(Content_dict.get('lmid', 0))
mkid = int(Content_dict.get('mkid', 0))
if no:
Lmid = no
else:
Lmid = lmid
log_click.LMId = Lmid
log_click.MarkingId = mkid
# log_click.CreatedAt = now_time # 访问时间戳
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# print("rel", res)
# pop(key) 删除之间键的数据
# res.pop('ct', 0)
# print("pop", res)
# time.sleep(10)
data = log_click.__dict__
logger.info(data)
key = "cleaning.list.click"
data_to_redis(key, data)
class Messace(object):
Pid = '' # 唯一标识 UUID的md5加密
Uid = '' # 当前用户标识,记录设备的用户信息
Tel = '' # 留电
TelEncryption = '' # 留电加密
Type = '' # 留点情况
Url = '' # 访问的网页地址
Domain = '' # 网站域名
Device = '' # 设备
Ip = '' # 客户端IP地址
Area = '' # 区域
VisitTime = '' # 访问时间
Hour = '' # 访问时间小时
Content = '' # 内容
Ccontent = '' # 内容
Ctype = '' # 点击按钮类型
CreatedAt = '' # 访问时间戳
buttonKey = ''
# logdetailsleavingtel
class MessageDetails(object):
Id = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '唯一标识 UUID的md5加密',
PId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '明细标识',
UId = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前用户标识,记录设备的用户信息',
SiteType = '' # tinyint(255) NULL DEFAULT NULL COMMENT '站点类型 1SEO 2SEM',
SiteName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '站点名称',
Domain = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '域名',
Url = '' # varchar(3000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '页面网址',
Device = '' # tinyint(4) NULL DEFAULT NULL COMMENT '设备',
CustomerSource = '' # tinyint(4) NULL DEFAULT NULL COMMENT '客户来源 1留言板 2易聊',
Source = '' # tinyint(4) NULL DEFAULT NULL COMMENT '客户来源 1百度 2直接访问 0其他',
Tel = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言电话',
TelEncryption = '' # char(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言电话加密',
Content = '' # varchar(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言内容',
LeavingType = '' # tinyint(4) NULL DEFAULT NULL COMMENT '留电情况 1留电并成为客户 2留电未成为客户 3重复留电 4测试号码',
LeavingTime = '' # datetime(0) NULL DEFAULT NULL COMMENT '留电时间',
IsClick = '' # tinyint(4) NULL DEFAULT NULL COMMENT '是否点击留电',
Ip = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'IP',
Province = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份',
City = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '城市',
Word = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '搜索词',
ButtonKey = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮Key',
ButtonName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '按钮内容',
MarketingId = '' # int(11) NULL DEFAULT NULL COMMENT '营销Id',
MarketingName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '营销名称',
LMId = '' # int(11) NULL DEFAULT NULL COMMENT '留言板Id',
LMName = '' # varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '留言板名称',
TopicId = '' # int(255) NULL DEFAULT NULL COMMENT '话题标识',
RoutingId = '' # int(11) NULL DEFAULT NULL COMMENT '路由标识',
PageType = '' # int(255) NULL DEFAULT NULL COMMENT '页面类型',
Date = '' # date NULL DEFAULT NULL COMMENT '日期',
Hours = '' # tinyint(4) NULL DEFAULT NULL COMMENT '小时',
TopicName = '' # varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标签数据',
PersonState = '' # int(4) NULL DEFAULT NULL,
def ct_message(res):
log_mysql = LogMysql()
now_time = int(res.get('visitTime', 0))
data_now_time = timestamp_to_str(now_time/1000)
data = timestamp_to_str_data(now_time/1000)
hour_now_time = int(timestamp_to_str_(now_time/1000))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
log_message = MessageDetails()
uuid_str = str(uuid.uuid4())
log_message.Id = ''.join(uuid_str.split('-'))
log_message.PId = res.get('id', '') # 唯一标识 UUID的md5加密
log_message.UId = res.get('uid', '') # 当前用户标识,记录设备的用户信息
log_message.Url = parse.unquote(res.get('url', '')) # 访问的网页地址
log_message.Tel = res.get('tel', '') # 电话
log_message.TelEncryption = res.get('telEncryption', '') # 电话加密
# log_message.Type = res.get('type', "")
log_message.Domain = res.get('domain', '') # 网站域名
log_message.SiteName = site_name_dict.get(log_message.Domain, '')
SiteType = get_sitetype(log_message.Domain, log_message.Url)
if SiteType == 2:
log_message.SiteType = 'SEM'
else:
log_message.SiteType = 'SEO'
Device = int(res.get('device', 0)) if res.get('device', 0) else 0 # 设备
if Device == 1:
log_message.Device = 'PC'
elif Device == 2:
log_message.Device = 'WAP'
else:
log_message.Device = '未知'
log_message.Ip = res.get('ip', '') # 客户端IP地址
Province, City = get_Province_City(log_message.Ip)
log_message.Province = Province
log_message.City = City
topicid, topicname, title, catId = get_pagetype_topicid_title(log_message.Url, Device)
log_message.TopicId = topicid
log_message.TopicName = topicname
log_message.PageType = catId
log_message.Word = get_word(log_message.PId)
if log_message.Word:
# log_message.Source = 1
log_message.Source = '百度'
else:
# log_message.Source = 0
log_message.Source = "其他"
log_message.LeavingTime = data_now_time # 访问时间
# log_message.Ctype = int(res.get("ctype", 0)) # 点击按钮类型
Ccontent_dict = ast.literal_eval(res.get("ccontent", {})) if res.get("ccontent", {}) else dict() # 内容
no = int(Ccontent_dict.get('no', 0))
lmid = int(Ccontent_dict.get('lmid', 0))
mkid = int(Ccontent_dict.get('mkid', 0))
if no:
Lmid = no
else:
Lmid = lmid
log_message.LMId = Lmid
log_message.MarketingId = mkid
log_message.Content = str(res.get("content", {})) # 内容
Content_dict = ast.literal_eval(res.get("content", {})) if res.get("content", {}) else dict()
# {"cId":333455,"message":"","msgType":2,"telStatus":1}
CustomerSource = int(Content_dict.get('msgType', 0))
if CustomerSource == 1:
log_message.CustomerSource = '留言板'
elif CustomerSource == 2:
log_message.CustomerSource = '易聊'
else:
log_message.CustomerSource = '其他'
LeavingType = int(Content_dict.get('telStatus', 0))
if LeavingType == 1:
log_message.LeavingType = '留电并成为客户'
elif LeavingType == 2:
log_message.LeavingType = '留电未成为客户'
elif LeavingType == 3:
log_message.LeavingType = '重复留电'
elif LeavingType == 4:
log_message.LeavingType = '测试号码'
elif LeavingType == 5:
log_message.LeavingType = '400'
elif LeavingType == 6:
log_message.LeavingType = 'QQ'
elif LeavingType == 7:
log_message.LeavingType = '微信'
else:
log_message.LeavingType = '未知'
# 去除测试
if log_message.LeavingType == 4:
# log_message.PersonState = 1
log_message.PersonState = '测试'
Ctype = int(res.get("ctype", 0))
if Ctype == 1 or (Ctype == 2 and log_message.Device == 1):
log_message.IsClick = 0
else:
log_message.IsClick = 1
log_message.ButtonKey = res.get("buttonKey", '') # buttonKey
log_message.Hours = hour_now_time # 访问时间小时
log_message.Date = data
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# print("rel", res)
# pop(key) 删除之间键的数据
# res.pop('ct', 0)
# print("pop", res)
# time.sleep(10)
data = log_message.__dict__
logger.info(data)
key = "cleaning.list.message"
data_to_redis(key, data)
# 持久化
def work():
# 从头开始读取,慎重使用
# consumer = KafkaConsumer('test', bootstrap_servers=['192.168.10.10:9092'], auto_offset_reset='earliest')
# 从当前数据开始读取 消费者
consumer = KafkaConsumer('log', bootstrap_servers=['192.168.2.134:9092'])
# 监听广播,读取数据
for message in consumer:
try:
# print(message)
# print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
# message.offset, message.key,
# message.value))
# print(message.value) # bytes
# res = message.value # bytes
res = message.value.decode('utf-8') # str
res = ast.literal_eval(res) # dict
# ct 判别类型
ct = res.get('ct', '')
logger.info(ct)
if ct == "click":
print(res)
# click
print(ct)
# print(res)
ct_click(res)
pass
if ct == "message":
# message
print(res)
print(ct)
ct_message(res)
if ct == "view":
# view
print(res)
print(ct)
ct_view(res)
# time.sleep(100)
except Exception as e:
print(e)
pass
if __name__ == "__main__":
work()
# ################################################################################################
# sql = "SELECT * FROM logdetailsvisit WHERE VisitTime like '2019-11-11%' LIMIT 1, 100"
# mysql_conn = LogMysql()
# mysql_conn.cursor.execute(sql)
# res = mysql_conn.dict_fetchall()
# print(res)
# ################################################################################################
pass