Nginx日志导入mysql数据表-Python

Python 代码

!/usr/bin/python

import os
import re
import pymysql
import time
from dateutil.parser import parse
from dateutil import tz
import threading

def listdir(path, list_name):
for file in os.listdir(path):
file_path = os.path.join(path, file)
if os.path.isdir(file_path):
listdir(file_path, list_name)
else:
list_name.append(file_path)

if name == "main":

1. 定义基本的参数

tableName = 'log04'
logPath = 'G:/root-log/'
dbHost = '127.0.0.1'
dbUser = 'root'
dbPassword = 'root'
dbDatabase = 'monitorlog'
dbPort = 3306

1.检查表是否存在,否则创建表

startTime = time.time()
dbConnection = pymysql.connect(dbHost, dbUser, dbPassword, dbDatabase)
cursorDb = dbConnection.cursor()
sqlIsExistTable = "select * from information_schema.tables where table_name ='" + tableName + "';"
cursorDb.execute(sqlIsExistTable)
tableIsExist = cursorDb.fetchall()
# print(tableIsExist)
if len(tableIsExist) < 1:
# 创建表
sql = "CREATE TABLE " + tableName + " (ip varchar(20) NOT NULL COMMENT 'ip',"
"user varchar(20) NOT NULL COMMENT '用户',"
"date datetime NOT NULL COMMENT '最后登录时间',"
"request varchar(800) NOT NULL COMMENT '请求',"
"referer varchar(800) NOT NULL COMMENT '来源',"
"user_agent varchar(800) NOT NULL,"
"status int(11) unsigned NOT NULL COMMENT '响应码',"
"response_size varchar(15) NOT NULL COMMENT '响应大小'"
") ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='nwbiotec站点日志'"
cursorDb.execute(sql)

2. 读取目录下所有log日志

fileList = []
listdir(logPath, fileList)
TZ_LOCAL = tz.tzlocal()

3. 循环读取日志文件

i = 0
# 日志格式匹配
pattern = re.compile(
r'(?P.) - (?P.) [(?P.)] "(?P.)" (?P\d+) (?P\d+) "(?P.)" "(?P<user_agent>.)".*')
for fileName in fileList:
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " 读取文件【" + fileName +"】的数据")
fileOs = open(fileName, 'r')
dataInsert = []
for line in fileOs:
# 批量处理数据并插入
if i % 10000 == 0 and i > 0:
cursorDb.executemany("INSERT INTO "+tableName+" VALUES (%s,%s,%s,%s,%s,%s,%s,%s)", dataInsert)
dbConnection.commit()
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + " 导入第 " + str(i) + " 行数据")
dataInsert = [];
match = pattern.match(line)
if match:
fmt_date = match.group('date').replace(':', ' ', 1)
foreign_date = parse(fmt_date)
local_date = foreign_date.astimezone(TZ_LOCAL)
dataInsert.append([
match.group('ip'),
match.group('user'),
local_date,
match.group('request')[0:800],
match.group('referer')[0:800],
match.group('user_agent')[0:800],
match.group('rcode'),
match.group('rsize')])
i = i + 1
fileOs.close()

关闭数据库连接

cursorDb.close()
dbConnection.close()

print("共导入"+str(i)+"条数据,耗时"+str(time.time()-startTime)+"s")

posted @ 2021-04-30 15:13  sentangle  阅读(432)  评论(0)    收藏  举报