python3 获取线上所有 MySQL 实例中行数大于 1000 万的表的信息
基于其他博主改编
1.获取所有数据库表信息以ip.json格式保存到当前目录
#!/usr/bin/env python3
# coding: utf-8
"""Collect per-table row/size statistics from every MySQL host.

For each host the script lists all non-system databases, queries
``information_schema.TABLES`` for per-table stats, prints each table's
record, and dumps the result to ``<ip>.json`` in the current directory.
"""
import json

import pymysql

# Hosts to scan.
hostList = ["192.168.1.2", "192.168.1.3"]

# MySQL's own schemas are excluded from the report.
SYSTEM_SCHEMAS = {"sys", "information_schema", "mysql", "performance_schema"}

# Per-table statistics. Row counts are reported in units of 100 million
# (亿) and sizes in GiB. The ROUND results are wrapped in CONCAT on purpose:
# it turns Decimal values into strings so json.dumps can serialize them.
# Ordering uses the raw byte total, not the 'N.NNG' display string.
TABLE_STATS_SQL = (
    "SELECT CONCAT(table_schema, '.', table_name), "
    "CONCAT(ROUND(table_rows / 100000000, 2)), "
    "CONCAT(ROUND(data_length / (1024*1024*1024), 2), 'G'), "
    "CONCAT(ROUND(index_length / (1024*1024*1024), 2), 'G'), "
    "CONCAT(ROUND((data_length + index_length) / (1024*1024*1024), 2), 'G') "
    "FROM information_schema.TABLES "
    "WHERE table_schema = %s "
    "ORDER BY (data_length + index_length) DESC"
)


def collect_host(host):
    """Return ``{db_name: {'name': ..., 'table_list': [...]}}`` for one host.

    Prints one dict per table as a progress/audit trail, matching the
    original script's output keys.
    """
    conn = pymysql.connect(
        host=host,            # MySQL server address
        user="root",
        passwd="passwd",
        port=3306,            # must be an int
        connect_timeout=3,    # seconds
    )
    try:
        with conn.cursor() as cur:
            cur.execute('SHOW DATABASES')
            databases = [row[0] for row in cur.fetchall()]

            stats = {}
            for db in databases:
                if db in SYSTEM_SCHEMAS or db in stats:
                    continue
                stats[db] = {'name': db, 'table_list': []}
                # Parameterized query: the driver quotes/escapes the schema
                # name instead of raw % string interpolation.
                cur.execute(TABLE_STATS_SQL, (db,))
                for full_name, rows, data_sz, index_sz, total in cur.fetchall():
                    table_name = full_name.split('.', 1)[1]
                    print({'cdb_name_ip': host, 'db_name': db,
                           'tname': table_name, 'rows': rows,
                           'table_size': data_sz, 'index_size': index_sz,
                           'db_size': total})
                    stats[db]['table_list'].append(
                        {'cdb_name_ip': host, 'tname': table_name,
                         'rows': rows, 'table_size': data_sz,
                         'index_size': index_sz, 'db_size': total})
            return stats
    finally:
        # The original leaked the connection; always close it.
        conn.close()


for h in hostList:
    dic = collect_host(h)
    with open('%s.json' % h, 'w', encoding='utf-8') as f:
        f.write(json.dumps(dic, ensure_ascii=False))
2.遍历当前目录下ip.json格式的文件,并将大于1000万的表行数记录在trows.xls中
#!/usr/bin/env python3
# coding: utf-8
"""Scan the per-host ``<ip>.json`` files produced by the collector and
record every table with more than 10 million rows in ``trows.xls``.
"""
import json

import xlwt

hostList = ["192.168.1.2", "192.168.1.3"]

# Spreadsheet header, one column per field.
HEADER = ["cdb_name_ip", "db_name", "tname", "rows",
          "table_size", "index_size", "db_size"]

# 'rows' in the JSON is expressed in units of 100 million (亿),
# so 0.1 corresponds to 10 million rows.
ROW_THRESHOLD = 0.1

f = xlwt.Workbook()
sheet1 = f.add_sheet('统计', cell_overwrite_ok=True)

# Write the header row exactly once (the original rewrote it per host).
for col, title in enumerate(HEADER):
    sheet1.write(0, col, title)

row_idx = 1  # first data row, below the header
for h in hostList:
    with open("%s.json" % h, 'r', encoding='utf-8') as load_f:
        load_dict = json.load(load_f)
    # Iterate databases in sorted order for deterministic output
    # (replaces the original OrderedDict round-trip).
    for db_name in sorted(load_dict):
        for j in load_dict[db_name]["table_list"]:
            if j['rows'] is None:
                continue  # no row estimate available for this table
            if float(j['rows']) <= ROW_THRESHOLD:
                continue  # at or below 10 million rows — skip
            sheet1.write(row_idx, 0, j['cdb_name_ip'])
            sheet1.write(row_idx, 1, db_name)
            sheet1.write(row_idx, 2, j['tname'])
            sheet1.write(row_idx, 3, j['rows'])
            sheet1.write(row_idx, 4, j['table_size'])
            sheet1.write(row_idx, 5, j['index_size'])
            sheet1.write(row_idx, 6, j['db_size'])
            row_idx += 1

f.save('trows.xls')
3、效果如下,rows单位是亿

浙公网安备 33010602011771号