Oceanbase MySQL租户的统计信息收集脚本

ob_mysql_stat_collection.py

from datetime import datetime
import sys
import logging
import os
import pymysql


# 配置日志格式和文件名
LOG_DIR = '/home/oracle/log'
log_filename = f'auto_statistics_collection_{datetime.now().strftime("%Y%m%d")}.log'
log_file = os.path.join(LOG_DIR, log_filename)

# 确保日志目录存在
os.makedirs(LOG_DIR, exist_ok=True)

# 配置日志,设置为追加模式
logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	handlers=[
		logging.FileHandler(log_file, mode='a')
	]
)

logger = logging.getLogger(__name__)


# 获取今天的日期
current_date = datetime.today().date()
current_date_str = current_date.strftime('%Y%m%d')

def ob_mysql_partition_statistics_collect(cursor_ob_db):
	try:
		logger.info(f"{current_date_str}")

		# 查询当月分区POSITION
		query_partition = """select table_name,min(PARTITION_POSITION) from 
		(select * from (select table_owner,table_name,PARTITION_NAME,PARTITION_POSITION,
		replace(substring(replace(HIGH_VALUE,"'",''),1,10),'-','') as time_format 
		from dba_tab_partitions where table_owner='table_owner') a where a.time_format >= %s) group by table_owner,table_name"""

		logger.info("执行查询分区信息SQL")
		cursor_ob_db.execute(query_partition,(current_date_str,))
		partitions = cursor_ob_db.fetchall()

		logger.info(f"查询当月分区信息完成")

		if partitions is None or not partitions:
			logger.info(f"不存在分区表")
			return None
		else:
			logger.info(f"待处理 {len(partitions)} 个分区")
			for partition in partitions:

				# 查询当月分区号
				query_partition_name = """select PARTITION_NAME from dba_tab_partitions where table_owner='table_owner' and table_name=%s and PARTITION_POSITION=%s"""

				logger.info(f"查询参数:{partition[0]},{partition[1]}")

				cursor_ob_db.execute(query_partition_name, (partition[0], partition[1]))
				partition_name_result = cursor_ob_db.fetchone()
				

				if not partition_name_result:
					logger.error(f"未能获取到分区名,跳过该分区")
					continue

				logger.info(f" 表: {partition[0]} , 当月分区名: {partition_name_result[0]}")

				table_name_real = partition[0].strip()
				partition_name_real = partition_name_result[0].strip()

				if partition[0] and partition_name_result[0] and table_name_real == partition[0] and partition_name_real == partition_name_result[0]:

					# 查询最近收集时间
					check_before_query = """select max(replace(replace(replace(substring(LAST_ANALYZED,1,19),'-',''),':',''),' ','')) from dba_part_col_statistics 
					where OWNER='table_owner' and TABLE_NAME=%s and PARTITION_NAME=%s"""

					cursor_ob_db.execute(check_before_query,(partition[0], partition_name_result[0]))
					check_before_result = cursor_ob_db.fetchone()


					# 处理查询结果
					if check_before_result and check_before_result[0]:
						check_before_result_value = check_before_result[0]
					else:
						check_before_result_value = None

					logger.info(f"最近收集时间查询完成为: {check_before_result_value}")

					# 统计信息收集
					auto_statistics_collect_sql = """
					CALL dbms_stats.gather_table_stats(ownname=>'table_owner', tabname=>%s, partname=>%s,granularity=>'APPROX_GLOBAL AND PARTITION')
					"""
					logger.info(f"统计信息收集带入参数:{partition[0]},{partition_name_result[0]}")
					try:
						cursor_ob_db.execute(auto_statistics_collect_sql, (partition[0], partition_name_result[0]))
						logger.info(f"统计信息收集执行完成")

					except Exception as e:
						logger.error(f"统计信息收集执行失败: {e}")
						continue


					# 查询最新收集时间
					check_after_query = """select max(replace(replace(replace(substring(LAST_ANALYZED,1,19),'-',''),':',''),' ','')) from dba_part_col_statistics 
					where OWNER='table_owner' and TABLE_NAME=%s and PARTITION_NAME=%s"""
					
					cursor_ob_db.execute(check_after_query,(partition[0], partition_name_result[0]))
					check_after_result = cursor_ob_db.fetchone()



					# 处理查询结果
					if check_after_result and check_after_result[0]:
						check_after_result_value = check_after_result[0]
					else:
						check_after_result_value = None


					logger.info(f"最新收集时间为:{check_after_result_value}")

					# 比较时间
					if check_after_result_value and check_before_result_value:
						if check_after_result_value > check_before_result_value:
							logger.info(f"表{partition[0]} 统计信息收集成功,收集时间从 {check_before_result_value} 更新为 {check_after_result_value}")
						else:
							logger.error(f"表{partition[0]} 统计信息收集失败,当前收集时间为{check_before_result_value}")
					elif check_before_result_value is None:
						logger.info(f"表{partition[0]} 之前没有统计信息,现在已收集")
					else:
						logger.error(f"表{partition[0]} 统计信息收集失败,无法获取新时间")
				
				else:
					logger.error(f"表信息存在问题,跳过该表: {partition[0]}")


			return partitions
	except Exception as e:
		logger.error(f"查询分区信息出错:{e}")
		return None


if __name__ == '__main__':

	try:
		logger.info(f"OB-MySQL租户table_owner数据库自动统计信息收集开始:")
		try:

			username = f'wedpd@租户名#集群名'
			
			db_config = {
				'host': 'ip',
				'port': 2883,
				'user': username,
				'password': 'password',
				'database': 'wedpd'
			}

			conn_ob_mysql = pymysql.connect(**db_config)
			cursor_ob_db = conn_ob_mysql.cursor()

			partitions = ob_mysql_partition_statistics_collect(cursor_ob_db)

			cursor_ob_db.close()
			conn_ob_mysql.close()

		except Exception as e:
			logger.error(f"MySQL连接或查询失败: {e}")

	except ValueError as e:
		logger.critical(f"错误: {e}")
	except Exception as e:
		logger.critical(f"未知错误: {e}")

再编写一个shell脚本调用python脚本,配置crontab即可开启自动统计信息收集

posted @ 2025-12-12 09:45  万恶的皮蛋  阅读(17)  评论(0)    收藏  举报