1 #!/bin/bash
2
3 source /etc/profile
4
5 runlog='/tmp/zewei/check_schema_log'
6 hive_database_schema=/tmp/hive_database_schema/hive/
7 mysql_database_schema=/tmp/hive_database_schema/mysql/
8
9 > $runlog
10 #每天下午一点删除现有的表结构缓存文件.
11 #防止hive有变动.
12 if [ `date +%k` -eq 13 ]
13 then
14 rm -rf /tmp/hive_database_schema/hive/*
15 fi
16
17 while read DB; do
18 HiveDB=`echo $DB |awk '{print $1}'`
19 MysqlDB=`echo $DB |awk '{print $2}'`
20 MysqlHost=`echo $DB |awk '{print $3}'`
21 MysqlPort=`echo $DB |awk '{print $4}'`
22 PartTag=`echo $DB |awk '{print $5}'`
23
24 connect_mysql="mysql -u TableSchemaCheck -pSchemacheck666 -h $MysqlHost -P $MysqlPort $MysqlDB -BNe"
25 #缓存文件夹不在就创建
26 ls $mysql_database_schema/$MysqlDB > /dev/null || mkdir -p $mysql_database_schema/$MysqlDB
27 ls $hive_database_schema/$HiveDB > /dev/null || mkdir -p $hive_database_schema/$HiveDB
28
29 #通过part标记检查是否为分区库
30 if [ "$PartTag" == 'part' ]
31 then
32 table_list=`hive -S -e "use $HiveDB; show tables;" | grep "_part$"`
33 else
34 table_list=`hive -S -e "use $HiveDB; show tables;"`
35 fi
36
37 #对获取到的tables进行循环检查
38 for table in $table_list
39 do
40 if [ "$PartTag" == 'part' ]
41 then
42 hive_table_name=$table
43 mysql_table_name=`echo $table | sed 's/_part$//'`
44 else
45 hive_table_name=$table
46 mysql_table_name=$table
47 fi
48
49 #获取mysql的表结构
50 $connect_mysql "desc $mysql_table_name;" > /tmp/mysql_column
51 if [ $? -ne 0 ]
52 then
53 continue
54 else
55 #把大写转换为小写.因为hive里面的列名没有大小写之分,所以转换一下
56 cat /tmp/mysql_column | awk '{print $1}' | tr '[A-Z]' '[a-z]' > $mysql_database_schema/$MysqlDB/$mysql_table_name
57 fi
58
59 #如果没有hive的表结构缓存文件就去hive取...然后对分区字段进行删除,因为这对于MySQL的表结构来说是多余的
60 if ! ls $hive_database_schema/$HiveDB/$table > /dev/null
61 then
62 hive -S -e "use $HiveDB; desc ${hive_table_name};" | awk '{print $1 }' > /tmp/hive_column
63 part_column=`sed -n '/\#/,$p' /tmp/hive_column | egrep -v '#|^$'`
64 for i in $part_column
65 do
66 sed -i "/\b${i}\b/d" /tmp/hive_column
67 done
68 egrep -v "#|^$" /tmp/hive_column | tee $hive_database_schema/$HiveDB/$table
69 fi
70
71 #获取MD5
72 md5ForMysql=`md5sum $mysql_database_schema/$MysqlDB/$mysql_table_name | awk '{print $1}'`
73 md5ForHive=`md5sum $hive_database_schema/$HiveDB/$hive_table_name | awk '{print $1}'`
74
75 #MD5不同就记录日志
76 if [ ! "$md5ForMysql"x == "$md5ForHive"x ]; then
77 echo -e "HiveDB:\t$HiveDB\t\ttable:\t$hive_table_name\t\tnot equal to MysqlDB table:\t$mysql_table_name" >> $runlog
78 fi
79 done
80 done < /root/script/DBlist
81
82 #统计并报警.报警阈值在监控里面设置
83 err_line=`wc -l $runlog | awk '{print $1}'`
84 [报警]