正则表达式解析StarRocks雾化视图中的血缘关系

解析SQL中的底表

主要目标是获取出StarRocks雾化中的底表和字段备注,之后给字段赋予备注值,存入库表,可以动态生成数据字典,web可以利用该表
实现mybatis的动态sql拼接,动态化的excel导出导入,魔板等功能。尝试使用了Jsqlparser解析sql语句,发现遇到部分
复杂的子查询内包含union all情况解析非常慢,解析完成20多个视图需要至少3min不能满足集成到web的需求,尝试了Calcite
这块有待于进一步研究,解析中构造语法树ast就出现了问题,需要细细研究javacc编译器等,复杂度上升,直接利用正则表达式来搞定。

calcite解析中遇到的问题
一种是case when (left() = 'bbb') then 无法识别(left
另外一种是 interval ( 7 day) 无法解析 interval (
如果单纯的避免关键字,采用反引号注释掉,后面还有个 7 day,无法解析。
效率较Jsqlparser高很多

代码中部分是无用的,使用时候看一下吧


import net.sf.jsqlparser.expression.Alias;
import net.sf.jsqlparser.statement.select.PlainSelect;
import net.sf.jsqlparser.statement.select.Select;
import net.sf.jsqlparser.statement.select.SelectExpressionItem;
import net.sf.jsqlparser.statement.select.SelectItem;

import java.sql.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ViewDictByRege {

    private static Connection conn = null;

    private static Map<String, List<String>> viewMapLists = new HashMap<>();

    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        try {
            conn = DriverManager.getConnection("jdbc:mysql://starip:9030/ic_cyb_cdm", "staruser", "123456");

        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    //1、show  MATERIALIZED VIEW from ic_cyb_cdm  where name like '%_v'  查询所有的物化视图
//    2、show create   table   ic_cyb_cdm.cyb_dwd_sales_week_global_v 查询字段构建map
//    3、解析出所有的底表
//    4、遍历底表构建所有字段的map,取有备注的赋值
//    5、比对给目标map赋值
//    todo 待优化项,加数据库连接池
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        getMeterialViewDDL();
        long end = System.currentTimeMillis();
        System.out.println("检测耗费时间"+(end-start));
    }

    public static Map<String, String> getMeterialViewDDL() {
        PreparedStatement pst = null;
        try {
            pst = conn.prepareStatement("show  MATERIALIZED VIEW from ic_cyb_cdm  where name like '%_v'");
            ResultSet rs = pst.executeQuery();
            while (rs.next()) {
                String viewName = "ic_cyb_cdm." + rs.getString(2);
                String viewDdl = rs.getString(4).trim();
                String viewLower = viewDdl.toLowerCase(Locale.ROOT);
                if (viewLower != null) {
                    int select = viewLower.indexOf("select");
                    String viewSelect = viewLower.substring(select);
//                    inner table remarks
                    Set<String> viewUnderTables = parserTableName(viewSelect, viewName);
                    String colRemarksSql = getRemarksSql(viewUnderTables);
                    Map<String, String> innerRemarksMap = tableRemarks(colRemarksSql);
//                   star view remarks
                   String viewRemarksSql = getColRemarksSql(viewName);
                    Map<String, String> viewStarRemarksMap = tableRemarks(viewRemarksSql);
                    for (Map.Entry<String, String> entry : viewStarRemarksMap.entrySet()) {
                        String columnName = entry.getKey();
                        String columnRemark = entry.getValue();
                        if ("".equalsIgnoreCase(columnRemark)) {
                            if (innerRemarksMap.containsKey(columnName)) {
                                entry.setValue(innerRemarksMap.get(columnName));
                            } else if ("search_time".equalsIgnoreCase(columnName)) {
                                entry.setValue("搜索时间");
                            } else if ("uuid".equalsIgnoreCase(columnName)) {
                                entry.setValue("uuid");
                            } else {
                                entry.setValue("暂无备注名,请手动添加");
                            }
                        }

                    }
//                    pg view remarks
                    String pgViewColRemark = "select tfi.field ,tfi.notes  from zcb_dev.t_table_info tti " +
                            " left join zcb_dev.t_field_info tfi on tti.id  = tfi.table_id" +
                            " where tti.table_name = '" + viewName.split("\\.")[1] + "'";
                    Map<String, String> pgColRemark = pgTableRemark(pgViewColRemark);
                    if (pgColRemark.size() > viewStarRemarksMap.size()) {
//                    说明有字段删除,匹配出来删除的字段做提示,手动人工pg库表去删除该字段
                        String delColumns = compareFieldMap(pgColRemark, viewStarRemarksMap);
                        System.out.println("需要在pg库t_filed_info表内删除视图:" + viewName + "的字段和备注有:" + delColumns);

                    } else if (pgColRemark.size() < viewStarRemarksMap.size()) {
//                        说明有新增字段,新增字段直接插入
                        String allColumns = compareFieldMap(viewStarRemarksMap, pgColRemark);
                        System.out.println("需要在pg库t_filed_info表内添加视图:" + viewName + "的字段和备注有:" + allColumns);
                    } else {
//                        字段数没有变化,如果pg字段为空,则做修改,不然不用修改
                        String diffRemarks = compareValueMap(viewStarRemarksMap, pgColRemark);
                        if(!"".equalsIgnoreCase(diffRemarks)){
                            System.out.println("需要在pg库t_filed_info表内修改视图:" + viewName + "的字段和备注有:" + diffRemarks);
                        }else {
                            System.out.println("pg库t_filed_info表内没有需要修改的字段和备注视图:" + viewName);
                        }


                    }
                }

            }
        } catch (SQLException e) {
            throw new RuntimeException(e);
        } finally {
            try {
                pst.close();
                conn.close();
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }

        return null;

    }

    /**
     * 比对map的值,如果map的值不一致,返回不一致的结构
     *
     * @param oriViewMap
     * @param targetRemark
     * @return
     */
    private static String compareValueMap(Map<String, String> oriViewMap, Map<String, String> targetRemark) {
        String diffRemarks = "";
        for (Map.Entry<String, String> entry : oriViewMap.entrySet()) {
            String columnName = entry.getKey();
            String columnRemark = entry.getValue();
//            如果pg库里面是空
            if("".equalsIgnoreCase(targetRemark.get(columnName)) || null == targetRemark.get(columnName)){
                diffRemarks += "columnName:" + columnName + "  view new remark: " + columnRemark + "  pg column remark: " + targetRemark.get(columnName);
//                System.out.println("columnName:" + columnName + " view new remark: " + columnRemark + "   pg column remark: " + targetRemark.get(columnName));
            }
        }
        return diffRemarks;
    }

    /**
     * 原始map和目标map做比对,返回原始map中的不同的值
     *
     * @param oriMap
     * @param targetMap
     */
    private static String compareFieldMap(Map<String, String> oriMap, Map<String, String> targetMap) {
        String compareResult = "";
        for (Map.Entry<String, String> pgMap : oriMap.entrySet()) {
            String colName = pgMap.getKey();
            String colRemark = pgMap.getValue();
            if (!targetMap.containsKey(colName)) {
                compareResult += (" colName: " + colName + " remark: " + colRemark);
            }
        }
        return compareResult;
    }

    //待优化
    private static Map<String, String> pgTableRemark(String pgViewColRemark) {
        Map<String, String> colRemarkMap = new HashMap<>();
        try {
            Class.forName("org.postgresql.Driver");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        try {
            Connection conn = DriverManager.getConnection("jdbc:postgresql://pgip:5432/z_test", "user", "#123");
            Statement statement = conn.createStatement();
            ResultSet rs = statement.executeQuery(pgViewColRemark);

            while (rs.next()) {
                String colName = rs.getString(1);
                String colRemarks = rs.getString(2);
//                System.out.println(colName + "----" + colRemarks);
                colRemarkMap.put(colName, colRemarks);

            }

            statement.close();
            conn.close();

        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
        return colRemarkMap;
    }

    private static Map<String, String> tableRemarks(String colRemarksSql) throws SQLException {
        PreparedStatement preparedColumns = conn.prepareStatement(colRemarksSql);
        ResultSet remarkRs = preparedColumns.executeQuery();
        Map<String, String> innerRemarks = new HashMap<>();
        while (remarkRs.next()) {
            String columnName = remarkRs.getString(3);
            String columnRemarks = remarkRs.getString(4);
            innerRemarks.put(columnName, columnRemarks);
        }

        try {
            preparedColumns.close();
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
        return innerRemarks;
    }

    private static String getRemarksSql(Set<String> viewUnderTables) {
        return getColRemarksSql(viewUnderTables);
    }


    // 目标获取 视图对应的底表名
    public static Set<String> parserTableName(String sql, String viewName) {
        // 使用正则表达式匹配表名称
        Pattern pattern = Pattern.compile("(?:from|join)\\s+(\\w+.\\w+)");
        Matcher matcher = pattern.matcher(sql.replaceAll("`", "").replaceAll("hive.", "").toLowerCase(Locale.ROOT));
        Set<String> underTables = new HashSet<>();
        while (matcher.find()) {
            String tableName = matcher.group(1);
//            System.out.println(viewName + "    Regluar tableName: " + tableName);//  ic_cyb_ods.cyb_ods_byd_week_production_ds_s
            underTables.add(tableName);
        }
        return underTables;
    }

    /*select  table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT   from information_schema.columns where 1=1 and  COLUMN_COMMENT is not null  and COLUMN_COMMENT != ''  and
    ((table_schema ='ic_cyb_ods' and TABLE_NAME ='cyb_ods_t_api_byd_used_car_ds_s') or  (table_schema ='ic_cyb_cdm' and TABLE_NAME ='cyb_dwd_catarc_sales_business_v'))
    */
    public static String getColRemarksSql(Set<String> innerTableSet) {
        String whereSql = "select  table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT   from information_schema.columns where 1=1 and  COLUMN_COMMENT is not null  and COLUMN_COMMENT != '' ";
        if (!innerTableSet.isEmpty()) {
            whereSql += " and (";

            String[] innerTables = innerTableSet.toArray(new String[innerTableSet.size()]);
            for (int i = 0; i < innerTables.length; i++) {
                String innerTable = innerTables[i];
                String[] split = innerTable.split("\\.");
                String schema = split[0];
                String tableName = split[1];
                if (i < innerTables.length - 1) {
                    whereSql += "(table_schema ='" + schema + "'" + "  and TABLE_NAME = '" + tableName + "') or ";
                } else {
                    whereSql += "(table_schema ='" + schema + "'" + "  and TABLE_NAME = '" + tableName + "') ) ";
                }
            }
        }
//        System.out.println("备注Sql查询:  " + whereSql);

        return whereSql;
    }


    public static String getColRemarksSql(String wholeTableName) {
        String whereSql = "select  table_schema,table_name,COLUMN_NAME ,COLUMN_COMMENT  " +
                " from information_schema.columns where 1=1 ";
        String[] split = wholeTableName.split("\\.");
        String schema = split[0];
        String tableName = split[1];
        whereSql += " and table_schema ='" + schema + "'" + "  and TABLE_NAME = '" + tableName + "' ";
        return whereSql;
    }


    public static void getAllColumns(Select selectStatement, Map<String, String> aliasMap, Map<String, String> remarkMap) {
        PlainSelect plainSelect = (PlainSelect) selectStatement.getSelectBody();
        List<SelectItem> selectItems = plainSelect.getSelectItems();
//        System.out.println(((SelectExpressionItem)selectItems.get(0)).getAlias().getName());
//获取字段名
        System.out.println("====================");
        for (SelectItem selectItem : selectItems) {
//            SimpleNode astNode = selectItem.
            // 处理每个选项,这里假设只有一列为ColumnSelectExpression。
            if (selectItem instanceof SelectExpressionItem) {
                SelectExpressionItem expressionItem = (SelectExpressionItem) selectItem;
                // 获取列名
                String columnName = expressionItem.getExpression().toString();
                String columnNameOri = null;
//                包含函数的字段肯定是加工出来的,这种备注在原表内无法匹配到需要手动维护备注名称,不包含函数的是可以原表内找到字段的
                if (columnName.contains(".") && !columnName.contains("(")) {
                    int split_char = columnName.lastIndexOf(".");
                    columnNameOri = columnName.substring(split_char + 1);
                } else {
                    columnNameOri = columnName;
                }

                Alias alias = expressionItem.getAlias();
                if (alias != null) {
// 带有别名的处理      Column original Name: `order_num`  aliasName:`order_id`
                    String aliasName = alias.getName();
                    aliasMap.put(aliasName, columnNameOri);
                    remarkMap.put(aliasName, "无");//暂时为空
//                    System.out.println("Column Name: " + columnName + "    Column original Name: " + columnNameOri + "  aliasName:" + alias.getName());
                } else {
                    remarkMap.put(columnNameOri, "无");//暂时为空
//                    System.out.println("Column Name: " + columnNameOri);
                }

            }
        }

    }


}



posted @ 2023-07-19 11:02  堕落先锋  阅读(92)  评论(0编辑  收藏  举报