[Java/SQL] 自动去除SQL注释

0 引言

因部分数据库不允许SQL中存在注释信息(如:Redis、Open Gemini等;当然,MYSQL、INFLUXDB等数据库是支持的),故对于数据库连接(池)框架、数据服务/Data to API(如:阿里云的DataWorks/DWS服务)等中间平台而言,存在这样一个需求:

  • 用户提交到数据库的SQL,需对原始SQL的注释信息予以去除。去除后,再提交给数据库

以 OpenGemini V1.2.0 数据库为例,如果SQL中存在注释信息,将报错:error parsing query: syntax error : unexpected $unk

1 解决方法

解决思路

基于 Java 的正则表达式进行正则匹配,是最佳选择。

方案代码: SqlCommentStripper

import java.util.regex.Pattern;

import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;

/**
 * SQL注释剥离器
 * @author johnny zen
 * @create-time 2024-08-12 14:35
 * @note ...
 */
@Slf4j
public class SqlCommentStripper {
    //Pattern.MULTILINE : 用于处理多行文本 ; Pattern.DOTALL : 让.匹配包括行终止符在内的任意字符
    private static final Pattern SINGLE_LINE_COMMENT_PATTERN = Pattern.compile("--.*?\n", Pattern.DOTALL);
    //LAST_LINE_SINGLE_LINE_COMMENT_PATTERN : 解决SQL最后一行存在单行注释的问题
    private static final Pattern LAST_LINE_SINGLE_LINE_COMMENT_PATTERN = Pattern.compile("--.*?$", Pattern.MULTILINE);
    private static final Pattern MULTI_LINE_COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);

    /**
     * 自动去除注释
     * @param sql
     * @return
     */
    public static String stripComments(@Nonnull String sql) {
        //eg: sql = "SELECT * FROM users WHERE id = 1; -- This is a single-line comment\n-- test1\n/* Hello Guys \n * This is a multi-line comment! */-- test2\n/* test 3*/SELECT * FROM product WHERE id = 2;";
        log.debug("strip-before-sql:\n{}", sql);
        String cleanSql = SINGLE_LINE_COMMENT_PATTERN.matcher(sql).replaceAll("\n");
        cleanSql = LAST_LINE_SINGLE_LINE_COMMENT_PATTERN.matcher(cleanSql).replaceAll("");//去除最后一行SQL存在单行注释的问题
        cleanSql = MULTI_LINE_COMMENT_PATTERN.matcher(cleanSql).replaceAll("");
        //eg : SELECT * FROM users WHERE id = 1; SELECT * FROM product WHERE id = 2;
        log.debug("strip-after-sql:\n{}", cleanSql);
        return cleanSql;
    }
}

CASES

CASE1

strip-before-sql: 
-- select
	-- count(uuid) as cnt
-- from (
	select
		-- uuid,device_id,event_ts as collectTime,"sig_01"
		count(uuid) as cnt -- 20240813 为兼容 OpenGemini 1.2.0 语法(COUNT聚合SQL的子查询WHERE条件中不支持field字段)而优化
	from xxx.autogen.dwd_device_signal_ri
	where
		device_id          =  '2cb91c9914d2288e74d5bff5151e9c3fe7d010f9c421e3d43e7786f118eb5ec6'
		and time     >= 1703429368389ms
		and time     <= 1783429428389ms
		-- 如下是为兼容 INFLUXDB 历史CAN信号数据不存在 sys_code 的情况
		
	order by time ASC
-- )
-- order by time ASC


strip-after-sql: 		
	select
		
		count(uuid) as cnt 
	from xxx.autogen.dwd_device_signal_ri
	where
		vin          =  '2cb91c9914d2288e74d5bff5151e9c3fe7d010f9c421e3d43e7786f118eb5ec6'
		and time     >= 1703429368389ms
		and time     <= 1783429428389ms
		
		
	order by time ASC

CASE2

//eg: sql = "SELECT * FROM users WHERE id = 1; -- This is a single-line comment\n-- test1\n/* Hello Guys \n * This is a multi-line comment! */-- test2\n/* test 3*/SELECT * FROM product WHERE id = 2;-- test4";

strip-before-sql: 
SELECT * FROM users WHERE id = 1; -- This is a single-line comment
-- test1
/* Hello Guys 
 * This is a multi-line comment! */-- test2
 /* test 3*/SELECT * FROM product WHERE id = 2;-- test4


strip-after-sql: 
SELECT * FROM users WHERE id = 1; 


SELECT * FROM product WHERE id = 2;

X 参考文献

  • GPT
  • opgen gemini
  • influxdb
  • Java 正则表达式
posted @ 2024-08-12 15:39  千千寰宇  阅读(268)  评论(0)    收藏  举报