sqlglot常见AST学习笔记MySQL版

sqlglot常见AST学习笔记MySQL版

项目介绍

本项目旨在学习和记录SQLGlot库如何解析常见的MySQL
SQL语句为抽象语法树(AST)。通过分析各种SQL语句及其对应的AST结构,帮助理解SQLGlot的解析机制。

环境配置

# 使用uv初始化项目
uv init

# 安装sqlglot
uv add sqlglot

# 运行示例脚本
uv run mysql_sql_examples.py

SQL语句与AST对应关系

1. 基础SELECT语句

1.1 简单SELECT

SELECT * FROM users
Select(
  expressions=[Star()],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))))

1.2 带列名和WHERE条件

SELECT id, name, email FROM users WHERE id = 1
Select(
  expressions=[
    Column(this=Identifier(this=id, quoted=False)),
    Column(this=Identifier(this=name, quoted=False)),
    Column(this=Identifier(this=email, quoted=False))],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))),
  where=Where(
    this=EQ(
      this=Column(this=Identifier(this=id, quoted=False)),
      expression=Literal(this=1, is_string=False))))

1.3 聚合函数

SELECT COUNT(*) FROM users
Select(
  expressions=[
    Count(
      this=Star(),
      big_int=True)],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))))

1.4 DISTINCT

SELECT DISTINCT department FROM employees
Select(
  distinct=Distinct(),
  expressions=[
    Column(this=Identifier(this=department, quoted=False))],
  from=From(
    this=Table(
      this=Identifier(this=employees, quoted=False))))

2. JOIN操作

2.1 INNER JOIN

SELECT u.name, e.department FROM users u JOIN employees e ON u.id = e.user_id
Select(
  expressions=[
    Column(
      this=Identifier(this=name, quoted=False),
      table=Identifier(this=u, quoted=False)),
    Column(
      this=Identifier(this=department, quoted=False),
      table=Identifier(this=e, quoted=False))],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False),
      alias=TableAlias(this=Identifier(this=u, quoted=False))))),
  joins=[
    Join(
      this=Table(
        this=Identifier(this=employees, quoted=False),
        alias=TableAlias(this=Identifier(this=e, quoted=False))),
      on=EQ(
        this=Column(
          this=Identifier(this=id, quoted=False),
          table=Identifier(this=u, quoted=False)),
        expression=Column(
          this=Identifier(this=user_id, quoted=False),
          table=Identifier(this=e, quoted=False))))])

3. 排序和限制

3.1 ORDER BY和LIMIT

SELECT * FROM users ORDER BY created_at DESC LIMIT 10
Select(
  expressions=[Star()],
  limit=Limit(expression=Literal(this=10, is_string=False)),
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))),
  order=Order(
    expressions=[
      Ordered(
        this=Column(this=Identifier(this=created_at, quoted=False)),
        desc=True,
        nulls_first=False)]))

4. 分组和聚合

4.1 GROUP BY

SELECT * FROM users GROUP BY department
Select(
  expressions=[Star()],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))),
  group=Group(
    expressions=[
      Column(this=Identifier(this=department, quoted=False))]))

4.2 HAVING子句

SELECT department, COUNT(*) as count 
FROM users 
GROUP BY department 
HAVING COUNT(*) > 5
Select(
  expressions=[
    Column(this=Identifier(this=department, quoted=False)),
    Alias(
      this=Count(this=Star(), big_int=True),
      alias=Identifier(this=count, quoted=False))],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))),
  group=Group(
    expressions=[
      Column(this=Identifier(this=department, quoted=False))]),
  having=Having(
    this=GT(
      this=Count(this=Star(), big_int=True),
      expression=Literal(this=5, is_string=False))))

5. 数据操作语句

5.1 INSERT语句

INSERT INTO users (name, email, age) VALUES ('John', 'john@example.com', 25)
Insert(
  this=Schema(
    this=Table(this=Identifier(this=users, quoted=False)),
    expressions=[
      Identifier(this=name, quoted=False),
      Identifier(this=email, quoted=False),
      Identifier(this=age, quoted=False)]),
  expression=Values(
    expressions=[
      Tuple(
        expressions=[
          Literal(this='John', is_string=True),
          Literal(this='john@example.com', is_string=True),
          Literal(this=25, is_string=False)])]))

5.2 UPDATE语句

UPDATE users SET name = 'Bob' WHERE id = 1
Update(
  this=Table(this=Identifier(this=users, quoted=False)),
  expressions=[
    EQ(
      this=Column(this=Identifier(this=name, quoted=False)),
      expression=Literal(this='Bob', is_string=True))],
  where=Where(
    this=EQ(
      this=Column(this=Identifier(this=id, quoted=False)),
      expression=Literal(this=1, is_string=False))))

5.3 DELETE语句

DELETE FROM users WHERE id = 1
Delete(
  this=Table(this=Identifier(this=users, quoted=False)),
  where=Where(
    this=EQ(
      this=Column(this=Identifier(this=id, quoted=False)),
      expression=Literal(this=1, is_string=False))))

6. 数据定义语句

6.1 CREATE DATABASE

CREATE DATABASE test
Create(
  this=Table(
    this=Identifier(this=test, quoted=False)),
  kind=DATABASE)

6.2 CREATE TABLE

CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(50), email VARCHAR(100))
Create(
  this=Schema(
    this=Table(this=Identifier(this=users, quoted=False)),
    expressions=[
      ColumnDef(
        this=Identifier(this=id, quoted=False),
        kind=DataType(this=Type.INT, nested=False),
        constraints=[
          ColumnConstraint(kind=PrimaryKeyColumnConstraint())]),
      ColumnDef(
        this=Identifier(this=name, quoted=False),
        kind=DataType(
          this=Type.VARCHAR,
          expressions=[DataTypeParam(this=Literal(this=50, is_string=False))],
          nested=False)),
      ColumnDef(
        this=Identifier(this=email, quoted=False),
        kind=DataType(
          this=Type.VARCHAR,
          expressions=[DataTypeParam(this=Literal(this=100, is_string=False))],
          nested=False))]),
  kind=TABLE)

6.3 DROP TABLE

DROP TABLE users
Drop(
  this=Table(
    this=Identifier(this=users, quoted=False)),
  kind=TABLE)

7. 子查询

7.1 IN子查询

SELECT * FROM users WHERE id IN (SELECT user_id FROM employees WHERE department = 'IT')
Select(
  expressions=[Star()],
  from=From(
    this=Table(
      this=Identifier(this=users, quoted=False))),
  where=Where(
    this=In(
      this=Column(this=Identifier(this=id, quoted=False)),
      query=Subquery(
        this=Select(
          expressions=[
            Column(this=Identifier(this=user_id, quoted=False))],
          from=From(
            this=Table(
              this=Identifier(this=employees, quoted=False))),
          where=Where(
            this=EQ(
              this=Column(this=Identifier(this=department, quoted=False)),
              expression=Literal(this='IT', is_string=True))))))))

8. 窗口函数

8.1 ROW_NUMBER

SELECT name, salary, 
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank 
FROM employees
Select(
  expressions=[
    Column(this=Identifier(this=name, quoted=False)),
    Column(this=Identifier(this=salary, quoted=False)),
    Alias(
      this=Window(
        this=RowNumber(),
        partition_by=[
          Column(this=Identifier(this=department, quoted=False))],
        order=Order(
          expressions=[
            Ordered(
              this=Column(this=Identifier(this=salary, quoted=False)),
              desc=True,
              nulls_first=False)]),
        over=OVER),
      alias=Identifier(this=rank, quoted=False))],
  from=From(
    this=Table(
      this=Identifier(this=employees, quoted=False))))

9. 公用表表达式(CTE)

9.1 WITH子句

WITH active_users AS (
  SELECT * FROM users WHERE last_login > '2023-01-01'
) 
SELECT * FROM active_users WHERE department = 'IT'
Select(
  expressions=[Star()],
  from=From(
    this=Table(
      this=Identifier(this=active_users, quoted=False))),
  where=Where(
    this=EQ(
      this=Column(this=Identifier(this=department, quoted=False)),
      expression=Literal(this='IT', is_string=True))),
  with=With(
    expressions=[
      CTE(
        this=Select(
          expressions=[Star()],
          from=From(
            this=Table(
              this=Identifier(this=users, quoted=False))),
          where=Where(
            this=GT(
              this=Column(this=Identifier(this=last_login, quoted=False)),
              expression=Literal(this='2023-01-01', is_string=True))))),
        alias=TableAlias(
          this=Identifier(this=active_users, quoted=False)))]))

AST节点类型说明

常见节点类型

  • Select: SELECT语句
  • Insert: INSERT语句
  • Update: UPDATE语句
  • Delete: DELETE语句
  • Create: CREATE语句
  • Alter: ALTER语句
  • Drop: DROP语句

表达式节点

  • Star: * 通配符
  • Column: 列引用
  • Identifier: 标识符
  • Literal: 字面量
  • Alias: 别名

条件和操作符

  • Where: WHERE子句
  • Having: HAVING子句
  • EQ: 等于 =
  • GT: 大于 >
  • LT: 小于 <
  • GTE: 大于等于 >=
  • LTE: 小于等于 <=
  • And: 逻辑AND
  • Or: 逻辑OR
  • In: IN操作符

表和连接

  • Table: 表引用
  • From: FROM子句
  • Join: JOIN操作
  • TableAlias: 表别名

聚合和函数

  • Count: COUNT函数
  • Sum: SUM函数
  • Avg: AVG函数
  • Case: CASE表达式
  • If: IF条件

高级特性

  • Window: 窗口函数
  • With: WITH子句(CTE)
  • CTE: 公用表表达式
  • Subquery: 子查询
  • Union: UNION操作

使用建议

  1. 理解AST结构: 通过观察不同SQL语句生成的AST,理解SQLGlot的解析逻辑
  2. SQL转换: 利用AST进行SQL语句的转换和优化
  3. 代码生成: 基于AST生成目标数据库的SQL语句
  4. 语法验证: 使用AST验证SQL语句的语法正确性

扩展学习

建议进一步学习以下内容:

  • SQLGlot的方言支持
  • AST遍历和修改
  • SQL语句的跨数据库转换
  • 自定义SQL解析规则

参考资料

posted @ 2025-08-30 12:35  masx200  阅读(74)  评论(0)    收藏  举报