sqlglot常见AST学习笔记MySQL版
sqlglot常见AST学习笔记MySQL版
项目介绍
本项目旨在学习和记录SQLGlot库如何解析常见的MySQL
SQL语句为抽象语法树(AST)。通过分析各种SQL语句及其对应的AST结构,帮助理解SQLGlot的解析机制。
环境配置
# 使用uv初始化项目
uv init
# 安装sqlglot
uv add sqlglot
# 运行示例脚本
uv run mysql_sql_examples.py
SQL语句与AST对应关系
1. 基础SELECT语句
1.1 简单SELECT
SELECT * FROM users
Select(
expressions=[Star()],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))))
1.2 带列名和WHERE条件
SELECT id, name, email FROM users WHERE id = 1
Select(
expressions=[
Column(this=Identifier(this=id, quoted=False)),
Column(this=Identifier(this=name, quoted=False)),
Column(this=Identifier(this=email, quoted=False))],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
where=Where(
this=EQ(
this=Column(this=Identifier(this=id, quoted=False)),
expression=Literal(this=1, is_string=False))))
1.3 聚合函数
SELECT COUNT(*) FROM users
Select(
expressions=[
Count(
this=Star(),
big_int=True)],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))))
1.4 DISTINCT
SELECT DISTINCT department FROM employees
Select(
distinct=Distinct(),
expressions=[
Column(this=Identifier(this=department, quoted=False))],
from=From(
this=Table(
this=Identifier(this=employees, quoted=False))))
2. JOIN操作
2.1 INNER JOIN
SELECT u.name, e.department FROM users u JOIN employees e ON u.id = e.user_id
Select(
expressions=[
Column(
this=Identifier(this=name, quoted=False),
table=Identifier(this=u, quoted=False)),
Column(
this=Identifier(this=department, quoted=False),
table=Identifier(this=e, quoted=False))],
from=From(
this=Table(
this=Identifier(this=users, quoted=False),
alias=TableAlias(this=Identifier(this=u, quoted=False))))),
joins=[
Join(
this=Table(
this=Identifier(this=employees, quoted=False),
alias=TableAlias(this=Identifier(this=e, quoted=False))),
on=EQ(
this=Column(
this=Identifier(this=id, quoted=False),
table=Identifier(this=u, quoted=False)),
expression=Column(
this=Identifier(this=user_id, quoted=False),
table=Identifier(this=e, quoted=False))))])
3. 排序和限制
3.1 ORDER BY和LIMIT
SELECT * FROM users ORDER BY created_at DESC LIMIT 10
Select(
expressions=[Star()],
limit=Limit(expression=Literal(this=10, is_string=False)),
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
order=Order(
expressions=[
Ordered(
this=Column(this=Identifier(this=created_at, quoted=False)),
desc=True,
nulls_first=False)]))
4. 分组和聚合
4.1 GROUP BY
SELECT * FROM users GROUP BY department
Select(
expressions=[Star()],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
group=Group(
expressions=[
Column(this=Identifier(this=department, quoted=False))]))
4.2 HAVING子句
SELECT department, COUNT(*) as count
FROM users
GROUP BY department
HAVING COUNT(*) > 5
Select(
expressions=[
Column(this=Identifier(this=department, quoted=False)),
Alias(
this=Count(this=Star(), big_int=True),
alias=Identifier(this=count, quoted=False))],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
group=Group(
expressions=[
Column(this=Identifier(this=department, quoted=False))]),
having=Having(
this=GT(
this=Count(this=Star(), big_int=True),
expression=Literal(this=5, is_string=False))))
5. 数据操作语句
5.1 INSERT语句
INSERT INTO users (name, email, age) VALUES ('John', 'john@example.com', 25)
Insert(
this=Schema(
this=Table(this=Identifier(this=users, quoted=False)),
expressions=[
Identifier(this=name, quoted=False),
Identifier(this=email, quoted=False),
Identifier(this=age, quoted=False)]),
expression=Values(
expressions=[
Tuple(
expressions=[
Literal(this='John', is_string=True),
Literal(this='john@example.com', is_string=True),
Literal(this=25, is_string=False)])]))
5.2 UPDATE语句
UPDATE users SET name = 'Bob' WHERE id = 1
Update(
this=Table(this=Identifier(this=users, quoted=False)),
expressions=[
EQ(
this=Column(this=Identifier(this=name, quoted=False)),
expression=Literal(this='Bob', is_string=True))],
where=Where(
this=EQ(
this=Column(this=Identifier(this=id, quoted=False)),
expression=Literal(this=1, is_string=False))))
5.3 DELETE语句
DELETE FROM users WHERE id = 1
Delete(
this=Table(this=Identifier(this=users, quoted=False)),
where=Where(
this=EQ(
this=Column(this=Identifier(this=id, quoted=False)),
expression=Literal(this=1, is_string=False))))
6. 数据定义语句
6.1 CREATE DATABASE
CREATE DATABASE test
Create(
this=Table(
this=Identifier(this=test, quoted=False)),
kind=DATABASE)
6.2 CREATE TABLE
CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(50), email VARCHAR(100))
Create(
this=Schema(
this=Table(this=Identifier(this=users, quoted=False)),
expressions=[
ColumnDef(
this=Identifier(this=id, quoted=False),
kind=DataType(this=Type.INT, nested=False),
constraints=[
ColumnConstraint(kind=PrimaryKeyColumnConstraint())]),
ColumnDef(
this=Identifier(this=name, quoted=False),
kind=DataType(
this=Type.VARCHAR,
expressions=[DataTypeParam(this=Literal(this=50, is_string=False))],
nested=False)),
ColumnDef(
this=Identifier(this=email, quoted=False),
kind=DataType(
this=Type.VARCHAR,
expressions=[DataTypeParam(this=Literal(this=100, is_string=False))],
nested=False))]),
kind=TABLE)
6.3 DROP TABLE
DROP TABLE users
Drop(
this=Table(
this=Identifier(this=users, quoted=False)),
kind=TABLE)
7. 子查询
7.1 IN子查询
SELECT * FROM users WHERE id IN (SELECT user_id FROM employees WHERE department = 'IT')
Select(
expressions=[Star()],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
where=Where(
this=In(
this=Column(this=Identifier(this=id, quoted=False)),
query=Subquery(
this=Select(
expressions=[
Column(this=Identifier(this=user_id, quoted=False))],
from=From(
this=Table(
this=Identifier(this=employees, quoted=False))),
where=Where(
this=EQ(
this=Column(this=Identifier(this=department, quoted=False)),
expression=Literal(this='IT', is_string=True))))))))
8. 窗口函数
8.1 ROW_NUMBER
SELECT name, salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rank
FROM employees
Select(
expressions=[
Column(this=Identifier(this=name, quoted=False)),
Column(this=Identifier(this=salary, quoted=False)),
Alias(
this=Window(
this=RowNumber(),
partition_by=[
Column(this=Identifier(this=department, quoted=False))],
order=Order(
expressions=[
Ordered(
this=Column(this=Identifier(this=salary, quoted=False)),
desc=True,
nulls_first=False)]),
over=OVER),
alias=Identifier(this=rank, quoted=False))],
from=From(
this=Table(
this=Identifier(this=employees, quoted=False))))
9. 公用表表达式(CTE)
9.1 WITH子句
WITH active_users AS (
SELECT * FROM users WHERE last_login > '2023-01-01'
)
SELECT * FROM active_users WHERE department = 'IT'
Select(
expressions=[Star()],
from=From(
this=Table(
this=Identifier(this=active_users, quoted=False))),
where=Where(
this=EQ(
this=Column(this=Identifier(this=department, quoted=False)),
expression=Literal(this='IT', is_string=True))),
with=With(
expressions=[
CTE(
this=Select(
expressions=[Star()],
from=From(
this=Table(
this=Identifier(this=users, quoted=False))),
where=Where(
this=GT(
this=Column(this=Identifier(this=last_login, quoted=False)),
expression=Literal(this='2023-01-01', is_string=True))))),
alias=TableAlias(
this=Identifier(this=active_users, quoted=False)))]))
AST节点类型说明
常见节点类型
- Select: SELECT语句
- Insert: INSERT语句
- Update: UPDATE语句
- Delete: DELETE语句
- Create: CREATE语句
- Alter: ALTER语句
- Drop: DROP语句
表达式节点
- Star:
*通配符 - Column: 列引用
- Identifier: 标识符
- Literal: 字面量
- Alias: 别名
条件和操作符
- Where: WHERE子句
- Having: HAVING子句
- EQ: 等于
= - GT: 大于
> - LT: 小于
< - GTE: 大于等于
>= - LTE: 小于等于
<= - And: 逻辑AND
- Or: 逻辑OR
- In: IN操作符
表和连接
- Table: 表引用
- From: FROM子句
- Join: JOIN操作
- TableAlias: 表别名
聚合和函数
- Count: COUNT函数
- Sum: SUM函数
- Avg: AVG函数
- Case: CASE表达式
- If: IF条件
高级特性
- Window: 窗口函数
- With: WITH子句(CTE)
- CTE: 公用表表达式
- Subquery: 子查询
- Union: UNION操作
使用建议
- 理解AST结构: 通过观察不同SQL语句生成的AST,理解SQLGlot的解析逻辑
- SQL转换: 利用AST进行SQL语句的转换和优化
- 代码生成: 基于AST生成目标数据库的SQL语句
- 语法验证: 使用AST验证SQL语句的语法正确性
扩展学习
建议进一步学习以下内容:
- SQLGlot的方言支持
- AST遍历和修改
- SQL语句的跨数据库转换
- 自定义SQL解析规则

浙公网安备 33010602011771号