MERGE语句使用
MERGE语句使用指南
目录
什么是MERGE语句
MERGE语句是SQL中的"万能"数据操作语句,能在一个语句中完成INSERT、UPDATE、DELETE操作。常被称为"UPSERT"操作。
核心价值
- 简化数据同步:一条语句处理新增和更新
- 提高性能:减少多次扫描表的开销
- 保证原子性:整个操作在一个事务中完成
- 减少代码复杂度:替代复杂的IF-ELSE逻辑
基本语法
MERGE INTO 目标表 [别名]
USING 数据源 [别名]
ON (匹配条件)
WHEN MATCHED THEN
UPDATE SET 列1 = 值1, 列2 = 值2
[WHERE 条件]
WHEN NOT MATCHED THEN
INSERT (列列表) VALUES (值列表)
[WHERE 条件];
最简单的示例
MERGE INTO employees e
USING new_employees n
ON (e.emp_id = n.emp_id)
WHEN MATCHED THEN
UPDATE SET e.salary = n.salary
WHEN NOT MATCHED THEN
INSERT (emp_id, name, salary)
VALUES (n.emp_id, n.name, n.salary);
核心概念
1. 数据源类型
-- 表作为源
USING source_table s
-- 视图作为源
USING employee_view ev
-- 子查询作为源
USING (
SELECT emp_id, name, salary
FROM temp_employees
WHERE status = 'ACTIVE'
) t
2. 匹配条件的两种用法
传统连接方式
-- 基于主键连接
ON (target.id = source.id)
-- 基于复合键连接
ON (target.dept_id = source.dept_id AND target.emp_code = source.emp_code)
条件过滤方式(笛卡尔积)
-- 批量更新场景:将配置应用到符合条件的记录
MERGE INTO products p
USING system_config sc
ON (p.category IN ('Electronics', 'Books') AND sc.config_type = 'DISCOUNT')
WHEN MATCHED THEN
UPDATE SET p.discount_rate = sc.config_value;
笛卡尔积用法说明:
- 目标表中符合条件的记录 × 源表中符合条件的记录
- 适用于批量配置更新、状态同步等场景
- 注意控制数据量,避免性能问题
3. WHEN子句详解
WHEN MATCHED(记录存在时)
-- 更新操作
WHEN MATCHED THEN
UPDATE SET col1 = source.col1, col2 = source.col2
WHERE target.last_updated < source.last_updated
-- 删除操作
WHEN MATCHED THEN
DELETE WHERE target.status = 'INACTIVE'
WHEN NOT MATCHED(记录不存在时)
-- 插入新记录
WHEN NOT MATCHED THEN
INSERT (col1, col2, col3)
VALUES (source.col1, source.col2, source.col3)
WHERE source.status = 'VALID'
WHEN NOT MATCHED BY SOURCE(仅SQL Server)
-- 删除目标表中源表没有的记录
WHEN NOT MATCHED BY SOURCE THEN
DELETE WHERE target.created_date < DATEADD(year, -1, GETDATE())
实用案例
案例1:员工信息同步
-- 从HR系统同步员工信息到主系统
MERGE INTO employees e
USING hr_employees h
ON (e.employee_id = h.employee_id)
WHEN MATCHED AND h.last_updated > e.last_updated THEN
UPDATE SET
e.name = h.name,
e.department = h.department,
e.salary = h.salary,
e.last_updated = h.last_updated
WHEN NOT MATCHED AND h.status = 'ACTIVE' THEN
INSERT (employee_id, name, department, salary, last_updated)
VALUES (h.employee_id, h.name, h.department, h.salary, h.last_updated);
案例2:库存管理
-- 根据入库单更新库存
MERGE INTO inventory i
USING (
SELECT
product_id,
SUM(quantity) as total_in,
MAX(unit_cost) as latest_cost
FROM stock_in_details
WHERE in_date = CURRENT_DATE
GROUP BY product_id
) s
ON (i.product_id = s.product_id)
WHEN MATCHED THEN
UPDATE SET
i.quantity = i.quantity + s.total_in,
i.unit_cost = s.latest_cost,
i.last_updated = CURRENT_TIMESTAMP
WHEN NOT MATCHED THEN
INSERT (product_id, quantity, unit_cost, last_updated)
VALUES (s.product_id, s.total_in, s.latest_cost, CURRENT_TIMESTAMP);
案例3:数据仓库维度表(SCD Type 1)
-- 产品维度表更新
MERGE INTO dim_product dp
USING staging_product sp
ON (dp.product_code = sp.product_code)
WHEN MATCHED AND (
dp.product_name != sp.product_name OR
dp.category != sp.category OR
dp.price != sp.price
) THEN
UPDATE SET
dp.product_name = sp.product_name,
dp.category = sp.category,
dp.price = sp.price,
dp.modified_date = CURRENT_TIMESTAMP
WHEN NOT MATCHED THEN
INSERT (product_code, product_name, category, price, created_date, modified_date)
VALUES (sp.product_code, sp.product_name, sp.category, sp.price,
CURRENT_TIMESTAMP, CURRENT_TIMESTAMP);
案例4:批量配置应用
-- 将促销配置应用到指定商品类别
MERGE INTO products p
USING promotion_config pc
ON (p.category_id IN (1, 2, 5) AND pc.promotion_type = 'SEASONAL')
WHEN MATCHED THEN
UPDATE SET
p.discount_rate = pc.discount_rate,
p.promotion_start = pc.start_date,
p.promotion_end = pc.end_date
WHERE p.status = 'ACTIVE' AND p.stock_quantity > 0;
高级用法
1. 多条件WHEN子句
MERGE INTO orders o
USING order_updates ou
ON (o.order_id = ou.order_id)
WHEN MATCHED AND ou.action = 'CANCEL' THEN
UPDATE SET o.status = 'CANCELLED', o.cancelled_date = ou.update_date
WHEN MATCHED AND ou.action = 'SHIP' THEN
UPDATE SET o.status = 'SHIPPED', o.shipped_date = ou.update_date
WHEN MATCHED AND ou.action = 'UPDATE' THEN
UPDATE SET o.total_amount = ou.new_amount
WHEN NOT MATCHED AND ou.action = 'CREATE' THEN
INSERT (order_id, customer_id, total_amount, status)
VALUES (ou.order_id, ou.customer_id, ou.total_amount, 'NEW');
2. 复杂数据转换
MERGE INTO customer_summary cs
USING (
SELECT
customer_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent,
MAX(order_date) as last_order_date,
CASE
WHEN SUM(total_amount) > 10000 THEN 'VIP'
WHEN SUM(total_amount) > 5000 THEN 'PREMIUM'
ELSE 'REGULAR'
END as customer_level
FROM orders
WHERE order_date >= CURRENT_DATE - 365
GROUP BY customer_id
) o
ON (cs.customer_id = o.customer_id)
WHEN MATCHED THEN
UPDATE SET
cs.order_count = o.order_count,
cs.total_spent = o.total_spent,
cs.last_order_date = o.last_order_date,
cs.customer_level = o.customer_level,
cs.updated_date = CURRENT_TIMESTAMP
WHEN NOT MATCHED THEN
INSERT (customer_id, order_count, total_spent, last_order_date,
customer_level, created_date, updated_date)
VALUES (o.customer_id, o.order_count, o.total_spent, o.last_order_date,
o.customer_level, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP);
3. 条件插入和更新
MERGE INTO user_preferences up
USING user_activity ua
ON (up.user_id = ua.user_id)
WHEN MATCHED AND ua.activity_date > up.last_activity THEN
UPDATE SET
up.preferred_category = ua.most_viewed_category,
up.last_activity = ua.activity_date
WHEN NOT MATCHED AND ua.total_sessions >= 5 THEN -- 只为活跃用户创建偏好
INSERT (user_id, preferred_category, last_activity)
VALUES (ua.user_id, ua.most_viewed_category, ua.activity_date);
性能优化
1. 索引策略
-- 在连接列上创建索引
CREATE INDEX idx_target_join ON target_table(join_column);
CREATE INDEX idx_source_join ON source_table(join_column);
-- 在WHERE条件列上创建索引
CREATE INDEX idx_target_filter ON target_table(filter_column);
2. 数据预处理
-- ✅ 好的做法:预先过滤和聚合
MERGE INTO summary_table st
USING (
SELECT
category_id,
SUM(amount) as total_amount,
COUNT(*) as record_count
FROM transaction_table
WHERE trans_date = CURRENT_DATE -- 预先过滤
AND status = 'COMPLETED'
GROUP BY category_id -- 预先聚合
) tt
ON (st.category_id = tt.category_id AND st.summary_date = CURRENT_DATE)
WHEN MATCHED THEN
UPDATE SET st.total_amount = tt.total_amount, st.record_count = tt.record_count;
3. 批量处理
-- 对大数据量进行分批处理
MERGE INTO large_target_table ltt
USING (
SELECT * FROM large_source_table
WHERE batch_id = :current_batch_id
AND created_date >= :start_date
) lst
ON (ltt.id = lst.id)
-- ... WHEN子句
4. 避免重复键问题
-- 确保源数据的唯一性
MERGE INTO target t
USING (
SELECT
id, name, amount,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY updated_date DESC) as rn
FROM source_table
) s
ON (t.id = s.id AND s.rn = 1) -- 只取最新的记录
WHEN MATCHED THEN
UPDATE SET t.name = s.name, t.amount = s.amount;
常见问题
1. ON条件的选择
-- ❌ 避免:性能差的ON条件
ON (target.name = source.name) -- 字符串比较,性能差
-- ✅ 推荐:基于索引列的ON条件
ON (target.id = source.id) -- 数字比较,有索引,性能好
2. WHERE子句位置
-- ❌ 错误:WHERE位置不对
WHEN MATCHED THEN
UPDATE SET col1 = val1
WHERE condition
-- ✅ 正确:WHERE在UPDATE SET之后
WHEN MATCHED THEN
UPDATE SET col1 = val1
WHERE condition
3. 空值处理
-- 处理NULL值的比较
WHEN MATCHED AND (
ISNULL(target.col1, '') != ISNULL(source.col1, '') OR
ISNULL(target.col2, 0) != ISNULL(source.col2, 0)
) THEN
UPDATE SET target.col1 = source.col1, target.col2 = source.col2;
4. 事务控制
-- 对于大量数据,使用显式事务控制
BEGIN TRANSACTION;
MERGE INTO large_table ...
-- 检查影响的行数
IF @@ROWCOUNT > 100000
ROLLBACK TRANSACTION;
ELSE
COMMIT TRANSACTION;
数据库兼容性
Oracle
-- 完整的MERGE语法,支持UPDATE后的DELETE
MERGE INTO target t
USING source s ON (t.id = s.id)
WHEN MATCHED THEN
UPDATE SET t.col1 = s.col1
DELETE WHERE t.status = 'INACTIVE'
WHEN NOT MATCHED THEN
INSERT VALUES (s.id, s.col1);
SQL Server
-- 支持BY SOURCE子句,语句末尾需要分号
MERGE target AS t
USING source AS s ON t.id = s.id
WHEN MATCHED THEN
UPDATE SET col1 = s.col1
WHEN NOT MATCHED BY TARGET THEN
INSERT (id, col1) VALUES (s.id, s.col1)
WHEN NOT MATCHED BY SOURCE THEN
DELETE;
PostgreSQL (15+)
-- 基本MERGE语法
MERGE INTO target t
USING source s ON t.id = s.id
WHEN MATCHED THEN
UPDATE SET col1 = s.col1
WHEN NOT MATCHED THEN
INSERT (id, col1) VALUES (s.id, s.col1);
MySQL替代方案
-- 使用ON DUPLICATE KEY UPDATE
INSERT INTO target (id, col1, col2)
SELECT id, col1, col2 FROM source
ON DUPLICATE KEY UPDATE
col1 = VALUES(col1),
col2 = VALUES(col2);
-- 使用REPLACE INTO
REPLACE INTO target (id, col1, col2)
SELECT id, col1, col2 FROM source;
最佳实践总结
✅ 推荐做法
- 明确的连接条件:优先使用主键或唯一键进行连接
- 适当的索引:在连接列和过滤列上建立索引
- 数据预处理:在USING子句中进行必要的过滤和聚合
- 批量处理:对大数据量进行分批处理
- 错误处理:考虑约束冲突和并发控制
- 测试验证:在生产环境前充分测试数据准确性
⚠️ 注意事项
- 笛卡尔积风险:使用条件过滤的ON子句时注意数据量
- 重复更新:确保源数据的唯一性
- 性能监控:关注执行计划和性能指标
- 事务大小:避免过大的事务影响系统性能
- 权限控制:确保适当的数据访问权限
MERGE语句是强大的数据操作工具,掌握其核心概念和最佳实践,能大大简化数据同步和批量处理的复杂度。


浙公网安备 33010602011771号