Oracle
-- Oracle 12c+ : OFFSET FETCH 语法
SELECT * FROM users
ORDER BY id
OFFSET 20 ROWS
FETCH NEXT 10 ROWS ONLY;
-- Oracle 11g及以下
SELECT * FROM (
SELECT a.*, ROWNUM rn FROM (
SELECT * FROM users ORDER BY id
) a WHERE ROWNUM <= 30
) WHERE rn > 20;-- 使用ROWNUM(三层嵌套查询)
SELECT * FROM (
SELECT u.*, ROW_NUMBER() OVER (ORDER BY id) as rn
FROM users u
) WHERE rn BETWEEN 21 AND 30;-- 使用ROW_NUMBER()
-- 注意:ROWNUM 为一个独立的字段,不能加表的别名修饰(即:t.ROWNUM是错误的)
select ROWNUM , t.* from emp t where ROWNUM <=5;
-- 分析函数分页
SELECT * FROM (
SELECT u.*,
RANK() OVER (ORDER BY score DESC) as rank_num,
DENSE_RANK() OVER (ORDER BY score DESC) as dense_rank_num
FROM users u
) WHERE rank_num BETWEEN 1 AND 10;-- 使用RANK()和DENSE_RANK()
MySQL
-- 基础分页(LIMIT OFFSET)
SELECT * FROM users ORDER BY id LIMIT 10 OFFSET 20;
SELECT * FROM users ORDER BY id LIMIT 20, 10;-- 简写形式
-- 优化分页
SELECT u.* FROM users u
INNER JOIN (
SELECT id FROM users ORDER BY id LIMIT 10 OFFSET 10000
) t ON u.id = t.id; -- 延迟关联优化大偏移量
SELECT * FROM users WHERE id > 100 ORDER BY id LIMIT 10; -- 游标分页
-- 特殊功能
-- 使用SQL_CALC_FOUND_ROWS获取总数(MySQL 5.7及以下)
SELECT SQL_CALC_FOUND_ROWS * FROM users LIMIT 10;
SELECT FOUND_ROWS() as total;
PostgreSQL
-- 基础分页(LIMIT OFFSET)
SELECT * FROM users ORDER BY id LIMIT 10 OFFSET 20;
-- 窗口函数分页
SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY id) as row_num
FROM users
) t WHERE row_num BETWEEN 21 AND 30; -- 使用ROW_NUMBER()
SELECT * FROM (
SELECT *, RANK() OVER (ORDER BY score DESC) as rank_num
FROM users
) t WHERE rank_num BETWEEN 1 AND 10; -- 使用RANK()处理相同值
-- 游标分页
SELECT * FROM users WHERE id > $1 ORDER BY id LIMIT 10; -- 基于ID的游标分页
SELECT * FROM users
WHERE (created_at, id) > ($1, $2)
ORDER BY created_at, id LIMIT 10; -- 基于多字段的游标分页
-- 性能优化
CREATE INDEX idx_users_id ON users(id); -- 使用索引扫描
CREATE INDEX idx_users_cover ON users(id, name, email); -- 覆盖索引
SQL Server
-- 基础分页(SQL Server 2012+)
SELECT * FROM users
ORDER BY id
OFFSET 20 ROWS
FETCH NEXT 10 ROWS ONLY;
-- 旧版本分页(SQL Server 2008及以下)
WITH PagedResults AS (
SELECT *, ROW_NUMBER() OVER (ORDER BY id) as RowNum
FROM users
)
SELECT * FROM PagedResults
WHERE RowNum BETWEEN 21 AND 30;-- 使用ROW_NUMBER()
SELECT TOP 10 * FROM users
WHERE id NOT IN (
SELECT TOP 20 id FROM users ORDER BY id
) ORDER BY id;-- 使用TOP和子查询
-- 性能优化
WITH OrderedUsers AS (
SELECT id, name, email,
ROW_NUMBER() OVER (ORDER BY id) as RowNum,
COUNT(*) OVER() as TotalCount
FROM users
)
SELECT * FROM OrderedUsers
WHERE RowNum BETWEEN 21 AND 30; -- 使用CTE和窗口函数
SQLite
-- 基础分页
SELECT * FROM users ORDER BY id LIMIT 10 OFFSET 20;
-- 性能优化
CREATE INDEX idx_users_id ON users(id);-- 使用索引
SELECT * FROM users WHERE id > 100 ORDER BY id LIMIT 10;-- 游标分页
--特殊考虑
--1.SQLite的LIMIT性能较好,但大偏移量仍有问题
--2.建议使用游标分页处理大数据集
MongoDB
-- 基础分页
db.users.find().sort({_id: 1}).skip(20).limit(10); -- skip() 和 limit()
-- 游标分页
db.users.find({_id: {$gt: ObjectId("...")}}).sort({_id: 1}).limit(10);-- 基于_id的游标分页
db.users.find({created_at: {$gt: ISODate("2011-01-01")}})
.sort({created_at: 1}).limit(10);-- 基于自定义字段的游标分页
-- 聚合分页
db.users.aggregate([
{$sort: {_id: 1}},
{$skip: 20},
{$limit: 10}
]);-- 使用聚合管道
db.users.aggregate([
{$facet: {
data: [{$sort: {_id: 1}}, {$skip: 20}, {$limit: 10}],
count: [{$count: "total"}]
}}
]);-- 获取总数和分页数据
Redis
-- 有序集合分页
-- ZRANGE分页
ZRANGE leaderboard 0 9 WITHSCORES -- 前10名
ZRANGE leaderboard 10 19 WITHSCORES -- 11-20名
ZREVRANGE leaderboard 0 9 WITHSCORES -- ZREVRANGE降序分页
-- 列表分页
-- LRANGE分页
LRANGE messages 0 9 -- 前10名
LRANGE messages 10 19 -- 11-20名
-- 基于游标的分页
-- SCAN
SCAN 0 MATCH user:* COUNT 10
Elasticsearch
// 基础分页
{
"from": 20,
"size": 10,
"query": {"match_all": {}},
"sort": [{"_id": "asc"}]
}
// 滚动分页
# 初始请求
{
"size": 10,
"scroll": "1m",
"query": {"match_all": {}}
}
# 后续请求
{
"scroll": "1m",
"scroll_id": "scroll_id_here"
}
// Search After 分页
{
"size": 10,
"query": {"match_all": {}},
"search_after": [1234567890000, "some_tie_breaker"],
"sort": [
{"timestamp": "asc"},
{"_id": "asc"}
]
}
Hive
-- 基础分页
SELECT * FROM users ORDER BY id LIMIT 10; -- LIMIT 语法(最基本)
SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY id) as rn
FROM users
) t WHERE rn BETWEEN 21 AND 30; -- 使用子查询实现OFFSET效果(性能较差)
-- 窗口函数分页
SELECT user_id, name, email
FROM (
SELECT user_id, name, email,
ROW_NUMBER() OVER (ORDER BY user_id) as row_num
FROM users
WHERE status = 'active'
) ranked_users
WHERE row_num BETWEEN 21 AND 30; -- 使用ROW_NUMBER()进行分页
SELECT user_id, score
FROM (
SELECT user_id, score,
RANK() OVER (ORDER BY score DESC) as rank_num
FROM user_scores
) ranked_scores
WHERE rank_num <= 10; -- 使用RANK()处理并列情况
-- 分区分页
SELECT * FROM user_logs
WHERE dt >= '2011-01-01' AND dt <= '2013-01-31'
ORDER BY log_time
LIMIT 1000; -- 基于分区的分页查询
SELECT user_id, action, log_time
FROM (
SELECT user_id, action, log_time,
ROW_NUMBER() OVER (PARTITION BY dt ORDER BY log_time) as rn
FROM user_logs
WHERE dt = '2013-01-01'
) partitioned_logs
WHERE rn BETWEEN 1 AND 100; -- 分区内分页
-- 大数据量分页优化
SELECT * FROM large_table
TABLESAMPLE(BUCKET 1 OUT OF 100 ON RAND())
LIMIT 1000; -- 使用TABLESAMPLE进行数据采样
SELECT * FROM events
WHERE event_date >= '2012-01-01'
AND event_date < '2012-02-02'
AND event_time > '2012-01-01 10:00:00'
ORDER BY event_time
LIMIT 10000; -- 基于时间分区的增量分页
-- 性能优化策略
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict; -- 1. 使用分区裁剪
SET hive.vectorized.execution.enabled=true; -- 2. 启用向量化执行
CREATE TABLE users_orc (
id bigint,
name string,
email string
) STORED AS ORC; -- 3. 使用列式存储格式
CREATE INDEX idx_users_id ON TABLE users (id)
AS 'COMPACT'
WITH DEFERRED REBUILD; -- 4. 创建索引(Hive 3.0之前)
-- 流式分页处理
SELECT TRANSFORM(user_id, name, email)
USING 'python pagination_script.py'
AS (user_id, name, email, page_num)
FROM users
DISTRIBUTE BY page_num;-- 使用STREAMING进行大数据集处理
-- 与外部系统集成
-- 导出分页数据到HDFS
INSERT OVERWRITE DIRECTORY '/output/users_page_1'
SELECT * FROM users
ORDER BY id
LIMIT 10000;
-- 使用Sqoop增量导入
sqoop import \
--connect jdbc:mysql://localhost/db \
--table users \
--incremental append \
--check-column id \
--last-value 1000 \
--target-dir /user/hive/warehouse/users
-- Hive分页特殊注意事项
-- 1. Hive不支持标准的OFFSET语法
-- 错误示例:
-- SELECT * FROM users LIMIT 10 OFFSET 20; -- 不支持
-- 2. 大表分页建议使用采样
SELECT * FROM (
SELECT *, RAND() as random_val
FROM large_table
DISTRIBUTE BY random_val
SORT BY random_val
) sampled
LIMIT 1000;
-- 3. 使用分桶表优化分页
CREATE TABLE users_bucketed (
id bigint,
name string
) CLUSTERED BY (id) INTO 32 BUCKETS
STORED AS ORC;
数据库分页方法选择建议
小数据量(< 10万条)
- MySQL/PostgreSQL/SQLite: LIMIT OFFSET
- SQL Server: OFFSET FETCH
- Oracle: ROWNUM或ROW_NUMBER()
大数据量(> 10万条)
- 关系型数据库: 游标分页(基于索引字段)
- NoSQL: 游标分页(基于_id或时间戳)
- 搜索引擎: Search After 或 Scroll API
- Hive: 分区分页 + 窗口函数,避免全表扫描
实时数据流
- 所有数据库: 基于时间戳的游标分页
- 消息队列: 基于偏移量的消费
- Hive: 基于时间分区的增量处理
性能优化通用原则
- 确保排序字段有索引
- 避免大偏移量的OFFSET
- 使用覆盖索引减少回表
- 考虑数据缓存策略
- Hive特殊优化: 使用分区裁剪、向量化执行、列式存储
- 根据业务场景选择合适的分页方式