大数据量的分页处理
我们知道,在数据量小的时候,数据的分页处理,一般情况下都没有什么问题,但是在大的数据量的情况下,我们就会有数据压力的问题,以前把所有数据都取出来,对数据库以及数据流量的压力也是非常大的,这样会造成性能会非常差。对于大的数据量分页的方法主要有下面的三种思想,它们共同的思路是:只取出我们要分页显示的数据记录,而不是全部取出来。
下面我们来具体分析这三种方法的实现以及性能的测试情况。先说明一下数据表的建立情况。
CREATE TABLE [dbo].[Test] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[T_Name] [char] (32) COLLATE Chinese_PRC_CI_AS NOT NULL ,
[Voting] [int] NOT NULL
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[Test] WITH NOCHECK ADD
CONSTRAINT [PK_Test] PRIMARY KEY CLUSTERED
(
[ID]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[Test] ADD
CONSTRAINT [DF_Test_voting] DEFAULT (0) FOR [Voting]
GO
CREATE INDEX [IX_Test_tName_asc] ON [dbo].[Test]([T_Name]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_voting_asc] ON [dbo].[Test]([Voting]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_voting_desc] ON [dbo].[Test]([ID]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_tName_DESC] ON [dbo].[Test]([T_Name] DESC ) ON [PRIMARY]
GO
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[T_Name] [char] (32) COLLATE Chinese_PRC_CI_AS NOT NULL ,
[Voting] [int] NOT NULL
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[Test] WITH NOCHECK ADD
CONSTRAINT [PK_Test] PRIMARY KEY CLUSTERED
(
[ID]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[Test] ADD
CONSTRAINT [DF_Test_voting] DEFAULT (0) FOR [Voting]
GO
CREATE INDEX [IX_Test_tName_asc] ON [dbo].[Test]([T_Name]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_voting_asc] ON [dbo].[Test]([Voting]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_voting_desc] ON [dbo].[Test]([ID]) ON [PRIMARY]
GO
CREATE INDEX [IX_Test_tName_DESC] ON [dbo].[Test]([T_Name] DESC ) ON [PRIMARY]
GO
我们新建了一个数据库表TEST,ID是PK并且是自增长的,CLUSTERED索引。T_Name和Voting分别建了ASC和DESC的索引。由于我们只单独对t_Name和Voting字段做分页查询,所以只建立了单独的索引,而没有建相关的组合索引了。
现在只对Voting做分页处理的一个案例。分页处理,一般情况下,我们知道要显示的哪一页Page,以及分页显示的PageSize.对于一些例外在这里我们不用考虑。
方法 一 : 用INDEX和ROWCOUNT结合来做分页处理。
CREATE PROC dbo.TestPage_Index
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT
DECLARE
@voting INT,
@maxvoting INT
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageLower
SELECT @id = Id,
@voting = voTing
FROM Test.dbo.Test (NoLock )
ORDER BY voTing ASC
SET RowCount @PageUpper
SELECT @maxvoting = voTing
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
SET RowCount @PageSize
SELECT Id,
t_Name,
voTing
FROM Test.dbo.Test (NoLock )
WHERE (voTing = @voting
AND Id > @id)
OR (voTing > @voting
AND voTing <= @maxVoting)
ORDER BY voTing
SET RowCount 0
END
GO
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT
DECLARE
@voting INT,
@maxvoting INT
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageLower
SELECT @id = Id,
@voting = voTing
FROM Test.dbo.Test (NoLock )
ORDER BY voTing ASC
SET RowCount @PageUpper
SELECT @maxvoting = voTing
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
SET RowCount @PageSize
SELECT Id,
t_Name,
voTing
FROM Test.dbo.Test (NoLock )
WHERE (voTing = @voting
AND Id > @id)
OR (voTing > @voting
AND voTing <= @maxVoting)
ORDER BY voTing
SET RowCount 0
END
GO
第二种方法:用临时表的方法来分页处理
CREATE PROC dbo.TestPage_Tab
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT
DECLARE
@voting INT,
@maxvoting INT
CREATE TABLE ##IndexTable (
Id INT IDENTITY( 1 , 1 ),
nId INT)
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageUpper
INSERT INTO ##IndexTable
(nId)
SELECT Id
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
SET RowCount @PageSize
SELECT a.*
FROM Test.dbo.Test a (NoLock),
##IndexTable t (NoLock)
WHERE a.Id = t.nId
AND t.Id > @PageLower
AND t.Id <= @PageUpper
ORDER BY t.Id
SET RowCount 0
DROP TABLE ##IndexTable
END
GO
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT
DECLARE
@voting INT,
@maxvoting INT
CREATE TABLE ##IndexTable (
Id INT IDENTITY( 1 , 1 ),
nId INT)
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageUpper
INSERT INTO ##IndexTable
(nId)
SELECT Id
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
SET RowCount @PageSize
SELECT a.*
FROM Test.dbo.Test a (NoLock),
##IndexTable t (NoLock)
WHERE a.Id = t.nId
AND t.Id > @PageLower
AND t.Id <= @PageUpper
ORDER BY t.Id
SET RowCount 0
DROP TABLE ##IndexTable
END
GO
第三种方法:用Cursor来做分页处理
CREATE PROC dbo.TestPage_CurSor
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT,
@i INT
DECLARE
@voting INT,
@maxvoting INT
DECLARE
@condition CHAR(400)
DECLARE
@cur AS CURSOR
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageUpper
SET @cur = CURSOR FAST_FORWARD READ_ONLY FOR SELECT Id
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
OPEN @cur
SET RowCount 0
FETCH NEXT FROM @cur
INTO @id
SET @i = 1
WHILE @@FETCH_STATUS = 0
BEGIN
IF (@i >= @PageLower + 1)
AND @i <= @pageUpper
BEGIN
IF @i = @PageLower + 1
BEGIN
SET @condition = Rtrim(CAST(@id AS CHAR(10)))
END
ELSE
BEGIN
SET @condition = Rtrim(@condition) + ',' + Rtrim(CAST(@id AS CHAR(10)))
END
END
SET @i = @i + 1
FETCH NEXT FROM @cur
INTO @id
END
CLOSE @cur
DEALLOCATE @cur
IF @condition IS NOT NULL
EXEC( 'select * from test..test(nolock) where id in (' + @Condition + ')')
END
GO
@pageSize INT,
@pageIndex INT
AS
BEGIN
DECLARE
@PageLower INT,
@PageUpper INT
DECLARE
@id INT,
@i INT
DECLARE
@voting INT,
@maxvoting INT
DECLARE
@condition CHAR(400)
DECLARE
@cur AS CURSOR
SET @PageLower = (@pageIndex - 1) * @PageSize
SET @PageUpper = @PageLower + @PageSize
SET RowCount @PageUpper
SET @cur = CURSOR FAST_FORWARD READ_ONLY FOR SELECT Id
FROM Test.dbo.Test (NoLock )
ORDER BY voTing
OPEN @cur
SET RowCount 0
FETCH NEXT FROM @cur
INTO @id
SET @i = 1
WHILE @@FETCH_STATUS = 0
BEGIN
IF (@i >= @PageLower + 1)
AND @i <= @pageUpper
BEGIN
IF @i = @PageLower + 1
BEGIN
SET @condition = Rtrim(CAST(@id AS CHAR(10)))
END
ELSE
BEGIN
SET @condition = Rtrim(@condition) + ',' + Rtrim(CAST(@id AS CHAR(10)))
END
END
SET @i = @i + 1
FETCH NEXT FROM @cur
INTO @id
END
CLOSE @cur
DEALLOCATE @cur
IF @condition IS NOT NULL
EXEC( 'select * from test..test(nolock) where id in (' + @Condition + ')')
END
GO
测试比较
我现在的机器是T43 ,CPU 1.86G SQL SERVER 2000
TEST表里有400,000条记录
测试程序如下:
DECLARE @dt DATETIME
SET @dt = Getdate()
EXEC TestPage_CurSor 25,200
SELECT Getdate() - @dt AS m_CurSor
SET @dt = Getdate()
EXEC TestPage_Index 25,200
SELECT Getdate() - @dt AS m_Index
SET @dt = Getdate()
EXEC TestPage_Tab 25,200
SELECT Getdate() - @dt AS m_Tab
SET @dt = Getdate()
EXEC TestPage_CurSor 25,200
SELECT Getdate() - @dt AS m_CurSor
SET @dt = Getdate()
EXEC TestPage_Index 25,200
SELECT Getdate() - @dt AS m_Index
SET @dt = Getdate()
EXEC TestPage_Tab 25,200
SELECT Getdate() - @dt AS m_Tab
测试的结果为:
我们可以看到测试的结果是如此的悬殊:
第一种对比下来,基本上没有花时间,就处理了
第二种花了50ms的时间,但是多了一个临时表
第三种花了近6s的时间才能处理。