统计公司人数
这是一个在sqlservercentral的谜题,要求按月统计每个月加入公司的人数,每个月离开公司的人数和当月公司的总人数。公司的人员加入离开的情况用以下下表来存储:
CREATE TABLE [dbo].[Registrations]
(
[Registration_ID] [int] IDENTITY(1, 1)
NOT NULL,
[FirstName] [varchar](80) NOT NULL,
[LastName] [varchar](80) NOT NULL,
[DateJoined] [datetime] NOT NULL,
[DateLeft] [datetime] NULL,
CONSTRAINT [PK_Registrations] PRIMARY KEY CLUSTERED
([DateJoined], [LastName], [FirstName])
)
CREATE INDEX idxDateJoined
ON Registrations (DateJoined, DateLeft, Registration_ID)
以上表First Name和Last Name表示用户的姓名,DateJoined表示加入公司的时间,DateLeft表示离开公司的时间。具体插入数据的代码可以从此处下载。
先看看按加入时间排序的头十条记录:
Registration_ID FirstName LastName DateJoined DateLeft
--------------- -------------------- -------------------- ----------------------- -----------------------
4 Judith Palmer 2004-01-01 00:00:00.000 NULL
5 Christa Jackson 2004-01-01 00:00:00.000 NULL
6 Sophia Grimes 2004-01-01 00:00:00.000 NULL
7 Tanisha Delgado 2004-01-01 00:00:00.000 NULL
8 Melisa Barber 2004-01-01 00:00:00.000 NULL
9 Hannah Gill 2004-01-01 00:00:00.000 NULL
10 Dante Barnes 2004-01-02 00:00:00.000 NULL
11 Rolando Ford 2004-01-02 00:00:00.000 NULL
12 Otis Anthony 2004-01-02 00:00:00.000 NULL
13 Frankie Larsen 2004-01-03 00:00:00.000 NULL
可以看出这几个2004年一月加入的人还没有离开公司。最终的结果应该如下所示:
Date MonthJoin MonthLeft Registrations
----------------------- ----------- ----------- -------------
2004-01-01 00:00:00.000 167 0 167
2004-02-01 00:00:00.000 133 0 300
2004-03-01 00:00:00.000 144 0 444
2004-04-01 00:00:00.000 155 0 599
2004-05-01 00:00:00.000 156 0 755
2004-06-01 00:00:00.000 136 1 890
2004-07-01 00:00:00.000 138 0 1028
2004-08-01 00:00:00.000 132 0 1160
2004-09-01 00:00:00.000 163 0 1323
2004-10-01 00:00:00.000 135 0 1458
。。。。。
首先给出我的一个算法:
DECLARE @sumValue AS INT = 0 CREATE TABLE #Temp( Date DATETIME, MonthJoin INT, MonthLeft INT, Registrations INT) ;WITH MonthLeft(dt, MJ, ML) AS ( SELECT DATEDIFF(MM, 0, r.DateLeft),0, COUNT(*) FROM dbo.Registrations r WHERE r.DateLeft IS NOT NULL GROUP BY DATEDIFF(MM, 0, r.DateLeft) UNION ALL SELECT DATEDIFF(MM, 0, r.DateJoined), COUNT(*), 0 FROM dbo.Registrations r GROUP BY DATEDIFF(MM, 0, r.DateJoined) ) INSERT INTO #Temp(Date, MonthJoin, MonthLeft, Registrations) SELECT DATEADD(MM, dt, 0), MAX(MJ), MAX(ML), 0 FROM MonthLeft GROUP BY dt UPDATE #Temp SET @sumValue = Registrations = @sumValue + MonthJoin - MonthLeft SELECT * FROM #Temp
首先Cursor内部的查询可以查找出每个月公司离开人数,每个月加入公司的人数。每次遍历结果中的记录时,用加入人数减去离开人数得到本月公司的净入人数,然后加上上个月的公司人数就可以得到本月公司的人数。在cursor内部一共会对表进行两次索引查找。
下面给出最佳解决方案(来自所给链接):
SET STATISTICS TIME ON
--DROP TABLE #Stage
CREATE TABLE #Stage
(
theMonth SMALLINT NOT NULL,
PeopleJoined INT NOT NULL,
PeopleLeft INT NOT NULL,
Subscribers INT NOT NULL
)
INSERT #Stage
(
theMonth,
PeopleJoined,
PeopleLeft,
Subscribers
)
SELECT u.theMonth,
SUM(CASE WHEN u.theCol = 'DateJoined' THEN u.Registrations ELSE 0 END) AS PeopleJoined,
SUM(CASE WHEN u.theCol = 'DateLeft' THEN u.Registrations ELSE 0 END) AS PeopleLeft,
0 AS Subscribers
FROM (
SELECT DATEDIFF(MONTH, 0, DateJoined) AS DateJoined,
DATEDIFF(MONTH, 0, DateLeft) AS DateLeft,
COUNT(*) AS Registrations
FROM dbo.Registrations
GROUP BY DATEDIFF(MONTH, 0, DateJoined),
DATEDIFF(MONTH, 0, DateLeft)
) AS d
UNPIVOT (
theMonth
FOR theCol IN (d.DateJoined, d.DateLeft)
) AS u
GROUP BY u.theMonth
HAVING SUM(CASE WHEN u.theCol = 'DateJoined' THEN u.Registrations ELSE 0 END) > 0
DECLARE @Subscribers INT = 0
;WITH Yak (theMonth, PeopleJoined, PeopleLeft, Subscribers)
AS (
SELECT TOP 2147483647
DATEADD(MONTH, theMonth, 0) AS theMonth,
PeopleJoined,
PeopleLeft,
Subscribers
FROM #Stage
ORDER BY theMonth
)
UPDATE Yak
SET @Subscribers = Subscribers = @Subscribers + PeopleJoined - PeopleLeft
OUTPUT inserted.theMonth,
inserted.PeopleJoined,
inserted.PeopleLeft,
inserted.Subscribers
这个解决方案非常快,只做了一次表的聚集索引扫描,主要使用了UNPIVOT关键字。这个解决方案有太多值得学习的东西。
浙公网安备 33010602011771号