高频 SQL 50 题(基础版)【二】
1075. 项目员工 I
比较简单,只要把两个表内链接,然后按照项目分组,计算每一组的平均值就好了。
SELECT
Project.project_id,
ROUND(AVG(Employee.experience_years), 2) AS average_years
FROM
Project
INNER JOIN
Employee ON
Employee.employee_id = Project.employee_id
GROUP BY
Project.project_id
1633. 各赛事的用户注册率
这里虽然用到了两个表,但是缺并不需要链接。我们只要把Register表按照contest_id 分组,并知道Users中用户数量就好了。
SELECT
contest_id,
ROUND(100 * COUNT(*) / (SELECT COUNT(*) FROM Users), 2) AS percentage
FROM
Register
GROUP BY
contest_id
ORDER BY
percentage DESC,
contest_id
1211. 查询结果的质量和占比
按照query_name分组,剩下就比较好做了。
SELECT
query_name,
ROUND(AVG(rating / position),2) AS quality,
ROUND(AVG(IF(rating < 3, 1, 0)) * 100, 2) AS poor_query_percentage
FROM
Queries
GROUP BY
query_name
1193. 每月交易 I
首先我们考虑该如何截取出年月,因为这里保证了日期的格式一定是yyyy-mm-dd因此我们只需要左侧的7位就好了。
SELECT
LEFT(trans_date, 7) AS month,
country,
COUNT(*) AS trans_count,
SUM(IF(state = "approved", 1, 0)) AS approved_count,
SUM(amount) AS trans_total_amount,
SUM(IF(state = "approved", 1, 0) * amount) AS approved_total_amount
FROM
Transactions
GROUP BY
month,
country
如果格式不固定的话,我们可以采用DATE_FORMAT()
SELECT
DATE_FORMAT(trans_date, "%Y-%m") AS month,
country,
COUNT(*) AS trans_count,
SUM(IF(state = "approved", 1, 0)) AS approved_count,
SUM(amount) AS trans_total_amount,
SUM(IF(state = "approved", 1, 0) * amount) AS approved_total_amount
FROM
Transactions
GROUP BY
month,
country
其中Y是四位年份,m是两位月份。可以通过大小写读取日期,并生成对应的格式。
1174. 即时食物配送 II
有一个问题,如何判断订单是首次订单?
我们可以根据用户分组,找出没个用户最早下订单的日期。因此可以有以下查询
SELECT
customer_id,
MIN(order_date)
FROM
Delivery
GROUP BY
customer_id
这样的话,我们可以在所有记录中根据(customer_id, order_date)筛选出所有的首次订单,再求即使订单的占比就好了。
SELECT
ROUND(AVG(order_date = customer_pref_delivery_date) * 100, 2)
AS immediate_percentage
FROM
Delivery
WHERE
(customer_id, order_date) IN (
SELECT
customer_id,
MIN(order_date)
FROM
Delivery
GROUP BY
customer_id
)
550. 游戏玩法分析 IV
首先如何获取首次登录的第二天,我们可以想先按照用户分组,然后用MIN找到最早的一天,最后用DATE_ADD计算出第二天。
SELECT
player_id,
DATE_ADD(MIN(event_date), INTERVAL 1 DAY) AS second_date
FROM
Activity
GROUP BY
player_id
接下来,我们可以把两个表进行左连接。
SELECT
*
FROM
Activity
LEFT JOIN (
SELECT
player_id,
DATE_ADD(MIN(event_date), INTERVAL 1 DAY) AS second_date
FROM
Activity
GROUP BY
player_id
) AS Expected
ON
Activity.player_id = Expected.player_id
AND Activity.event_date = Expected.second_date
然后我们观察结果
| player_id | device_id | event_date | games_played | player_id | second_date |
| --------- | --------- | ---------- | ------------ | --------- | ----------- |
| 1 | 2 | 2016-03-01 | 5 | null | null |
| 1 | 2 | 2016-03-02 | 6 | 1 | 2016-03-02 |
| 2 | 3 | 2017-06-25 | 1 | null | null |
| 3 | 1 | 2016-03-02 | 0 | null | null |
| 3 | 4 | 2018-07-03 | 5 | null | null |
我们观察一下,只要second_date不是null的就是首次登录第二题又登录过的。我们再考虑如何统计玩家的总数,只需要DISTINCT关键字就好了。完整代码如下
SELECT
ROUND(
SUM(IF(Expected.second_date IS NOT NULL, 1, 0))
/ COUNT(DISTINCT Activity.player_id) ,
2
) AS fraction
FROM
Activity
LEFT JOIN (
SELECT
player_id,
DATE_ADD(MIN(event_date), INTERVAL 1 DAY) AS second_date
FROM
Activity
GROUP BY
player_id
) AS Expected
ON
Activity.player_id = Expected.player_id
AND Activity.event_date = Expected.second_date
2356. 每位教师所教授的科目种类的数量
简单题,根据teacher_id分组,再统计出不同的subject_id数就好了。
SELECT
teacher_id,
COUNT(DISTINCT subject_id) AS cnt
FROM
Teacher
GROUP BY
teacher_id
1141. 查询近30天活跃用户数
按照日期分组,然后统计不同的用户数就好了。
30天内可以使用DATEDIFF配合BETWEEN进行筛选。
SELECT
activity_date AS day,
COUNT(DISTINCT user_id) AS active_users
FROM
Activity
GROUP BY
activity_date
HAVING
DATEDIFF("2019-07-27", activity_date) BETWEEN 0 AND 29
1084. 销售分析 III
可以先把表链接出来,然后计算出每种商品最早最晚销售的时间。
SELECT
Product.product_id,
Product.product_name,
MIN(Sales.sale_date) AS min_date,
MAX(Sales.sale_date) AS max_date
FROM
Product
INNER JOIN
Sales ON Product.product_id = Sales.product_id
GROUP BY
Product.product_id
结果如下
| product_id | product_name | min_date | max_date |
| ---------- | ------------ | ---------- | ---------- |
| 1 | S8 | 2019-01-21 | 2019-01-21 |
| 2 | G4 | 2019-02-17 | 2019-06-02 |
| 3 | iPhone | 2019-05-13 | 2019-05-13 |
再根据这个表做一次筛选即可。
SELECT
product_id,
product_name
FROM(
SELECT
Product.product_id,
Product.product_name,
MIN(Sales.sale_date) AS min_date,
MAX(Sales.sale_date) AS max_date
FROM
Product
INNER JOIN
Sales ON Product.product_id = Sales.product_id
GROUP BY
Product.product_id
) AS min_max_date
WHERE
"2019-01-01" <= min_date
AND max_date <= "2019-03-31"
596. 超过 5 名学生的课
简单题
SELECT
class
FROM
Courses
GROUP BY
class
HAVING
COUNT(*) >= 5
1729. 求关注者的数量
SELECT
user_id,
COUNT(*) AS followers_count
FROM
Followers
GROUP BY
user_id
ORDER BY
user_id
619. 只出现一次的最大数字
SELECT
MAX(num) AS num
FROM(
SELECT
num
FROM
MyNumbers
GROUP BY
num
HAVING
COUNT(*) = 1
) AS OnlyoneNum
1731. 每位经理的下属员工数量
可以先用内链接,把经理和员工进行链接,再按照经理分组。
SELECT
e1.employee_id,
e1.name,
COUNT(*) AS reports_count,
ROUND(AVG(e2.age)) AS average_age
FROM
Employees e1
INNER JOIN
Employees e2 ON
e2.reports_to = e1.employee_id
GROUP BY
e1.employee_id
ORDER BY
e1.employee_id
1789. 员工的直属部门
直属部门有两种情况,一种是primary_flag是Y,另一种是只属于一个部门。因此我们可以分别查询,再UNION
SELECT
employee_id,
department_id
FROM
Employee
WHERE
primary_flag = 'Y'
UNION
SELECT
employee_id,
department_id
FROM
Employee
GROUP BY
employee_id
HAVING
COUNT(*) = 1
但这种做法可能会不安全,主要是因为ONLY_FULL_GROUP_BY模式。这里不赘述。
我们也可以用窗口函数实现,首先进行如下的查询
SELECT
employee_id,
department_id,
primary_flag,
COUNT(*) OVER(PARTITION BY employee_id) AS count_dep
FROM
Employee
这样可以得到结果如下
| employee_id | department_id | primary_flag | count_dep |
| ----------- | ------------- | ------------ | --------- |
| 1 | 1 | N | 1 |
| 2 | 1 | Y | 2 |
| 2 | 2 | N | 2 |
| 3 | 3 | N | 1 |
| 4 | 2 | N | 3 |
| 4 | 3 | Y | 3 |
| 4 | 4 | N | 3 |
我们在根据这个表进行筛选
SELECT
employee_id,
department_id
FROM(
SELECT
employee_id,
department_id,
primary_flag,
COUNT(*) OVER(PARTITION BY employee_id) AS count_dep
FROM
Employee
) AS Counted
WHERE
primary_flag = 'Y'
OR count_dep = 1
610. 判断三角形
两边之和大于第三边
SELECT
*,
IF(x + y > z AND x + z > y AND y + z > x, "Yes", "No") AS triangle
FROM
Triangle
1045. 买下所有产品的客户
买下所有产品的客户购买的产品数等于Product中产品数。
SELECT
customer_id
FROM
Customer
GROUP BY
customer_id
HAVING
COUNT(DISTINCT product_key) = (
SELECT
COUNT(*)
FROM
Product
)
180. 连续出现的数字
首先我们先做一个这样的查询
SELECT
*
FROM
Logs l1,
Logs l2,
Logs l3
WHERE
l2.id = l1.id + 1
AND l3.id = l2.id + 1
AND l1.num = l2.num
AND l2.num = l3.num
这样可以筛选出所有连续三个相同的数字。我在这个基础上输出l1.num就可以得到答案。
SELECT
l1.num AS ConsecutiveNums
FROM
Logs l1,
Logs l2,
Logs l3
WHERE
l2.id = l1.id + 1
AND l3.id = l2.id + 1
AND l1.num = l2.num
AND l2.num = l3.num
但是会有这样的数据出现错误
| id | num |
| -- | --- |
| 1 | 3 |
| 2 | 3 |
| 3 | 3 |
| 4 | 3 |
所以还需要进行去重
SELECT
DISTINCT l1.num AS ConsecutiveNums
FROM
Logs l1,
Logs l2,
Logs l3
WHERE
l2.id = l1.id + 1
AND l3.id = l2.id + 1
AND l1.num = l2.num
AND l2.num = l3.num
1164. 指定日期的产品价格
首先筛选出所有的商品
SELECT
DISTINCT product_id
FROM
Products
筛选出2019-08-16之前的最新记录。
SELECT
product_id,
MAX(change_date)
FROM
Products
WHERE
change_date <= "2019-08-16"
GROUP BY
product_id
根据最新的记录,筛选出价格
SELECT
product_id,
new_price
FROM
Products
WHERE
(product_id,change_date) IN (
SELECT
product_id,
MAX(change_date)
FROM
Products
WHERE
change_date <= "2019-08-16"
GROUP BY
product_id
)
最后把价格查询和商品查询链接起来
SELECT
p1.product_id,
IFNULL(p2.new_price, 10) AS price
FROM (
SELECT
DISTINCT product_id
FROM
Products
) AS p1
LEFT JOIN (
SELECT
product_id,
new_price
FROM
Products
WHERE
(product_id,change_date) IN (
SELECT
product_id,
MAX(change_date)
FROM
Products
WHERE
change_date <= "2019-08-16"
GROUP BY
product_id
)
) AS p2
ON
p1.product_id = p2.product_id

浙公网安备 33010602011771号