SQL
问题描述
有一张表 log_table, 两个字段 user_id、login_date,求用户最大连续登录天数
数据
-- 示例数据
CREATE TABLE log_table (
user_id INT,
login_date DATE
);
INSERT INTO log_table (user_id, login_date) VALUES
(1, '2024-12-01'),
(1, '2024-12-02'),
(1, '2024-12-04'),
(1, '2024-12-05'),
(1, '2024-12-06'),
(2, '2024-12-01'),
(2, '2024-12-02'),
(2, '2024-12-03'),
(2, '2024-12-05'),
(2, '2024-12-06'),
(3, '2024-12-01');
输出示例
user_id max_consecutive_days
1 3
2 3
3 1
思路
- ROW_NUMBER() 根据 user_id 分组、login_date 排序,生成序号
- login_date 减去 对应的
序号*DAY,即 天数,若结果日期一致,则登录是连续的。当结果日期变化开始时,连续登录被打破
-- 核心SQL
SELECT
user_id
,login_date
,ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
,DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) AS diff -- 如果登录是连续的,diff值将保持不变;如果登录中断,diff值将改变。这样,我们可以通过diff值来识别连续的登录序列。
FROM (
SELECT
user_id
,login_date
FROM log_table
ORDER BY user_id, login_date
) AS sorted_logs
查询结果
- 根据 user_id、diff 分组,得出每个用户连续登录的天数,再通过 max 函数筛选出每个用户最长的登录天数。
SQL 实现
SELECT
user_id
,MAX(days_count) AS max_consecutive_days
FROM (
SELECT
user_id
,COUNT(*) AS days_count
FROM (
SELECT
user_id
,login_date
,ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
,DATE_SUB(login_date, INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY) AS diff -- 如果登录是连续的,diff值将保持不变;如果登录中断,diff值将改变。这样,我们可以通过diff值来识别连续的登录序列。
FROM (
SELECT user_id, login_date
FROM log_table
ORDER BY user_id, login_date
) AS sorted_logs
) AS sub1
GROUP BY user_id, diff
) AS sub2
GROUP BY user_id;