MySQL 8 窗口函数实战指南
目录
1. 窗口函数基础
1.1 什么是窗口函数
窗口函数(Window Function)允许我们在不改变结果集行数的情况下,计算基于当前行所在的"窗口"的聚合值。
1.2 基本语法
function_name() OVER (
[PARTITION BY expr_list]
[ORDER BY expr_list]
[frame_clause]
)
1.3 框架语法(frame_clause)
{ROWS | RANGE} {frame_start | frame_between}
frame_between:
BETWEEN frame_start AND frame_end
frame_start, frame_end:
CURRENT ROW
| UNBOUNDED PRECEDING
| UNBOUNDED FOLLOWING
| expr PRECEDING
| expr FOLLOWING
2. 常用窗口函数
2.1 排序函数
-- ROW_NUMBER(): 连续唯一排序
SELECT
product_name,
category,
price,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as rank
FROM products;
-- RANK(): 相同值相同排名,排名不连续
SELECT
product_name,
price,
RANK() OVER (ORDER BY price DESC) as rank
FROM products;
-- DENSE_RANK(): 相同值相同排名,排名连续
SELECT
product_name,
price,
DENSE_RANK() OVER (ORDER BY price DESC) as rank
FROM products;
2.2 分析函数
-- LAG/LEAD: 访问前后行数据
SELECT
order_date,
amount,
LAG(amount) OVER (ORDER BY order_date) as prev_amount,
LEAD(amount) OVER (ORDER BY order_date) as next_amount
FROM orders;
-- FIRST_VALUE/LAST_VALUE: 取分组内第一个/最后一个值
SELECT
product_name,
category,
price,
FIRST_VALUE(price) OVER (
PARTITION BY category
ORDER BY price DESC
) as highest_price
FROM products;
2.3 聚合函数
-- 移动平均
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg
FROM orders;
-- 累计总和
SELECT
order_date,
amount,
SUM(amount) OVER (
ORDER BY order_date
ROWS UNBOUNDED PRECEDING
) as running_total
FROM orders;
3. 实际应用场景
3.1 销售数据分析
-- 计算每个产品在其类别中的销售排名
SELECT
p.product_name,
p.category,
SUM(o.quantity) as total_sales,
RANK() OVER (
PARTITION BY p.category
ORDER BY SUM(o.quantity) DESC
) as sales_rank
FROM products p
JOIN orders o ON p.product_id = o.product_id
GROUP BY p.product_id, p.product_name, p.category;
-- 计算月环比增长
SELECT
DATE_FORMAT(order_date, '%Y-%m') as month,
SUM(amount) as total_amount,
LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) as prev_month,
ROUND(
(SUM(amount) - LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')))
/ LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m'))
* 100,
2
) as growth_rate
FROM orders
GROUP BY DATE_FORMAT(order_date, '%Y-%m');
3.2 用户行为分析
-- 计算用户连续登录天数
WITH user_logins AS (
SELECT
user_id,
login_date,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) as login_rank,
DATE_SUB(
login_date,
INTERVAL ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) DAY
) as group_date
FROM login_logs
)
SELECT
user_id,
MIN(login_date) as start_date,
MAX(login_date) as end_date,
COUNT(*) as consecutive_days
FROM user_logins
GROUP BY user_id, group_date
HAVING COUNT(*) >= 3;
-- 计算用户留存率
SELECT
first_date,
COUNT(DISTINCT user_id) as total_users,
COUNT(DISTINCT CASE WHEN days_diff = 1 THEN user_id END) as day1_retained,
ROUND(
COUNT(DISTINCT CASE WHEN days_diff = 1 THEN user_id END) * 100.0
/ COUNT(DISTINCT user_id),
2
) as day1_retention_rate
FROM (
SELECT
u1.user_id,
DATE(u1.login_time) as first_date,
DATEDIFF(DATE(u2.login_time), DATE(u1.login_time)) as days_diff
FROM user_logins u1
LEFT JOIN user_logins u2
ON u1.user_id = u2.user_id
AND DATE(u2.login_time) >= DATE(u1.login_time)
) t
GROUP BY first_date
ORDER BY first_date;
4. 性能优化建议
4.1 索引优化
- 为 PARTITION BY 和 ORDER BY 中使用的列创建适当的索引
- 考虑创建覆盖索引以提高性能
4.2 注意事项
- 避免使用过大的窗口范围
- 合理使用分区,减少数据扫描量
- 注意 UNBOUNDED PRECEDING/FOLLOWING 可能带来的性能影响
4.3 替代方案
某些场景下,可以使用其他方式替代窗口函数:
-- 使用自连接替代 LAG/LEAD
SELECT
a.order_date,
a.amount,
b.amount as prev_amount
FROM orders a
LEFT JOIN orders b ON DATE_SUB(a.order_date, INTERVAL 1 DAY) = b.order_date;
-- 使用变量替代 ROW_NUMBER
SET @row_num = 0;
SELECT
product_name,
price,
@row_num := @row_num + 1 as row_num
FROM products
ORDER BY price DESC;
总结
窗口函数是 MySQL 8.0 中的强大特性,能够简化复杂的分析查询。合理使用窗口函数可以:
- 提高代码可读性
- 简化复杂计算
- 提升查询性能
但同时要注意性能影响,根据实际场景选择合适的实现方式。