一、ROW_NUMBER() 窗口函数核心语法
基本语法结构
ROW_NUMBER() OVER (
PARTITION BY 分区字段
ORDER BY 排序字段 [ASC|DESC]
) AS 别名
您提供的案例解析
ROW_NUMBER() OVER (
PARTITION BY cwos.warranty_no -- 按保修单号分区
ORDER BY cwo.outbound_time DESC -- 按出库时间降序排序
) AS rn
功能:为每个保修单(warranty_no)下的记录,按出库时间倒序编号(最近出库的为1号)
二、四大核心应用场景
1. 分组取最新/最早记录(去重)
-- 获取每个保修单最新的出库记录
WITH ranked_data AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY outbound_time DESC
) AS rn
FROM warranty_operations
)
SELECT *
FROM ranked_data
WHERE rn = 1; -- 只要最新的
2. 分页查询优化
-- 高效分页:每页显示20条
WITH paged_data AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY create_time DESC) AS seq
FROM orders
)
SELECT *
FROM paged_data
WHERE seq BETWEEN 21 AND 40; -- 第二页
3. 数据采样与排名
-- 每个类别取前3名
SELECT *
FROM (
SELECT
product_id,
sales,
category,
ROW_NUMBER() OVER (
PARTITION BY category
ORDER BY sales DESC
) AS rank_in_category
FROM sales_data
) t
WHERE rank_in_category <= 3;
4. 数据质量检查(查找重复项)
-- 查找重复的保修单记录
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY warranty_no, batch_no -- 多字段组合判定重复
ORDER BY create_time DESC
) AS duplicate_count
FROM warranty_records
) t
WHERE duplicate_count > 1; -- 重复的记录
三、实战中常见的七大"坑"及解决方案
🚨 坑1:性能陷阱 - 全表排序消耗
问题场景:
-- 危险:对百万级表全表排序
SELECT *, ROW_NUMBER() OVER (ORDER BY create_time) AS rn
FROM huge_table;
优化方案:
-- 方案A:添加WHERE条件缩小范围
SELECT *, ROW_NUMBER() OVER (ORDER BY create_time) AS rn
FROM huge_table
WHERE create_time >= '2024-01-01';
-- 方案B:利用分区减少排序量
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY dept_id -- 先按部门分区
ORDER BY create_time
) AS rn
FROM huge_table;
🚨 坑2:NULL值排序歧义
问题场景:
-- NULL在排序中的位置不明确
ROW_NUMBER() OVER (ORDER BY outbound_time DESC) AS rn
-- 默认NULLS排在最后(DESC时在最前,ASC时在最后)
解决方案:
-- 明确NULL值处理
ROW_NUMBER() OVER (
ORDER BY
CASE
WHEN outbound_time IS NULL THEN 0
ELSE 1
END DESC,
outbound_time DESC
) AS rn
-- 或使用COALESCE
ROW_NUMBER() OVER (
ORDER BY COALESCE(outbound_time, '1900-01-01') DESC
) AS rn
🚨 坑3:非确定性排序(并列问题)
问题场景:
-- 当排序字段有重复值时,编号可能不稳定
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY status -- 多个记录status相同
) AS rn
-- 每次执行rn可能不同!
解决方案:
-- 添加第二排序字段确保确定性
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY status, id DESC -- id确保唯一排序
) AS rn
-- 或使用RANK()/DENSE_RANK()接受并列
RANK() OVER (
PARTITION BY warranty_no
ORDER BY status
) AS rank_num
🚨 坑4:分区过大导致内存溢出
问题场景:
-- 某个warranty_no有数十万条记录
ROW_NUMBER() OVER (
PARTITION BY warranty_no -- 这个分区可能非常大!
ORDER BY outbound_time
) AS rn
解决方案:
-- 方案1:提前过滤
WITH filtered AS (
SELECT *
FROM warranty_operations
WHERE outbound_time > '2024-01-01' -- 先过滤
)
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY outbound_time
) AS rn
FROM filtered;
-- 方案2:调整数据库参数
-- 增加 work_mem / sort_memory 等参数
🚨 坑5:与GROUP BY的混淆使用
问题场景:
-- 错误:试图在ROW_NUMBER后直接GROUP BY
SELECT
warranty_no,
ROW_NUMBER() OVER (PARTITION BY warranty_no ORDER BY outbound_time) AS rn,
COUNT(*) -- 错误!这会破坏ROW_NUMBER
FROM warranty_operations
GROUP BY warranty_no;
正确写法:
WITH numbered AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY warranty_no ORDER BY outbound_time) AS rn
FROM warranty_operations
)
SELECT
warranty_no,
MAX(CASE WHEN rn = 1 THEN outbound_time END) AS latest_time,
COUNT(*) AS total_records
FROM numbered
GROUP BY warranty_no;
🚨 坑6:嵌套查询性能问题
问题场景:
-- 多层嵌套,性能差
SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (...) AS rn
FROM (
SELECT * FROM table1 JOIN table2 ...
) t1
) t2
WHERE rn = 1;
优化方案:
-- 减少嵌套,使用CTE提高可读性和性能
WITH joined_data AS (
SELECT * FROM table1 JOIN table2 ON ...
),
ranked_data AS (
SELECT *, ROW_NUMBER() OVER (...) AS rn
FROM joined_data
)
SELECT * FROM ranked_data WHERE rn = 1;
🚨 坑7:跨数据库兼容性
MySQL 8.0+ vs 其他数据库:
-- MySQL 8.0+ 支持
ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)
-- MySQL 5.7及以下版本不支持,需用变量模拟
SET @row_number = 0;
SET @warranty_no = '';
SELECT
warranty_no,
outbound_time,
@row_number := CASE
WHEN @warranty_no = warranty_no THEN @row_number + 1
ELSE 1
END AS rn,
@warranty_no := warranty_no
FROM warranty_operations
ORDER BY warranty_no, outbound_time DESC;
四、性能优化实战技巧
1. 索引优化策略
-- 为窗口函数创建复合索引
CREATE INDEX idx_warranty_outbound ON warranty_operations
(warranty_no, outbound_time DESC);
-- 索引覆盖查询
CREATE INDEX idx_covering ON warranty_operations
(warranty_no, outbound_time DESC, other_column1, other_column2);
2. 执行计划分析
-- 使用EXPLAIN ANALYZE (MySQL 8.0.18+)
EXPLAIN ANALYZE
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY outbound_time DESC
) AS rn
FROM warranty_operations
WHERE warranty_no LIKE '2024%';
3. 分页查询优化对比
-- 传统分页(性能随offset增大而下降)
SELECT * FROM orders
ORDER BY create_time DESC
LIMIT 20 OFFSET 10000; -- 越往后越慢
-- 使用ROW_NUMBER分页(稳定)
WITH paged AS (
SELECT *,
ROW_NUMBER() OVER (ORDER BY create_time DESC) AS rn
FROM orders
WHERE create_time >= '2024-01-01'
)
SELECT * FROM paged
WHERE rn BETWEEN 10001 AND 10020;
五、最佳实践总结
- 始终添加第二排序字段确保编号确定性
- 合理使用PARTITION BY避免过大分区
- 明确NULL值排序规则防止结果歧义
- 为窗口函数创建合适索引提升性能
- 使用CTE替代多层嵌套提高可读性
- 生产环境先在小数据量测试验证结果
- 监控执行计划识别性能瓶颈
六、真实案例:保修单最新状态查询优化
优化前(问题代码):
SELECT DISTINCT ON (warranty_no) *
FROM warranty_operations
ORDER BY warranty_no, outbound_time DESC;
-- 使用DISTINCT ON(非标准SQL,部分数据库不支持)
优化后(标准且高效):
WITH latest_operations AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY warranty_no
ORDER BY
outbound_time DESC,
id DESC -- 添加第二排序字段确保稳定
) AS rn
FROM warranty_operations
WHERE status IN ('SHIPPED', 'DELIVERED')
)
SELECT
warranty_no,
outbound_time AS latest_outbound,
operator,
-- 其他字段...
FROM latest_operations
WHERE rn = 1
ORDER BY outbound_time DESC;
性能提升:通过对(warranty_no, outbound_time DESC, id DESC)建立索引,查询速度会飙升秒。