开篇引入
MySQL 8.0是一个划时代的版本,从8.0开始,MySQL真正成为了一个现代化的关系数据库。窗口函数、CTE、JSON函数、Explain ANALYZE……这些特性让MySQL从"能用"变成了"好用"。
《高性能MySQL》第6章提到了MySQL 8.0的诸多改进,这篇文章帮你把最实用的新特性用起来。
窗口函数(Window Functions)
为什么需要窗口函数
以前计算"每个部门的平均工资"需要子查询或JOIN:
-- 传统方法:子查询
SELECT
dept,
name,
salary,
(SELECT AVG(salary) FROM employees e2 WHERE e2.dept = e1.dept) AS dept_avg
FROM employees e1;
-- 窗口函数方法:一目了然
SELECT
dept,
name,
salary,
AVG(salary) OVER (PARTITION BY dept) AS dept_avg
FROM employees;
常用窗口函数
聚合类窗口函数:
-- 创建测试数据
CREATE TABLE sales (
id INT,
region VARCHAR(20),
salesperson VARCHAR(20),
amount DECIMAL(10,2)
);
INSERT INTO sales VALUES
(1, 'East', 'Alice', 1000),
(2, 'East', 'Bob', 1500),
(3, 'East', 'Charlie', 1200),
(4, 'West', 'David', 2000),
(5, 'West', 'Eve', 1800);
-- AVG: 移动平均
SELECT
salesperson,
amount,
AVG(amount) OVER (PARTITION BY region) AS region_avg,
amount - AVG(amount) OVER (PARTITION BY region) AS vs_avg
FROM sales;
-- SUM: 累计求和
SELECT
salesperson,
amount,
SUM(amount) OVER (ORDER BY id) AS running_total
FROM sales;
-- COUNT: 累计计数
SELECT
salesperson,
amount,
COUNT(*) OVER (ORDER BY amount DESC) AS rank
FROM sales;
排名类窗口函数:
-- ROW_NUMBER: 行号(无并列)
SELECT
salesperson,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;
-- RANK: 排名(有并列,跳过下一名)
SELECT
salesperson,
amount,
RANK() OVER (ORDER BY amount DESC) AS rank_num
FROM sales;
-- DENSE_RANK: 密集排名(有并列,不跳过)
SELECT
salesperson,
amount,
DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank_num
FROM sales;
-- NTILE: 分桶
SELECT
salesperson,
amount,
NTILE(2) OVER (ORDER BY amount DESC) AS bucket
FROM sales;
FIRST_VALUE / LAST_VALUE / NTH_VALUE:
-- 找出每个区域销售冠军
SELECT
region,
salesperson,
amount,
FIRST_VALUE(salesperson) OVER (PARTITION BY region ORDER BY amount DESC) AS top_salesperson,
FIRST_VALUE(amount) OVER (PARTITION BY region ORDER BY amount DESC) AS top_amount
FROM sales;
-- NTH_VALUE: 取第N个值
SELECT
salesperson,
amount,
NTH_VALUE(amount, 2) OVER (ORDER BY amount DESC) AS second_amount
FROM sales;
帧(Frame)概念
窗口函数可以在一个滑动窗口内计算:
-- 默认帧:从起点到当前行
SELECT
salesperson,
amount,
SUM(amount) OVER (ORDER BY id) AS running_sum
FROM sales;
-- 显式指定帧:前一行到当前行
SELECT
salesperson,
amount,
SUM(amount) OVER (ORDER BY id
ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS moving_sum
FROM sales;
-- 更多帧选项
-- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: 到当前行
-- RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: 从当前行到最后
-- RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING: 前后各2行
CTE(公用表表达式)
普通CTE
CTE让复杂的查询更易读:
-- 替代子查询,让查询更清晰
WITH regional_sales AS (
SELECT region, SUM(amount) AS total_amount
FROM sales
GROUP BY region
),
top_regions AS (
SELECT region
FROM regional_sales
WHERE total_amount > 3000
)
SELECT
s.region,
s.salesperson,
s.amount,
r.total_amount AS region_total
FROM sales s
JOIN regional_sales r ON s.region = r.region
WHERE s.region IN (SELECT region FROM top_regions)
ORDER BY s.region, s.amount DESC;
递归CTE
递归CTE可以生成序列、遍历层级数据:
-- 生成数字序列
WITH RECURSIVE seq AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM seq WHERE n < 10
)
SELECT * FROM seq;
-- 生成日期序列
WITH RECURSIVE dates AS (
SELECT '2024-01-01' AS dt
UNION ALL
SELECT dt + INTERVAL 1 DAY FROM dates WHERE dt < '2024-01-10'
)
SELECT * FROM dates;
-- 遍历组织架构
CREATE TABLE org_chart (
id INT PRIMARY KEY,
name VARCHAR(50),
manager_id INT NULL
);
WITH RECURSIVE org AS (
SELECT id, name, manager_id, 1 AS level
FROM org_chart
WHERE manager_id IS NULL
UNION ALL
SELECT o.id, o.name, o.manager_id, org.level + 1
FROM org_chart o
JOIN org ON o.manager_id = org.id
)
SELECT * FROM org ORDER BY level, name;
JSON函数
JSON数据类型
MySQL 8.0增强了JSON支持:
CREATE TABLE api_logs (
id INT PRIMARY KEY AUTO_INCREMENT,
request_id VARCHAR(50),
payload JSON,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 插入JSON
INSERT INTO api_logs (request_id, payload) VALUES
('req-001', '{"user_id": 1, "action": "login", "ip": "192.168.1.1"}'),
('req-002', '{"user_id": 2, "action": "purchase", "amount": 99.9}');
-- 查询JSON字段
SELECT
request_id,
payload->>'$.user_id' AS user_id,
payload->>'$.action' AS action
FROM api_logs;
-- JSON_EXTRACT vs ->>
SELECT
payload->'$.user_id' AS user_id_json, -- JSON类型
payload->>'$.user_id' AS user_id_text -- 文本类型
FROM api_logs;
JSON函数
-- JSON_OBJECT: 创建JSON对象
SELECT JSON_OBJECT('name', '张三', 'age', 30, 'city', '北京');
-- JSON_ARRAY: 创建JSON数组
SELECT JSON_ARRAY(1, 2, 3, 4, 5);
-- JSON_MERGE_PATCH: 合并JSON(MySQL 8.0.3+)
SELECT JSON_MERGE_PATCH(
'{"a": 1, "b": 2}',
'{"b": 3, "c": 4}'
);
-- JSON_SET: 设置值
SELECT JSON_SET(
'{"name": "张三", "age": 30}',
'$.age', 31,
'$.city', '北京'
);
-- JSON_REMOVE: 删除值
SELECT JSON_REMOVE(
'{"name": "张三", "age": 30, "temp": "unused"}',
'$.temp'
);
-- JSON_KEYS: 获取所有键
SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');
-- JSON_LENGTH: 获取长度
SELECT JSON_LENGTH('{"a": 1, "b": {"c": 2}}'); -- 2
JSON与索引
MySQL 8.0支持在JSON列上创建索引:
-- 添加Generated Column
ALTER TABLE api_logs
ADD COLUMN user_id INT
GENERATED ALWAYS AS (payload->>'$.user_id');
-- 创建索引
CREATE INDEX idx_user_id ON api_logs(user_id);
-- 验证索引被使用
EXPLAIN SELECT * FROM api_logs WHERE user_id = 1;
函数索引
MySQL 8.0支持在表达式上创建索引:
-- 在email的小写形式上建索引
CREATE TABLE users (
id INT PRIMARY KEY,
email VARCHAR(100),
INDEX idx_email ((LOWER(email)))
);
-- 在计算表达式上建索引
CREATE TABLE orders (
id INT PRIMARY KEY,
created_at DATETIME,
INDEX idx_month ((DATE_FORMAT(created_at, '%Y-%m')))
);
-- 查询可以利用索引
SELECT * FROM orders
WHERE DATE_FORMAT(created_at, '%Y-%m') = '2024-01';
Explain ANALYZE(MySQL 8.0.18+)
执行计划现在可以显示实际执行时间:
EXPLAIN ANALYZE
SELECT
s.region,
SUM(s.amount) AS total
FROM sales s
GROUP BY s.region;
输出示例:
-> Aggregate: sum(s.amount) (cost=0.55 rows=2) (actual time=0.01..0.02 rows=2 loops=1)
-> Table scan on s (cost=0.27 rows=5) (actual time=0.01..0.01 rows=5 loops=1)
角色(Roles)
MySQL 8.0引入了角色:
-- 创建角色
CREATE ROLE app_reader;
CREATE ROLE app_writer;
-- 授予权限
GRANT SELECT ON mydb.* TO app_reader;
GRANT SELECT, INSERT, UPDATE, DELETE ON mydb.* TO app_writer;
-- 将角色授予用户
CREATE USER 'alice'@'%';
GRANT app_reader TO 'alice'@'%';
-- 用户激活角色
SET DEFAULT ROLE app_reader FOR 'alice'@'%';
-- 查看用户权限
SHOW GRANTS FOR 'alice'@'%';
Instant ADD COLUMN
MySQL 8.0.12+支持瞬时添加列:
-- 添加列(不复制数据,立即完成)
ALTER TABLE large_table ADD COLUMN new_col VARCHAR(100);
窗口函数实战:业务报表
月度销售报表
WITH monthly_sales AS (
SELECT
DATE_FORMAT(created_at, '%Y-%m') AS month,
region,
SUM(amount) AS total_amount,
COUNT(*) AS order_count
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY DATE_FORMAT(created_at, '%Y-%m'), region
)
SELECT
month,
region,
total_amount,
order_count,
SUM(total_amount) OVER (PARTITION BY region ORDER BY month) AS running_total,
AVG(total_amount) OVER (PARTITION BY region ORDER BY month
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3m,
total_amount / SUM(total_amount) OVER (PARTITION BY month) * 100 AS pct_of_month
FROM monthly_sales
ORDER BY month, region;
用户行为漏斗分析
WITH user_actions AS (
SELECT
user_id,
MAX(CASE WHEN action = 'page_view' THEN 1 ELSE 0 END) AS viewed,
MAX(CASE WHEN action = 'add_to_cart' THEN 1 ELSE 0 END) AS added_cart,
MAX(CASE WHEN action = 'checkout' THEN 1 ELSE 0 END) AS checked_out,
MAX(CASE WHEN action = 'purchase' THEN 1 ELSE 0 END) AS purchased
FROM user_behavior
GROUP BY user_id
),
funnel AS (
SELECT
SUM(viewed) AS page_views,
SUM(added_cart) AS add_to_carts,
SUM(checked_out) AS checkouts,
SUM(purchased) AS purchases
FROM user_actions
)
SELECT
'page_view' AS stage, page_views,
'add_to_cart' AS stage2, add_to_carts,
'checkout' AS stage3, checkouts,
'purchase' AS stage4, purchases,
ROUND(add_to_carts / page_views * 100, 2) AS view_to_cart_rate,
ROUND(checkouts / add_to_carts * 100, 2) AS cart_to_checkout_rate,
ROUND(purchases / checkouts * 100, 2) AS checkout_to_purchase_rate
FROM funnel;
小结
- 窗口函数:让聚合分析更直观,支持排名、累计、移动平均
- CTE:让复杂查询更易读,递归CTE能遍历层级数据
- JSON函数:原生JSON支持,8.0大幅增强
- Generated Column:基于JSON的虚拟列,可以建索引
- 函数索引:在表达式上建索引,查询更高效
- Explain ANALYZE:执行计划显示实际运行时间
- 角色:简化权限管理
- Instant ADD COLUMN:快速添加列,不锁表
MySQL 8.0的新特性让SQL表达能力大大增强,很多以前需要应用程序处理的逻辑,现在可以用SQL优雅地解决。
延伸阅读
- 《高性能MySQL》第6章 schema设计与管理
- MySQL 8.0 Reference Manual: Window Functions
- MySQL 8.0 Reference Manual: JSON Functions