【MySQL深入详解】第13篇:MySQL 8.0新特性——JSON与窗口函数实战

0 阅读6分钟

开篇引入

MySQL 8.0是一个划时代的版本,从8.0开始,MySQL真正成为了一个现代化的关系数据库。窗口函数、CTE、JSON函数、Explain ANALYZE……这些特性让MySQL从"能用"变成了"好用"。

《高性能MySQL》第6章提到了MySQL 8.0的诸多改进,这篇文章帮你把最实用的新特性用起来。

窗口函数(Window Functions)

为什么需要窗口函数

以前计算"每个部门的平均工资"需要子查询或JOIN:

-- 传统方法:子查询
SELECT 
    dept,
    name,
    salary,
    (SELECT AVG(salary) FROM employees e2 WHERE e2.dept = e1.dept) AS dept_avg
FROM employees e1;

-- 窗口函数方法:一目了然
SELECT 
    dept,
    name,
    salary,
    AVG(salary) OVER (PARTITION BY dept) AS dept_avg
FROM employees;

常用窗口函数

聚合类窗口函数

-- 创建测试数据
CREATE TABLE sales (
    id INT,
    region VARCHAR(20),
    salesperson VARCHAR(20),
    amount DECIMAL(10,2)
);

INSERT INTO sales VALUES
(1, 'East', 'Alice', 1000),
(2, 'East', 'Bob', 1500),
(3, 'East', 'Charlie', 1200),
(4, 'West', 'David', 2000),
(5, 'West', 'Eve', 1800);

-- AVG: 移动平均
SELECT 
    salesperson,
    amount,
    AVG(amount) OVER (PARTITION BY region) AS region_avg,
    amount - AVG(amount) OVER (PARTITION BY region) AS vs_avg
FROM sales;

-- SUM: 累计求和
SELECT 
    salesperson,
    amount,
    SUM(amount) OVER (ORDER BY id) AS running_total
FROM sales;

-- COUNT: 累计计数
SELECT 
    salesperson,
    amount,
    COUNT(*) OVER (ORDER BY amount DESC) AS rank
FROM sales;

排名类窗口函数

-- ROW_NUMBER: 行号(无并列)
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;

-- RANK: 排名(有并列,跳过下一名)
SELECT 
    salesperson,
    amount,
    RANK() OVER (ORDER BY amount DESC) AS rank_num
FROM sales;

-- DENSE_RANK: 密集排名(有并列,不跳过)
SELECT 
    salesperson,
    amount,
    DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank_num
FROM sales;

-- NTILE: 分桶
SELECT 
    salesperson,
    amount,
    NTILE(2) OVER (ORDER BY amount DESC) AS bucket
FROM sales;

FIRST_VALUE / LAST_VALUE / NTH_VALUE

-- 找出每个区域销售冠军
SELECT 
    region,
    salesperson,
    amount,
    FIRST_VALUE(salesperson) OVER (PARTITION BY region ORDER BY amount DESC) AS top_salesperson,
    FIRST_VALUE(amount) OVER (PARTITION BY region ORDER BY amount DESC) AS top_amount
FROM sales;

-- NTH_VALUE: 取第N个值
SELECT 
    salesperson,
    amount,
    NTH_VALUE(amount, 2) OVER (ORDER BY amount DESC) AS second_amount
FROM sales;

帧(Frame)概念

窗口函数可以在一个滑动窗口内计算:

-- 默认帧:从起点到当前行
SELECT 
    salesperson,
    amount,
    SUM(amount) OVER (ORDER BY id) AS running_sum
FROM sales;

-- 显式指定帧:前一行到当前行
SELECT 
    salesperson,
    amount,
    SUM(amount) OVER (ORDER BY id 
        ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS moving_sum
FROM sales;

-- 更多帧选项
-- RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: 到当前行
-- RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: 从当前行到最后
-- RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING: 前后各2行

CTE(公用表表达式)

普通CTE

CTE让复杂的查询更易读:

-- 替代子查询,让查询更清晰
WITH regional_sales AS (
    SELECT region, SUM(amount) AS total_amount
    FROM sales
    GROUP BY region
),
top_regions AS (
    SELECT region
    FROM regional_sales
    WHERE total_amount > 3000
)
SELECT 
    s.region,
    s.salesperson,
    s.amount,
    r.total_amount AS region_total
FROM sales s
JOIN regional_sales r ON s.region = r.region
WHERE s.region IN (SELECT region FROM top_regions)
ORDER BY s.region, s.amount DESC;

递归CTE

递归CTE可以生成序列、遍历层级数据:

-- 生成数字序列
WITH RECURSIVE seq AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM seq WHERE n < 10
)
SELECT * FROM seq;

-- 生成日期序列
WITH RECURSIVE dates AS (
    SELECT '2024-01-01' AS dt
    UNION ALL
    SELECT dt + INTERVAL 1 DAY FROM dates WHERE dt < '2024-01-10'
)
SELECT * FROM dates;

-- 遍历组织架构
CREATE TABLE org_chart (
    id INT PRIMARY KEY,
    name VARCHAR(50),
    manager_id INT NULL
);

WITH RECURSIVE org AS (
    SELECT id, name, manager_id, 1 AS level
    FROM org_chart
    WHERE manager_id IS NULL
    
    UNION ALL
    
    SELECT o.id, o.name, o.manager_id, org.level + 1
    FROM org_chart o
    JOIN org ON o.manager_id = org.id
)
SELECT * FROM org ORDER BY level, name;

JSON函数

JSON数据类型

MySQL 8.0增强了JSON支持:

CREATE TABLE api_logs (
    id INT PRIMARY KEY AUTO_INCREMENT,
    request_id VARCHAR(50),
    payload JSON,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 插入JSON
INSERT INTO api_logs (request_id, payload) VALUES 
('req-001', '{"user_id": 1, "action": "login", "ip": "192.168.1.1"}'),
('req-002', '{"user_id": 2, "action": "purchase", "amount": 99.9}');

-- 查询JSON字段
SELECT 
    request_id,
    payload->>'$.user_id' AS user_id,
    payload->>'$.action' AS action
FROM api_logs;

-- JSON_EXTRACT vs ->>
SELECT 
    payload->'$.user_id' AS user_id_json,      -- JSON类型
    payload->>'$.user_id' AS user_id_text       -- 文本类型
FROM api_logs;

JSON函数

-- JSON_OBJECT: 创建JSON对象
SELECT JSON_OBJECT('name', '张三', 'age', 30, 'city', '北京');

-- JSON_ARRAY: 创建JSON数组
SELECT JSON_ARRAY(1, 2, 3, 4, 5);

-- JSON_MERGE_PATCH: 合并JSON(MySQL 8.0.3+)
SELECT JSON_MERGE_PATCH(
    '{"a": 1, "b": 2}',
    '{"b": 3, "c": 4}'
);

-- JSON_SET: 设置值
SELECT JSON_SET(
    '{"name": "张三", "age": 30}',
    '$.age', 31,
    '$.city', '北京'
);

-- JSON_REMOVE: 删除值
SELECT JSON_REMOVE(
    '{"name": "张三", "age": 30, "temp": "unused"}',
    '$.temp'
);

-- JSON_KEYS: 获取所有键
SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');

-- JSON_LENGTH: 获取长度
SELECT JSON_LENGTH('{"a": 1, "b": {"c": 2}}');  -- 2

JSON与索引

MySQL 8.0支持在JSON列上创建索引:

-- 添加Generated Column
ALTER TABLE api_logs 
ADD COLUMN user_id INT 
GENERATED ALWAYS AS (payload->>'$.user_id');

-- 创建索引
CREATE INDEX idx_user_id ON api_logs(user_id);

-- 验证索引被使用
EXPLAIN SELECT * FROM api_logs WHERE user_id = 1;

函数索引

MySQL 8.0支持在表达式上创建索引:

-- 在email的小写形式上建索引
CREATE TABLE users (
    id INT PRIMARY KEY,
    email VARCHAR(100),
    INDEX idx_email ((LOWER(email)))
);

-- 在计算表达式上建索引
CREATE TABLE orders (
    id INT PRIMARY KEY,
    created_at DATETIME,
    INDEX idx_month ((DATE_FORMAT(created_at, '%Y-%m')))
);

-- 查询可以利用索引
SELECT * FROM orders 
WHERE DATE_FORMAT(created_at, '%Y-%m') = '2024-01';

Explain ANALYZE(MySQL 8.0.18+)

执行计划现在可以显示实际执行时间:

EXPLAIN ANALYZE
SELECT 
    s.region,
    SUM(s.amount) AS total
FROM sales s
GROUP BY s.region;

输出示例:

-> Aggregate: sum(s.amount)  (cost=0.55 rows=2) (actual time=0.01..0.02 rows=2 loops=1)
    -> Table scan on s  (cost=0.27 rows=5) (actual time=0.01..0.01 rows=5 loops=1)

角色(Roles)

MySQL 8.0引入了角色:

-- 创建角色
CREATE ROLE app_reader;
CREATE ROLE app_writer;

-- 授予权限
GRANT SELECT ON mydb.* TO app_reader;
GRANT SELECT, INSERT, UPDATE, DELETE ON mydb.* TO app_writer;

-- 将角色授予用户
CREATE USER 'alice'@'%';
GRANT app_reader TO 'alice'@'%';

-- 用户激活角色
SET DEFAULT ROLE app_reader FOR 'alice'@'%';

-- 查看用户权限
SHOW GRANTS FOR 'alice'@'%';

Instant ADD COLUMN

MySQL 8.0.12+支持瞬时添加列:

-- 添加列(不复制数据,立即完成)
ALTER TABLE large_table ADD COLUMN new_col VARCHAR(100);

窗口函数实战:业务报表

月度销售报表

WITH monthly_sales AS (
    SELECT 
        DATE_FORMAT(created_at, '%Y-%m') AS month,
        region,
        SUM(amount) AS total_amount,
        COUNT(*) AS order_count
    FROM orders
    WHERE created_at >= '2023-01-01'
    GROUP BY DATE_FORMAT(created_at, '%Y-%m'), region
)
SELECT 
    month,
    region,
    total_amount,
    order_count,
    SUM(total_amount) OVER (PARTITION BY region ORDER BY month) AS running_total,
    AVG(total_amount) OVER (PARTITION BY region ORDER BY month 
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3m,
    total_amount / SUM(total_amount) OVER (PARTITION BY month) * 100 AS pct_of_month
FROM monthly_sales
ORDER BY month, region;

用户行为漏斗分析

WITH user_actions AS (
    SELECT 
        user_id,
        MAX(CASE WHEN action = 'page_view' THEN 1 ELSE 0 END) AS viewed,
        MAX(CASE WHEN action = 'add_to_cart' THEN 1 ELSE 0 END) AS added_cart,
        MAX(CASE WHEN action = 'checkout' THEN 1 ELSE 0 END) AS checked_out,
        MAX(CASE WHEN action = 'purchase' THEN 1 ELSE 0 END) AS purchased
    FROM user_behavior
    GROUP BY user_id
),
funnel AS (
    SELECT 
        SUM(viewed) AS page_views,
        SUM(added_cart) AS add_to_carts,
        SUM(checked_out) AS checkouts,
        SUM(purchased) AS purchases
    FROM user_actions
)
SELECT 
    'page_view' AS stage, page_views,
    'add_to_cart' AS stage2, add_to_carts,
    'checkout' AS stage3, checkouts,
    'purchase' AS stage4, purchases,
    ROUND(add_to_carts / page_views * 100, 2) AS view_to_cart_rate,
    ROUND(checkouts / add_to_carts * 100, 2) AS cart_to_checkout_rate,
    ROUND(purchases / checkouts * 100, 2) AS checkout_to_purchase_rate
FROM funnel;

小结

  1. 窗口函数:让聚合分析更直观,支持排名、累计、移动平均
  2. CTE:让复杂查询更易读,递归CTE能遍历层级数据
  3. JSON函数:原生JSON支持,8.0大幅增强
  4. Generated Column:基于JSON的虚拟列,可以建索引
  5. 函数索引:在表达式上建索引,查询更高效
  6. Explain ANALYZE:执行计划显示实际运行时间
  7. 角色:简化权限管理
  8. Instant ADD COLUMN:快速添加列,不锁表

MySQL 8.0的新特性让SQL表达能力大大增强,很多以前需要应用程序处理的逻辑,现在可以用SQL优雅地解决。


延伸阅读

  • 《高性能MySQL》第6章 schema设计与管理
  • MySQL 8.0 Reference Manual: Window Functions
  • MySQL 8.0 Reference Manual: JSON Functions