MySQL 高级技巧手册 - 第二章:索引深度优化
目标:深入理解 MySQL 索引原理,掌握高级索引优化技巧,避免常见索引陷阱。
2.1 索引基础回顾
2.1.1 B+树索引结构
MySQL InnoDB 索引结构(B+树):
[10 | 30 | 50]
/ | \
[1|5|9] [20|25] [40|45] [60|70|80]
/ | \ / \ / \ / | \
叶节点(包含完整数据或主键)
特点:
├── 非叶子节点只存储键值和指针
├── 叶子节点包含完整数据(聚簇索引)或主键(二级索引)
├── 叶子节点通过指针相连,支持范围查询
└── 所有查询最终都要落到叶子节点
索引类型对比:
-- 查看表的索引
SHOW INDEX FROM user;
-- 详细索引信息
SELECT
INDEX_NAME,
SEQ_IN_INDEX,
COLUMN_NAME,
COLLATION,
CARDINALITY,
SUB_PART,
PACKED,
NULLABLE,
INDEX_TYPE,
COMMENT
FROM INFORMATION_SCHEMA.STATISTICS
WHERE TABLE_SCHEMA = 'mydb' AND TABLE_NAME = 'user';
| 索引类型 | 特点 | 适用场景 |
|---|---|---|
| PRIMARY KEY | 聚簇索引,叶子节点存数据 | 主键查询 |
| UNIQUE | 唯一约束,允许 NULL | 唯一性字段 |
| INDEX | 普通索引 | 频繁查询字段 |
| FULLTEXT | 全文索引(MyISAM/InnoDB 5.6+) | 文本搜索 |
| SPATIAL | 空间索引 | 地理数据 |
2.1.2 聚簇索引 vs 非聚簇索引
-- 创建测试表
CREATE TABLE user_order (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
user_id BIGINT NOT NULL,
order_no VARCHAR(32) NOT NULL,
amount DECIMAL(10,2),
status TINYINT,
create_time DATETIME,
INDEX idx_user_id (user_id),
INDEX idx_order_no (order_no)
) ENGINE=InnoDB;
查询路径对比:
主键查询(聚簇索引):
SELECT * FROM user_order WHERE id = 100;
├── 直接在聚簇索引树中找到叶子节点
└── 返回完整数据(无需回表)
二级索引查询:
SELECT * FROM user_order WHERE user_id = 1000;
├── 在 idx_user_id 索引树找到叶子节点
├── 获取主键 id 值
├── 回到聚簇索引树查询完整数据(回表)
└── 返回数据
覆盖索引查询(避免回表):
SELECT order_no FROM user_order WHERE user_id = 1000;
├── 在 idx_user_id 索引树找到叶子节点
├── 检查是否包含 order_no(不包含,仍需回表)
└── 需要复合索引 (user_id, order_no) 才能覆盖
2.2 复合索引设计
2.2.1 最左前缀原则
-- 创建复合索引
CREATE INDEX idx_user_time ON user_order(user_id, create_time, status);
-- ✅ 能使用索引的情况
SELECT * FROM user_order WHERE user_id = 100; -- 使用第1列
SELECT * FROM user_order WHERE user_id = 100 AND create_time > '2024-01-01'; -- 使用第1、2列
SELECT * FROM user_order WHERE user_id = 100 AND create_time > '2024-01-01' AND status = 1; -- 使用全部
SELECT * FROM user_order WHERE user_id = 100 AND status = 1; -- 使用第1列(第3列跳过了第2列)
-- ❌ 不能使用索引的情况
SELECT * FROM user_order WHERE create_time > '2024-01-01'; -- 缺少第1列
SELECT * FROM user_order WHERE status = 1; -- 缺少前导列
SELECT * FROM user_order WHERE create_time > '2024-01-01' AND status = 1; -- 缺少第1列
索引列顺序设计原则:
-- 原则1:将选择性高的列放在前面
-- 计算选择性
SELECT
COUNT(DISTINCT user_id) / COUNT(*) as user_selectivity,
COUNT(DISTINCT status) / COUNT(*) as status_selectivity
FROM user_order;
-- 结果:user_id = 0.95, status = 0.01
-- 结论:user_id 选择性高,应放前面
-- 原则2:考虑查询频率
-- 如果绝大多数查询都是按 user_id 过滤,放第一列
-- 原则3:避免范围查询列在等值查询列前面
-- 不好的设计:(create_time, user_id)
CREATE INDEX idx_bad ON user_order(create_time, user_id);
-- WHERE create_time > '2024-01-01' AND user_id = 100
-- create_time 是范围查询,后面的 user_id 无法使用索引
-- 好的设计:(user_id, create_time)
CREATE INDEX idx_good ON user_order(user_id, create_time);
-- WHERE user_id = 100 AND create_time > '2024-01-01'
-- 两列都能使用索引
2.2.2 索引合并优化
-- Index Merge 示例
SELECT * FROM user_order
WHERE user_id = 100 OR order_no = 'ORD2024001';
-- 如果有两个独立索引:
-- idx_user_id (user_id)
-- idx_order_no (order_no)
-- MySQL 可能使用 Index Merge(并集)
-- 执行计划显示:type: index_merge, Extra: Using union(...)
开启和优化 Index Merge:
-- 查看 Index Merge 设置
SHOW VARIABLES LIKE 'optimizer_switch';
-- 临时开启(会话级)
SET SESSION optimizer_switch =
'index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on';
-- 强制使用 Index Merge(不推荐常规使用)
SELECT /*+ INDEX_MERGE(user_order idx_user_id, idx_order_no) */ *
FROM user_order
WHERE user_id = 100 OR order_no = 'ORD2024001';
2.3 覆盖索引与回表优化
2.3.1 覆盖索引设计
-- 场景:高频查询用户订单列表(只需要部分字段)
SELECT order_no, amount, status, create_time
FROM user_order
WHERE user_id = 100
ORDER BY create_time DESC
LIMIT 10;
-- 不好的索引(需要回表)
CREATE INDEX idx_user_time ON user_order(user_id, create_time);
-- 好的索引(覆盖索引,无需回表)
CREATE INDEX idx_user_cover ON user_order(user_id, create_time, order_no, amount, status);
-- 验证是否使用覆盖索引
EXPLAIN SELECT order_no, amount, status, create_time
FROM user_order
WHERE user_id = 100
ORDER BY create_time DESC
LIMIT 10;
-- Extra 列应显示:Using index(表示覆盖索引)
覆盖索引实战案例:
-- 案例:用户订单统计(查看某用户的订单状态分布)
SELECT status, COUNT(*) as cnt, SUM(amount) as total
FROM user_order
WHERE user_id = 100
GROUP BY status;
-- 覆盖索引设计
CREATE INDEX idx_user_status_cover ON user_order(user_id, status, amount);
-- 执行计划:Using index(覆盖索引)
EXPLAIN SELECT status, COUNT(*) as cnt, SUM(amount) as total
FROM user_order
WHERE user_id = 100
GROUP BY status;
2.3.2 回表代价分析
-- 查看回表次数(通过 Handler_read_rnd_next)
SHOW GLOBAL STATUS LIKE 'Handler_read%';
-- Handler_read_rnd_next:高表示大量回表
-- Handler_read_key:通过索引读取
-- Handler_read_next:顺序读取索引
回表 vs 覆盖索引性能对比:
-- 准备测试数据
INSERT INTO user_order (user_id, order_no, amount, status, create_time)
SELECT
FLOOR(RAND() * 10000),
CONCAT('ORD', LPAD(n, 10, '0')),
RAND() * 1000,
FLOOR(RAND() * 5),
DATE_SUB(NOW(), INTERVAL FLOOR(RAND() * 365) DAY)
FROM (
SELECT a.N + b.N * 10 + c.N * 100 + d.N * 1000 + e.N * 10000 + f.N * 100000 as n
FROM
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) a,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) b,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) c,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) d,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) e,
(SELECT 0 AS N UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9) f
) numbers
LIMIT 1000000;
-- 对比测试1:需要回表
CREATE INDEX idx_user_only ON user_order(user_id);
EXPLAIN ANALYZE
SELECT order_no, amount, status
FROM user_order
WHERE user_id = 5000;
-- 执行时间: ~50ms
-- 回表次数: ~100 次(假设 user_id=5000 有 100 条记录)
-- 对比测试2:覆盖索引
CREATE INDEX idx_user_cover ON user_order(user_id, order_no, amount, status);
EXPLAIN ANALYZE
SELECT order_no, amount, status
FROM user_order
WHERE user_id = 5000;
-- 执行时间: ~5ms
-- 回表次数: 0
2.4 索引失效场景与优化
2.4.1 常见索引失效场景
-- 场景1:隐式类型转换
-- 表结构:user_id BIGINT
SELECT * FROM user_order WHERE user_id = '100'; -- 字符串,会隐式转换
-- 优化:确保类型一致
SELECT * FROM user_order WHERE user_id = 100;
-- 场景2:函数操作
SELECT * FROM user_order WHERE DATE(create_time) = '2024-01-01'; -- 函数导致失效
-- 优化:改写为范围查询
SELECT * FROM user_order
WHERE create_time >= '2024-01-01 00:00:00'
AND create_time < '2024-01-02 00:00:00';
-- 场景3:左模糊匹配
SELECT * FROM user_order WHERE order_no LIKE '%001'; -- 左模糊,无法使用索引
-- 优化:改为右模糊或使用全文索引
SELECT * FROM user_order WHERE order_no LIKE 'ORD%'; -- 可以使用索引
-- 场景4:OR 条件(部分情况)
SELECT * FROM user_order WHERE user_id = 100 OR amount > 500;
-- 如果 amount 没有索引,可能导致 user_id 索引也不使用
-- 优化:使用 UNION
SELECT * FROM user_order WHERE user_id = 100
UNION ALL
SELECT * FROM user_order WHERE amount > 500 AND user_id != 100;
-- 场景5:NOT、!=、<>
SELECT * FROM user_order WHERE user_id != 100; -- 可能不走索引(取决于数据分布)
-- 优化:改写为范围查询或 IN
SELECT * FROM user_order WHERE user_id IN (1, 2, 3, ...); -- 排除 100
-- 场景6:IS NOT NULL(低选择性时)
SELECT * FROM user_order WHERE user_id IS NOT NULL;
-- 如果 user_id 几乎没有 NULL,可能全表扫描更快
2.4.2 索引下推(ICP)
-- Index Condition Pushdown (MySQL 5.6+)
-- 在存储引擎层过滤数据,减少回表
SELECT * FROM user_order
WHERE user_id = 100 AND amount > 500;
-- 索引:idx_user_amount (user_id, amount)
-- 不使用 ICP:
-- 1. 通过 user_id = 100 找到所有记录
-- 2. 回表获取完整数据
-- 3. 在 Server 层过滤 amount > 500
-- 使用 ICP:
-- 1. 通过 user_id = 100 找到记录
-- 2. 在存储引擎层检查 amount > 500
-- 3. 只回表符合条件的记录
-- 查看是否使用 ICP
EXPLAIN SELECT * FROM user_order WHERE user_id = 100 AND amount > 500;
-- Extra 列显示:Using index condition
-- 控制 ICP
SET optimizer_switch = 'index_condition_pushdown=on';
2.4.3 索引选择异常处理
-- 场景:MySQL 选择了错误的索引
EXPLAIN SELECT * FROM user_order WHERE user_id = 100 AND status = 1;
-- 可能选择了 idx_status 而不是 idx_user_status
-- 方法1:使用 FORCE INDEX(强制)
SELECT * FROM user_order
FORCE INDEX (idx_user_status)
WHERE user_id = 100 AND status = 1;
-- 方法2:使用 USE INDEX(建议)
SELECT * FROM user_order
USE INDEX (idx_user_status)
WHERE user_id = 100 AND status = 1;
-- 方法3:使用 IGNORE INDEX(排除)
SELECT * FROM user_order
IGNORE INDEX (idx_status)
WHERE user_id = 100 AND status = 1;
-- 方法4:分析表更新统计信息
ANALYZE TABLE user_order;
-- 方法5:增加索引提示(MySQL 8.0)
SELECT /*+ INDEX(user_order idx_user_status) */ *
FROM user_order
WHERE user_id = 100 AND status = 1;
2.5 索引维护与监控
2.5.1 索引健康度检查
-- 查看索引使用情况(sys schema)
SELECT
table_schema,
table_name,
index_name,
rows_selected,
rows_inserted,
rows_deleted,
rows_updated
FROM sys.schema_index_statistics
WHERE table_schema = 'mydb'
ORDER BY rows_selected DESC;
-- 查看未使用的索引
SELECT
object_schema,
object_name,
index_name
FROM sys.schema_unused_indexes
WHERE object_schema = 'mydb';
-- 查看重复索引
SELECT
table_schema,
table_name,
redundant_index_name,
redundant_index_columns,
dominant_index_name,
dominant_index_columns
FROM sys.schema_redundant_indexes
WHERE table_schema = 'mydb';
2.5.2 索引碎片整理
-- 查看索引碎片率
SELECT
table_name,
index_name,
ROUND(data_free / 1024 / 1024, 2) as data_free_mb,
ROUND(data_length / 1024 / 1024, 2) as data_length_mb,
ROUND(index_length / 1024 / 1024, 2) as index_length_mb,
ROUND(data_free / (data_length + index_length) * 100, 2) as frag_percent
FROM information_schema.tables
WHERE table_schema = 'mydb'
AND data_free > 0
ORDER BY data_free DESC;
-- 方法1:OPTIMIZE TABLE(会锁表)
OPTIMIZE TABLE user_order;
-- 方法2:ALTER TABLE(Online DDL,MySQL 5.6+)
ALTER TABLE user_order ENGINE=InnoDB;
-- 方法3:重建特定索引
ALTER TABLE user_order DROP INDEX idx_user_id, ADD INDEX idx_user_id (user_id);
Python 索引维护脚本:
import mysql.connector
from datetime import datetime
class IndexMaintenance:
"""索引维护工具"""
def __init__(self, host, user, password, database):
self.conn = mysql.connector.connect(
host=host, user=user, password=password, database=database
)
self.cursor = self.conn.cursor(dictionary=True)
def check_fragmentation(self, threshold=30):
"""
检查索引碎片
Args:
threshold: 碎片率阈值(百分比)
"""
query = """
SELECT
table_name,
ROUND(data_free / 1024 / 1024, 2) as data_free_mb,
ROUND((data_length + index_length) / 1024 / 1024, 2) as total_mb,
ROUND(data_free / (data_length + index_length + 1) * 100, 2) as frag_percent
FROM information_schema.tables
WHERE table_schema = DATABASE()
AND data_free / (data_length + index_length + 1) * 100 > %s
ORDER BY frag_percent DESC
"""
self.cursor.execute(query, (threshold,))
results = self.cursor.fetchall()
print(f"发现 {len(results)} 个表的碎片率超过 {threshold}%")
for row in results:
print(f" {row['table_name']}: {row['frag_percent']}% "
f"(释放空间: {row['data_free_mb']} MB)")
return results
def find_unused_indexes(self, min_days=7):
"""
查找未使用的索引
需要开启 performance_schema
"""
query = """
SELECT
object_schema,
object_name,
index_name
FROM performance_schema.table_io_waits_summary_by_index_usage
WHERE index_name IS NOT NULL
AND count_star = 0
AND object_schema = DATABASE()
ORDER BY object_schema, object_name
"""
try:
self.cursor.execute(query)
results = self.cursor.fetchall()
print(f"发现 {len(results)} 个未使用的索引")
for row in results:
print(f" {row['object_name']}.{row['index_name']}")
return results
except Exception as e:
print(f"查询失败(可能需要开启 performance_schema): {e}")
return []
def find_duplicate_indexes(self):
"""查找重复索引"""
query = """
SELECT
t.table_name,
i.index_name,
i.column_name,
i.seq_in_index
FROM information_schema.statistics i
JOIN information_schema.tables t ON i.table_schema = t.table_schema
AND i.table_name = t.table_name
WHERE i.table_schema = DATABASE()
AND t.engine = 'InnoDB'
ORDER BY t.table_name, i.index_name, i.seq_in_index
"""
self.cursor.execute(query)
results = self.cursor.fetchall()
# 分析重复
indexes = {}
for row in results:
key = f"{row['table_name']}.{row['index_name']}"
if key not in indexes:
indexes[key] = []
indexes[key].append(row['column_name'])
# 找出前缀重复的索引
duplicates = []
tables = {}
for key, cols in indexes.items():
table_name = key.split('.')[0]
if table_name not in tables:
tables[table_name] = {}
tables[table_name][key] = cols
for table, idx_dict in tables.items():
idx_list = list(idx_dict.items())
for i in range(len(idx_list)):
for j in range(i + 1, len(idx_list)):
name1, cols1 = idx_list[i]
name2, cols2 = idx_list[j]
# 检查是否一个索引是另一个的前缀
if cols1 == cols2[:len(cols1)] or cols2 == cols1[:len(cols2)]:
duplicates.append({
'table': table,
'index1': name1.split('.')[1],
'cols1': cols1,
'index2': name2.split('.')[1],
'cols2': cols2
})
return duplicates
def optimize_table(self, table_name, dry_run=True):
"""
优化表
Args:
table_name: 表名
dry_run: 是否仅预览
"""
if dry_run:
print(f"[预览] 将执行: OPTIMIZE TABLE {table_name}")
return
try:
print(f"正在优化表 {table_name}...")
self.cursor.execute(f"OPTIMIZE TABLE {table_name}")
result = self.cursor.fetchall()
for row in result:
print(f" 结果: {row}")
self.conn.commit()
except Exception as e:
print(f"优化失败: {e}")
def close(self):
self.cursor.close()
self.conn.close()
# 使用示例
if __name__ == '__main__':
maint = IndexMaintenance('localhost', 'root', 'password', 'mydb')
# 检查碎片
print("=== 索引碎片检查 ===")
maint.check_fragmentation(threshold=30)
# 查找未使用的索引
print("\n=== 未使用索引 ===")
maint.find_unused_indexes()
# 查找重复索引
print("\n=== 重复索引 ===")
duplicates = maint.find_duplicate_indexes()
for dup in duplicates:
print(f" {dup['table']}: {dup['index1']} 和 {dup['index2']}")
maint.close()
2.6 本章小结
✅ 已完成:
- B+树索引结构回顾(聚簇 vs 非聚簇)
- 复合索引设计(最左前缀、列顺序选择)
- Index Merge 优化
- 覆盖索引设计(避免回表)
- 回表代价分析与优化
- 索引失效场景(6 种常见情况)
- 索引下推(ICP)原理与应用
- 索引选择异常处理(FORCE/USE/IGNORE INDEX)
- 索引健康度检查(sys schema)
- 索引碎片整理(OPTIMIZE/ALTER)
📋 索引优化要点:
索引设计原则:
├── 最左前缀:复合索引列顺序很重要
├── 选择性:高选择性列放前面
├── 覆盖索引:减少回表,提高性能
├── 避免冗余:定期清理未使用索引
└── 控制数量:索引过多影响写入
常见陷阱:
├── 隐式类型转换
├── 函数操作索引列
├── 左模糊匹配 LIKE '%xxx'
├── OR 条件使用不当
└── 范围查询在前,等值查询在后
优化技巧:
├── 使用 EXPLAIN 验证执行计划
├── 定期 ANALYZE TABLE 更新统计信息
├── 使用覆盖索引避免回表
├── 开启 ICP 减少回表次数
└── 监控 Handler_read% 状态
下一步:第三章 - 分区表实战