1. 测试准备
在日常开发中,我们经常需要处理用户的多选数据(如权限设置、功能开关等)。面对这种需求,开发者往往会面临多种存储方案的选择。本文将全面对比四种常见的MySQL多选存储方案,通过严谨的性能测试,为您揭示各种方案的优劣。
测试环境与方法论
测试环境配置
- 硬件配置:
- 处理器:第13代英特尔酷睿i5-13500H
- 内存:16GB DDR4
- 存储:高性能SSD
- 软件环境:
- MySQL版本:5.7.26(InnoDB引擎)
- 字符集:utf8
测试方法论
我们设计了四种存储方案,每种方案都存储相同的32个布尔选项,通过百万级数据测试进行全方位对比:
- 32个BOOL字段方案 - 传统列式存储
- JSON数组方案 - 现代半结构化存储
- CSV字符串方案 - 传统字符串存储
- BIT(32)方案 - 二进制位存储
2. 测试表结构设计
方案1:32个BOOL字段(经典列式存储)
CREATE TABLE bool_columns (
id INT AUTO_INCREMENT PRIMARY KEY,
-- 基础选项 (1-10)
option1 BOOLEAN DEFAULT 0 COMMENT '系统管理员标志',
option2 BOOLEAN DEFAULT 0 COMMENT '内容审核员标志',
option3 BOOLEAN DEFAULT 0 COMMENT 'VIP用户标志',
option4 BOOLEAN DEFAULT 0 COMMENT '账户已验证',
option5 BOOLEAN DEFAULT 0 COMMENT '账户已锁定',
option6 BOOLEAN DEFAULT 0 COMMENT '允许接收邮件通知',
option7 BOOLEAN DEFAULT 0 COMMENT '允许接收短信通知',
option8 BOOLEAN DEFAULT 0 COMMENT '启用双重认证',
option9 BOOLEAN DEFAULT 0 COMMENT '显示在线状态',
option10 BOOLEAN DEFAULT 0 COMMENT '允许数据收集',
-- 内容偏好选项 (11-20)
option11 BOOLEAN DEFAULT 0 COMMENT '使用暗黑模式',
option12 BOOLEAN DEFAULT 0 COMMENT '使用紧凑布局',
option13 BOOLEAN DEFAULT 0 COMMENT '自动播放视频',
option14 BOOLEAN DEFAULT 0 COMMENT '显示NSFW内容',
option15 BOOLEAN DEFAULT 0 COMMENT '显示缩略图',
option16 BOOLEAN DEFAULT 0 COMMENT '启用键盘快捷键',
option17 BOOLEAN DEFAULT 0 COMMENT '使用高级编辑器',
option18 BOOLEAN DEFAULT 0 COMMENT '显示时间戳',
option19 BOOLEAN DEFAULT 0 COMMENT '显示用户位置',
option20 BOOLEAN DEFAULT 0 COMMENT '显示相关推荐',
-- 功能开关选项 (21-30)
option21 BOOLEAN DEFAULT 0 COMMENT '启用实验功能',
option22 BOOLEAN DEFAULT 0 COMMENT '加入用户体验计划',
option23 BOOLEAN DEFAULT 0 COMMENT '允许个性化广告',
option24 BOOLEAN DEFAULT 0 COMMENT '接受营销信息',
option25 BOOLEAN DEFAULT 0 COMMENT '接受隐私政策',
option26 BOOLEAN DEFAULT 0 COMMENT '启用自动保存',
option27 BOOLEAN DEFAULT 0 COMMENT '显示教程提示',
option28 BOOLEAN DEFAULT 0 COMMENT '同步跨设备数据',
option29 BOOLEAN DEFAULT 0 COMMENT '启用语音控制',
option30 BOOLEAN DEFAULT 0 COMMENT '使用生物识别登录',
-- 系统标志选项 (31-32)
option31 BOOLEAN DEFAULT 0 COMMENT '系统内部使用标志1',
option32 BOOLEAN DEFAULT 0 COMMENT '系统内部使用标志2',
-- 索引配置(为常用查询条件建立索引)
INDEX idx_option1 (option1) COMMENT '系统管理员查询索引',
INDEX idx_option2 (option2) COMMENT '内容审核员查询索引',
INDEX idx_option3 (option3) COMMENT 'VIP用户查询索引',
INDEX idx_option4 (option4) COMMENT '已验证账户查询索引',
INDEX idx_option6_option7 (option6, option7) COMMENT '通知偏好联合索引',
INDEX idx_option11_option12 (option11, option12) COMMENT '界面偏好联合索引',
INDEX idx_option21_option22 (option21, option22) COMMENT '实验功能联合索引'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='32个布尔选项标志表';
优点:
- 查询性能最优
- 支持完善的索引策略
- 字段含义明确
缺点:
- 表结构固定,扩展性差
- 占用存储空间较大
方案2:JSON数组(现代方案)
CREATE TABLE json_array (
id INT AUTO_INCREMENT PRIMARY KEY,
options JSON NOT NULL COMMENT '存储32个布尔选项的JSON数组'
) ENGINE=InnoDB COMMENT='使用JSON数组存储32个布尔选项';
-- 为特定JSON路径创建虚拟列并索引
ALTER TABLE json_array
ADD COLUMN option1 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[0]')) VIRTUAL,
ADD COLUMN option2 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[1]')) VIRTUAL,
ADD COLUMN option3 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[2]')) VIRTUAL,
ADD COLUMN option4 BOOLEAN GENERATED ALWAYS AS (JSON_EXTRACT(options, '$[3]')) VIRTUAL,
ADD INDEX idx_option1 (option1),
ADD INDEX idx_option2 (option2),
ADD INDEX idx_option3 (option3),
ADD INDEX idx_option4 (option4);
优点:
- 结构灵活,易于扩展
- 支持部分索引
- 现代MySQL对JSON有良好支持
缺点:
- 查询性能略低于BOOL方案
- 更新操作较复杂
方案3:CSV字符串(传统方案)
CREATE TABLE csv_string (
id INT AUTO_INCREMENT PRIMARY KEY,
options VARCHAR(255),
INDEX idx_options (options)
);
优点:
- 实现简单
- 存储紧凑
缺点:
- 查询性能最差
- 难以维护和扩展
- 缺乏类型安全
方案4:BIT(32)
CREATE TABLE bit_mask (
id INT AUTO_INCREMENT PRIMARY KEY,
options BIT(32),
INDEX idx_options (options)
);
优点:
- 存储空间最小
- 位运算性能高
缺点:
- 可读性差
- 查询复杂度高
- 扩展性有限
完整的测试数据生成存储过程
CREATE DEFINER=`root`@`localhost` PROCEDURE `generate_test_data1`()
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE batch_size INT DEFAULT 1000;
DECLARE commit_counter INT DEFAULT 0;
DECLARE bit_str VARCHAR(64);
-- 禁用自动提交以提高性能
SET autocommit = 0;
WHILE i < 1000000 DO
-- BOOL方案 - 完整32个字段插入
INSERT INTO bool_columns (
option1, option2, option3, option4, option5,
option6, option7, option8, option9, option10,
option11, option12, option13, option14, option15,
option16, option17, option18, option19, option20,
option21, option22, option23, option24, option25,
option26, option27, option28, option29, option30,
option31, option32
) VALUES (
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND())
);
-- JSON方案 - 使用完全展开的CONCAT构建紧凑JSON
SET @json_data = CONCAT(
'[',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',', ROUND(RAND()), ',',
ROUND(RAND()), ',', ROUND(RAND()),
']'
);
-- 最佳实践: 使用参数化查询
PREPARE stmt FROM 'INSERT INTO json_array (options) VALUES (?)';
EXECUTE stmt USING @json_data;
DEALLOCATE PREPARE stmt;
-- CSV方案 - 完整32个值的CSV字符串
INSERT INTO csv_string (options) VALUES (
CONCAT_WS(',',
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND())
)
);
-- BIT方案 - 完整32位二进制字符串
SET bit_str = CONCAT(
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()), ROUND(RAND()),
ROUND(RAND()), ROUND(RAND())
);
INSERT INTO bit_mask (options) VALUES (
CAST(CONV(bit_str, 2, 10) AS UNSIGNED)
);
SET i = i + 1;
SET commit_counter = commit_counter + 1;
-- 分批提交(每1000行)
IF commit_counter >= batch_size THEN
COMMIT;
SET commit_counter = 0;
END IF;
END WHILE;
-- 提交剩余事务
COMMIT;
SET autocommit = 1;
END
存储空间测量
1. 精确测量表大小
SELECT
table_name,
ROUND(data_length/1024/1024, 2) AS 'Data Size (MB)',
ROUND(index_length/1024/1024, 2) AS 'Index Size (MB)',
ROUND((data_length+index_length)/1024/1024, 2) AS 'Total Size (MB)'
FROM information_schema.TABLES
WHERE table_schema = DATABASE();
查询性能测试
1. 测试查询设计
测试1:单条件查询
-- 启用性能分析
SET profiling = 1;
-- BOOL方案
SELECT COUNT(*) FROM bool_columns WHERE option1 = 1;
-- JSON方案
SELECT COUNT(*) FROM json_array WHERE JSON_EXTRACT(options, '$[0]') = 1;
-- CSV方案
SELECT COUNT(*) FROM csv_string WHERE FIND_IN_SET('1', options) > 0;
-- BIT方案
SELECT COUNT(*) FROM bit_mask WHERE options & b'1' = b'1';
-- 查看耗时
SHOW PROFILES;
测试2:多条件组合查询
-- 启用性能分析
SET profiling = 1;
-- BOOL方案
SELECT COUNT(*) FROM bool_columns WHERE option1 = 1 AND option5 = 1;
-- JSON方案
SELECT COUNT(*) FROM json_array
WHERE JSON_EXTRACT(options, '$[0]') = 1
AND JSON_EXTRACT(options, '$[4]') = 1;
-- CSV方案
SELECT COUNT(*) FROM csv_string
WHERE FIND_IN_SET('1', options) > 0
AND SUBSTRING_INDEX(SUBSTRING_INDEX(options, ',', 5), ',', -1) = '1';
-- BIT方案
SELECT COUNT(*) FROM bit_mask
WHERE options & b'10001' = b'10001';
-- 查看耗时
SHOW PROFILES;
更新性能测试
-- 启用性能分析
SET profiling = 1;
-- BOOL方案单字段更新
UPDATE bool_columns SET option1 = 0 WHERE id < 1000;
-- JSON方案单字段更新
UPDATE json_array SET options = JSON_SET(options, '$[0]', 0) WHERE id < 1000;
-- BOOL方案多字段更新
UPDATE bool_columns SET option1 = 1, option5 = 0, option10 = 1 WHERE id < 1000;
-- JSON方案多字段更新
UPDATE json_array
SET options = JSON_SET(
JSON_SET(
JSON_SET(options, '$[0]', 1),
'$[4]', 0
),
'$[9]', 1
)
WHERE id < 1000;
-- 查看耗时
SHOW PROFILES;
结论
经过全面测试,我们可以得出以下结论:
- BOOL字段方案在查询和更新性能上全面领先,适合对性能要求高的核心系统
- JSON方案在保持较好性能的同时提供了灵活性,是现代应用的不错选择
- BIT方案存储空间最优,适合存储海量标记数据
- CSV方案在各测试中表现最差,不推荐在新项目中使用
MySQL官方文档参考
- MySQL BOOLEAN类型 dev.mysql.com/doc/refman/… 官方对BOOL/BOOLEAN类型的说明,实际存储为TINYINT(1)。
- MySQL JSON数据类型 dev.mysql.com/doc/refman/… 包含JSON数据的存储、查询和索引优化方法。
- 生成列(Generated Columns) dev.mysql.com/doc/refman/… JSON方案中虚拟列的实现原理。
- BIT数据类型 dev.mysql.com/doc/refman/… BIT类型的存储格式和位运算函数。
性能优化参考
- MySQL索引最佳实践 dev.mysql.com/doc/refman/… 解释了BOOL方案中多列索引的优势。
扩展阅读
- 数据库设计模式(Martin Fowler) martinfowler.com/eaaCatalog/ 包含Flag和Metadata Mapping等相关模式。
- Stack Overflow讨论:BIT vs BOOL stackoverflow.com/questions/2… 开发者社区对几种方案的实践讨论。