Designing Hot/Cold Data Separation for a Ticket System from 0 to 1: Database Splitting Practice at 500K Rows


I. Business Scenario and Requirements

1. Background

In an enterprise customer-service system, the service_ticket table holds 500K+ after-sales tickets. The high-frequency rows (open tickets plus tickets closed within the last month) make up only 30% of the data yet serve 95% of query/update traffic, while the remaining 70% of historical rows cause:

  • Hot-path latency: the average response time of core business APIs rose from 200ms to 800ms
  • Database IO bottlenecks: disk IO utilization hits 90% at peak, frequently triggering slow queries
  • Soaring operations cost: a full backup now takes 6 hours instead of 2

2. Hot/Cold Data Definitions

| Data Type | Definition Criteria | Storage Strategy | Access Frequency |
| --- | --- | --- | --- |
| Hot data | Open tickets (status=0), or closed less than 1 month ago | High-performance SSD hot DB | Sub-second response required |
| Cold data | Closed, with close time ≥ 1 month ago | High-capacity HDD cold DB | < 5% of daily query volume |
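
Expressed as code, the classification rule is a single predicate. A minimal sketch (the helper class is illustrative, not part of the original design):

import java.time.LocalDateTime;

// Hot/cold classification rule from the table above
public final class TicketTemperature {

    private TicketTemperature() {}

    /** A ticket is hot while it is open, or for one month after it is closed. */
    public static boolean isHot(int status, LocalDateTime closeTime, LocalDateTime now) {
        if (status == 0) {      // 0 = open
            return true;
        }
        // closed: hot only if closed less than a month ago
        return closeTime != null && closeTime.isAfter(now.minusMonths(1));
    }
}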

II. Database Design

1. Hot Table (hot_db.service_ticket)

CREATE TABLE `service_ticket` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'Ticket ID (primary key)',
  `customer_name` varchar(100) NOT NULL COMMENT 'Customer name',
  `status` tinyint(4) NOT NULL DEFAULT '0' COMMENT 'Ticket status: 0=open, 1=closed',
  `close_time` datetime DEFAULT NULL COMMENT 'Close time (NULL = still open)',
  `update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Last update time (auto-updated)',
  `cold_flag` tinyint(4) NOT NULL DEFAULT '0' COMMENT 'Migration flag: 0=untouched, 1=pending migration, 2=migrated',
  `lock_thread` varchar(50) DEFAULT NULL COMMENT 'Migration lock holder (thread ID, prevents concurrent migration)',
  `lock_time` datetime DEFAULT NULL COMMENT 'Lock acquisition time (released automatically after 10 minutes)',

  PRIMARY KEY (`id`),
  INDEX `idx_status_close_time` (`status`,`close_time`) COMMENT 'Hot/cold filtering index',
  INDEX `idx_cold_flag` (`cold_flag`) COMMENT 'Migration scan index',
  INDEX `idx_lock_time` (`lock_time`) COMMENT 'Lock timeout detection index'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
COMMENT='Service ticket hot table (active/recent data)';

2. Cold Table (cold_db.archived_ticket)

CREATE TABLE `archived_ticket` (
  `id` bigint(20) NOT NULL COMMENT 'Ticket ID (primary key, carried over from the hot table)',
  `customer_name` varchar(100) NOT NULL COMMENT 'Customer name',
  `status` tinyint(4) NOT NULL COMMENT 'Ticket status: 1=closed',
  `close_time` datetime NOT NULL COMMENT 'Close time',
  `update_time` datetime NOT NULL COMMENT 'Last update time',

  PRIMARY KEY (`id`),
  INDEX `idx_close_time` (`close_time`) COMMENT 'Historical-data query index'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
COMMENT='Service ticket cold table (archived historical data)';

III. Java Implementation

1. Entity Class (ServiceTicket.java)

@Data
@TableName("service_ticket")
public class ServiceTicket {
    private Long id;                 // ticket ID
    private String customerName;     // customer name
    private Integer status;          // ticket status: 0=open, 1=closed
    private LocalDateTime closeTime; // close time
    private LocalDateTime updateTime;// last update time
    private Integer coldFlag;        // migration flag: 0=untouched, 1=pending, 2=migrated
    private String lockThread;       // migration lock holder (thread ID)
    private LocalDateTime lockTime;  // lock acquisition time
}
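
The migration service further below also relies on an ArchivedTicket entity and an ArchivedTicketMapper.insertBatch that the listings leave out. A minimal sketch of both, assuming Lombok and MyBatis-Plus annotations; the ON DUPLICATE KEY clause is a deliberate choice that makes re-inserting an already-archived row harmless, which the crash-and-retry path relies on:

@Data
@NoArgsConstructor
@AllArgsConstructor
@TableName("archived_ticket")
public class ArchivedTicket {
    private Long id;                 // ticket ID (kept from the hot table, so IDs stay globally unique)
    private String customerName;     // customer name
    private Integer status;          // ticket status: 1=closed
    private LocalDateTime closeTime; // close time
    private LocalDateTime updateTime;// last update time
}

public interface ArchivedTicketMapper extends BaseMapper<ArchivedTicket> {

    // Batch insert, idempotent on the primary key so retried batches do not fail
    @Insert("<script>" +
            "INSERT INTO archived_ticket (id, customer_name, status, close_time, update_time) VALUES " +
            "<foreach collection='list' item='t' separator=','>" +
            "(#{t.id}, #{t.customerName}, #{t.status}, #{t.closeTime}, #{t.updateTime})" +
            "</foreach>" +
            " ON DUPLICATE KEY UPDATE update_time = VALUES(update_time)" +
            "</script>")
    int insertBatch(@Param("list") List<ArchivedTicket> list);
}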

2. Mapper Interface (ServiceTicketMapper.java)

public interface ServiceTicketMapper extends BaseMapper<ServiceTicket> {

    /**
     * Lock a batch of cold rows pending migration for the given thread.
     * Rows whose previous lock timed out (older than 10 minutes) count as free.
     * @param threadId  thread name identifying the lock holder
     * @param batchSize maximum number of rows to lock
     * @return number of rows locked
     */
    @Update("UPDATE service_ticket " +
            "SET lock_thread = #{threadId}, lock_time = NOW() " +
            "WHERE id IN ( " +
            "  SELECT id FROM ( " +
            "    SELECT id FROM service_ticket " +
            "    WHERE status = 1 AND close_time <= NOW() - INTERVAL 1 MONTH " +
            "      AND cold_flag = 1 " +
            "      AND (lock_thread IS NULL OR lock_time <= NOW() - INTERVAL 10 MINUTE) " +
            "    LIMIT #{batchSize} " +
            "  ) AS t " +
            ")")
    int lockColdPendingData(@Param("threadId") String threadId, @Param("batchSize") int batchSize);

    /**
     * Fetch the rows locked by the given thread in the step above.
     */
    @Select("SELECT * FROM service_ticket WHERE cold_flag = 1 AND lock_thread = #{threadId}")
    List<ServiceTicket> selectLockedData(@Param("threadId") String threadId);

    /**
     * Batch-delete migrated rows from the hot table.
     * @param idList ticket IDs to delete
     * @return number of rows deleted
     */
    @Delete("<script>" +
            "DELETE FROM service_ticket WHERE id IN " +
            "<foreach collection='idList' item='id' open='(' separator=',' close=')'>#{id}</foreach>" +
            "</script>")
    int deleteByIds(@Param("idList") List<Long> idList);
}

Two MySQL quirks shape the locking statement: a table cannot appear directly in the IN-subquery of an UPDATE against itself, and LIMIT is not allowed inside an IN-subquery; wrapping the inner SELECT in a derived table (AS t) sidesteps both. Because a single annotated statement cannot both update rows and return them, claiming and fetching are split into two steps: the UPDATE tags a batch with the caller's thread ID, and selectLockedData then retrieves exactly those rows.

3. Service Class (ColdDataMigrationService.java)

@Service
@Slf4j
public class ColdDataMigrationService {
    private static final int BATCH_SIZE = 100;        // rows per batch (balances throughput against DB pressure)
    private static final int THREAD_POOL_SIZE = 3;    // concurrent migration threads (tune to DB capacity)
    private static final long LOCK_TIMEOUT_MS = 10 * 60 * 1000; // lock timeout (10 min, enforced in the locking SQL)

    @Autowired
    private ServiceTicketMapper ticketMapper;
    @Autowired
    private ArchivedTicketMapper archivedMapper;
    @Autowired
    private ThreadPoolTaskExecutor migrationExecutor; // custom pool (core 3, max 5, queue 200)

    /**
     * Entry point of the cold-data migration flow.
     */
    public void migrateColdData() {
        log.info("Starting cold-data migration task");

        // 1. Pre-pass: flag eligible historical rows as pending migration
        markOldTicketsAsPendingMigration();

        // 2. Run the migration on several threads concurrently
        CountDownLatch latch = new CountDownLatch(THREAD_POOL_SIZE);
        for (int i = 0; i < THREAD_POOL_SIZE; i++) {
            migrationExecutor.execute(() -> {
                try {
                    processBatchMigration();
                } finally {
                    latch.countDown();
                }
            });
        }

        // 3. Wait for all workers to finish
        try {
            latch.await();
            log.info("Cold-data migration task finished");
        } catch (InterruptedException e) {
            log.error("Migration task interrupted", e);
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Flag closed tickets older than one month as pending migration.
     */
    private void markOldTicketsAsPendingMigration() {
        log.info("Flagging historical rows as pending migration");
        int count = ticketMapper.update(null, Wrappers.<ServiceTicket>lambdaUpdate()
            .set(ServiceTicket::getColdFlag, 1)                                  // mark as pending
            .eq(ServiceTicket::getStatus, 1)                                     // closed tickets only
            .le(ServiceTicket::getCloseTime, LocalDateTime.now().minusMonths(1)) // closed more than a month ago
            .eq(ServiceTicket::getColdFlag, 0));                                 // skip rows already flagged
        log.info("Flagged {} rows as pending migration", count);
    }

    /**
     * Per-thread batch migration loop.
     */
    private void processBatchMigration() {
        String threadId = Thread.currentThread().getName();
        log.info("Thread {} starting batch migration", threadId);

        while (true) {
            try {
                // 1. Claim a batch: tag up to BATCH_SIZE pending rows with this thread's ID
                int locked = ticketMapper.lockColdPendingData(threadId, BATCH_SIZE);
                if (locked == 0) {
                    log.info("Thread {} found no rows pending migration, exiting", threadId);
                    break;
                }

                // 2. Fetch exactly the rows this thread just locked
                List<ServiceTicket> tickets = ticketMapper.selectLockedData(threadId);

                // 3. Copy the batch into the cold store. The insert is idempotent on the
                //    primary key (see the ArchivedTicketMapper sketch above), so a crash
                //    and retry between steps 3 and 4 is safe.
                List<ArchivedTicket> archivedList = tickets.stream()
                    .map(t -> new ArchivedTicket(t.getId(), t.getCustomerName(), t.getStatus(),
                        t.getCloseTime(), t.getUpdateTime()))
                    .collect(Collectors.toList());
                archivedMapper.insertBatch(archivedList);

                // 4. Remove the batch from the hot table. Hot and cold live in different
                //    databases, so one local transaction cannot span both; deletion itself
                //    prevents reprocessing, and the idempotent insert covers retries.
                List<Long> idList = tickets.stream().map(ServiceTicket::getId).collect(Collectors.toList());
                ticketMapper.deleteByIds(idList);

                log.info("Thread {} migrated {} rows", threadId, tickets.size());

                // 5. Brief pause to avoid hammering the database
                Thread.sleep(100);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            } catch (Exception e) {
                log.error("Thread {} hit an exception during migration: {}", threadId, e.getMessage(), e);
                try {
                    Thread.sleep(5000); // back off longer after a failure
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }
}

IV. YML Configuration

# Basic application config
spring:
  application:
    name: customer-service-ticket

  # Dual data sources (hot + cold). These hot/cold sub-keys are custom --
  # Spring Boot does not bind them automatically; see the wiring sketch below.
  datasource:
    # Hot store (high-performance SSD, primary/replica)
    hot:
      url: jdbc:mysql://hot-db-cluster:3306/hot_db?useSSL=false&serverTimezone=Asia/Shanghai&autoReconnect=true&maxReconnects=3
      driver-class-name: com.mysql.cj.jdbc.Driver
      username: ${DB_HOT_USERNAME:root}
      password: ${DB_HOT_PASSWORD:123456}
      type: com.zaxxer.hikari.HikariDataSource
      hikari:
        maximum-pool-size: 15
        minimum-idle: 5
        idle-timeout: 30000
        connection-timeout: 30000
        max-lifetime: 1800000

    # Cold store (high-capacity HDD, read/write splitting)
    cold:
      url: jdbc:mysql://cold-db-cluster:3306/cold_db?useSSL=false&serverTimezone=Asia/Shanghai&autoReconnect=true&maxReconnects=3
      driver-class-name: com.mysql.cj.jdbc.Driver
      username: ${DB_COLD_USERNAME:root}
      password: ${DB_COLD_PASSWORD:123456}
      type: com.zaxxer.hikari.HikariDataSource
      hikari:
        maximum-pool-size: 5
        minimum-idle: 2
        idle-timeout: 60000
        connection-timeout: 30000
        max-lifetime: 1800000

  # Quartz scheduler (Spring Boot binds these keys under spring.quartz)
  quartz:
    job-store-type: jdbc          # persist jobs in the database
    jdbc:
      initialize-schema: never    # do not auto-create the QRTZ_ tables
    scheduler-name: ColdDataMigrationScheduler
    properties:
      org:
        quartz:
          scheduler:
            instanceName: ColdDataScheduler
            instanceId: AUTO
          jobStore:
            class: org.quartz.impl.jdbcjobstore.JobStoreTX  # transactional store
            driverDelegateClass: org.quartz.impl.jdbcjobstore.StdJDBCDelegate
            tablePrefix: QRTZ_
            useProperties: false
            dataSource: quartzDataSource
            clusterCheckinInterval: 20000  # cluster check-in interval (ms)
          threadPool:
            class: org.quartz.simpl.SimpleThreadPool
            threadCount: 5       # scheduler thread pool size
            threadPriority: 5
            threadsInheritContextClassLoaderOfInitializingThread: true
          dataSource:
            quartzDataSource:
              URL: jdbc:mysql://localhost:3306/quartz_db?useSSL=false&serverTimezone=Asia/Shanghai
              driver: com.mysql.cj.jdbc.Driver
              user: ${QUARTZ_USERNAME:root}
              password: ${QUARTZ_PASSWORD:123456}
              maxConnections: 5

# MyBatis-Plus config (a top-level key, not nested under spring)
mybatis-plus:
  mapper-locations: classpath:mapper/**/*.xml  # XML mapper locations
  global-config:
    db-config:
      id-type: auto                # auto-increment primary keys
      logic-delete-field: deleted  # logical-delete field
      logic-not-delete-value: 0    # value meaning "not deleted"
      logic-delete-value: 1        # value meaning "deleted"
  configuration:
    log-impl: org.apache.ibatis.logging.slf4j.Slf4jImpl  # log SQL via SLF4J
    map-underscore-to-camel-case: true  # snake_case columns -> camelCase fields
    cache-enabled: false                # disable the second-level cache

# Custom migration thread pool
migration:
  thread-pool:
    core-pool-size: 3
    max-pool-size: 5
    queue-capacity: 200
    keep-alive-seconds: 300
    thread-name-prefix: migration-thread-
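
Because spring.datasource.hot/cold are custom keys, the two data sources need explicit wiring; Spring Boot only auto-configures a single spring.datasource. A minimal sketch of the hot side, assuming hot and cold mappers live in separate packages (the package name here is illustrative); the cold side is a mirror class without @Primary, bound to spring.datasource.cold:

import javax.sql.DataSource;

import org.apache.ibatis.session.SqlSessionFactory;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;

import com.baomidou.mybatisplus.extension.spring.MybatisSqlSessionFactoryBean;
import com.zaxxer.hikari.HikariDataSource;

@Configuration
@MapperScan(basePackages = "com.example.ticket.mapper.hot",  // hypothetical package holding ServiceTicketMapper
            sqlSessionFactoryRef = "hotSqlSessionFactory")
public class HotDataSourceConfig {

    @Bean
    @Primary
    @ConfigurationProperties("spring.datasource.hot")
    public DataSourceProperties hotDataSourceProperties() {
        return new DataSourceProperties(); // binds url/username/password/driver-class-name
    }

    @Bean
    @Primary
    @ConfigurationProperties("spring.datasource.hot.hikari") // binds maximum-pool-size, idle-timeout, ...
    public HikariDataSource hotDataSource() {
        return hotDataSourceProperties().initializeDataSourceBuilder()
                .type(HikariDataSource.class).build();
    }

    @Bean
    @Primary
    public SqlSessionFactory hotSqlSessionFactory(@Qualifier("hotDataSource") DataSource dataSource) throws Exception {
        // MyBatis-Plus session factory so the BaseMapper CRUD methods keep working
        MybatisSqlSessionFactoryBean factory = new MybatisSqlSessionFactoryBean();
        factory.setDataSource(dataSource);
        return factory.getObject();
    }
}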

V. Scheduled Task Configuration

@Configuration
public class QuartzConfig {

    /**
     * Cold-data migration JobDetail.
     */
    @Bean
    public JobDetail migrateJobDetail() {
        return JobBuilder.newJob(ColdDataMigrationJob.class)
            .withIdentity("coldDataMigrationJob", "dataMigrationGroup")
            .storeDurably() // keep the job even when no trigger references it
            .build();
    }

    /**
     * Migration trigger: every day at 2:00 AM.
     */
    @Bean
    public Trigger migrateTrigger() {
        // The cron expression could also come from a config center for runtime tuning
        String cronExpression = "0 0 2 * * ?";

        return TriggerBuilder.newTrigger()
            .forJob(migrateJobDetail())
            .withIdentity("coldDataMigrationTrigger", "dataMigrationGroup")
            .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression)
                .withMisfireHandlingInstructionDoNothing()) // skip missed fire times instead of catching up
            .build();
    }

    /**
     * Custom migration thread pool (values mirror migration.thread-pool in the YAML).
     */
    @Bean
    public ThreadPoolTaskExecutor migrationExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(3);
        executor.setMaxPoolSize(5);
        executor.setQueueCapacity(200);
        executor.setKeepAliveSeconds(300);
        executor.setThreadNamePrefix("migration-thread-");

        // Rejection policy: when the queue is full and the pool is maxed out,
        // run the task on the submitting thread
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        // Graceful shutdown
        executor.setWaitForTasksToCompleteOnShutdown(true);
        executor.setAwaitTerminationSeconds(60);

        return executor;
    }
}

/**
 * Quartz job that drives the cold-data migration.
 */
@Slf4j
@DisallowConcurrentExecution // with the JDBC job store, prevents overlapping runs across the cluster
public class ColdDataMigrationJob implements Job {
    @Autowired // injected via a Spring-aware JobFactory (see note below)
    private ColdDataMigrationService migrationService;

    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        log.info("Cold-data migration job triggered at {}", new Date());

        try {
            // Run the migration
            migrationService.migrateColdData();

            log.info("Cold-data migration job finished successfully");
        } catch (Exception e) {
            log.error("Cold-data migration job failed", e);
            throw new JobExecutionException("Cold-data migration failed", e, false);
        }
    }
}
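
Field injection into a Quartz Job only works when the scheduler's JobFactory hands bean creation to Spring; spring-boot-starter-quartz sets this up automatically. Outside Boot, a minimal sketch of an autowiring job factory (the class name is illustrative), to be registered on the SchedulerFactoryBean:

import org.quartz.spi.TriggerFiredBundle;
import org.springframework.beans.factory.config.AutowireCapableBeanFactory;
import org.springframework.context.ApplicationContext;
import org.springframework.scheduling.quartz.SpringBeanJobFactory;

// Runs @Autowired injection on every Job instance Quartz creates
public class AutowiringJobFactory extends SpringBeanJobFactory {
    private final AutowireCapableBeanFactory beanFactory;

    public AutowiringJobFactory(ApplicationContext context) {
        this.beanFactory = context.getAutowireCapableBeanFactory();
    }

    @Override
    protected Object createJobInstance(TriggerFiredBundle bundle) throws Exception {
        Object job = super.createJobInstance(bundle);
        beanFactory.autowireBean(job); // satisfies fields like migrationService
        return job;
    }
}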

VI. Key Flow Examples

1. Closing a Ticket

// Ticket-closing business logic
@Service
public class TicketService {
    @Autowired
    private ServiceTicketMapper ticketMapper;
    @Autowired
    private OperationLogService logService; // async operation-log service (type name assumed)

    /**
     * Close a ticket; the nightly migration job archives it one month later.
     */
    @Transactional(rollbackFor = Exception.class)
    public void closeTicket(Long ticketId) {
        // 1. Load the ticket
        ServiceTicket ticket = ticketMapper.selectById(ticketId);
        if (ticket == null) {
            throw new BusinessException("Ticket not found");
        }

        // 2. Idempotency check
        if (ticket.getStatus() == TicketStatus.CLOSED.getValue()) {
            return; // already closed
        }

        // 3. Set status and close time
        ticket.setStatus(TicketStatus.CLOSED.getValue());
        ticket.setCloseTime(LocalDateTime.now());

        // 4. Persist the change
        ticketMapper.updateById(ticket);

        // 5. Record the operation (asynchronously)
        logService.recordOperationLog(ticketId, "Ticket closed");
    }
}
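
The example above leans on a TicketStatus enum (and a BusinessException) that the listings leave out. A minimal sketch of the enum; the codes mirror the status column comments in the DDL:

// Status codes matching service_ticket.status (0 = open, 1 = closed)
public enum TicketStatus {
    OPEN(0),
    CLOSED(1);

    private final int value;

    TicketStatus(int value) {
        this.value = value;
    }

    public int getValue() {
        return value;
    }
}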

2. Hot/Cold Query Logic

// Ticket query service
@Service
public class TicketQueryService {
    @Autowired
    private ServiceTicketMapper ticketMapper;
    @Autowired
    private ArchivedTicketMapper archivedMapper;

    /**
     * Query tickets across both stores (the hot/cold split is transparent to callers).
     */
    public List<TicketVO> queryTickets(LocalDateTime startTime, LocalDateTime endTime) {
        List<TicketVO> result = new ArrayList<>();

        // 1. Hot data: open tickets, or tickets closed within the last month.
        //    The or-branch is nested so the update_time range applies to both branches.
        List<ServiceTicket> hotTickets = ticketMapper.selectList(
            Wrappers.<ServiceTicket>lambdaQuery()
                .and(w -> w.eq(ServiceTicket::getStatus, TicketStatus.OPEN.getValue())
                    .or(c -> c.eq(ServiceTicket::getStatus, TicketStatus.CLOSED.getValue())
                        .ge(ServiceTicket::getCloseTime, LocalDateTime.now().minusMonths(1))))
                .between(ServiceTicket::getUpdateTime, startTime, endTime)
        );

        // 2. Convert hot rows to VOs
        result.addAll(hotTickets.stream()
            .map(t -> convertToVO(t))
            .collect(Collectors.toList()));

        // 3. Cold data: closed more than a month ago; only worth querying
        //    when the requested range reaches back that far
        if (startTime.isBefore(LocalDateTime.now().minusMonths(1))) {
            List<ArchivedTicket> coldTickets = archivedMapper.selectList(
                Wrappers.<ArchivedTicket>lambdaQuery()
                    .le(ArchivedTicket::getCloseTime, LocalDateTime.now().minusMonths(1))
                    .between(ArchivedTicket::getUpdateTime, startTime, endTime)
            );

            // 4. Convert cold rows to VOs
            result.addAll(coldTickets.stream()
                .map(t -> convertToVO(t))
                .collect(Collectors.toList()));
        }

        // 5. Sort the merged results by update time, newest first
        result.sort(Comparator.comparing(TicketVO::getUpdateTime).reversed());

        return result;
    }

    // VO conversion (omitted)
    private TicketVO convertToVO(ServiceTicket ticket) {
        // mapping logic omitted
        return null;
    }

    private TicketVO convertToVO(ArchivedTicket ticket) {
        // mapping logic omitted
        return null;
    }
}
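
From the caller's perspective the hot/cold split is invisible; one call spans both stores:

// Everything updated in the last three months, merged and sorted by update time
List<TicketVO> recent = ticketQueryService.queryTickets(
        LocalDateTime.now().minusMonths(3), LocalDateTime.now());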

VII. Optimization Summary

  1. Locking

    • lock_thread + lock_time implement a lightweight per-row claim, so concurrent threads never migrate the same rows (an optional sweeper sketch follows this list);
    • The claim times out after 10 minutes, automatically freeing rows locked by a crashed or hung thread.
  2. Batching

    • Each thread migrates 100 rows per pass (BATCH_SIZE=100), with 3 threads running concurrently for throughput;
    • The LIMIT-ed id subquery, driven by the idx_cold_flag index, avoids full-table scans and keeps the impact on live traffic low.
  3. Non-invasive design

    • The migration logic is fully encapsulated in a standalone service; business code only needs to update ticket status as usual;

    • Hot and cold data sit behind separate Mappers, keeping the query logic clean.
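
The 10-minute release mentioned in point 1 already happens implicitly: the locking SQL treats rows whose lock_time is older than 10 minutes as free. An explicit sweeper can additionally reclaim them eagerly, which keeps "locked" row counts honest for monitoring; a minimal sketch, assuming scheduling is enabled via @EnableScheduling:

import java.time.LocalDateTime;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import com.baomidou.mybatisplus.core.toolkit.Wrappers;

import lombok.extern.slf4j.Slf4j;

// Optional hygiene: functionally redundant with the timeout check inside
// lockColdPendingData, but frees stale claims without waiting for the next run
@Slf4j
@Component
public class StaleLockSweeper {

    @Autowired
    private ServiceTicketMapper ticketMapper;

    @Scheduled(fixedDelay = 5 * 60 * 1000) // sweep every 5 minutes
    public void releaseStaleLocks() {
        int released = ticketMapper.update(null, Wrappers.<ServiceTicket>lambdaUpdate()
            .set(ServiceTicket::getLockThread, null)
            .set(ServiceTicket::getLockTime, null)
            .isNotNull(ServiceTicket::getLockThread)
            .le(ServiceTicket::getLockTime, LocalDateTime.now().minusMinutes(10)));
        if (released > 0) {
            log.info("Released {} stale migration locks", released);
        }
    }
}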