浅谈Rocketmq源码-消息存储(二)

229 阅读2分钟

ConsumerQueue、Index

上篇文章说到,在RocketMQ中,CommitLog、ConsumeQueue、Index这些文件都被映射成存储对象MappedFile。消息到来时会先存储在CommitLog中,而ConsumeQueue和Index文件则是由ReputMessageService这个异步线程根据CommitLog中的数据进行更新的

那么是怎么更新的呢?

image.png

通过ReputMessageService的类图,我们可以直接看它的run方法

@Override
public void run() {
    DefaultMessageStore.log.info(this.getServiceName() + " service started");
​
    while (!this.isStopped()) {
        try {
            // 每隔一毫秒进行数据分发
            Thread.sleep(1);
            this.doReput();
        } catch (Exception e) {
            DefaultMessageStore.log.warn(this.getServiceName() + " service has exception. ", e);
        }
    }
​
    DefaultMessageStore.log.info(this.getServiceName() + " service end");
}
/**
 * Reads data from the CommitLog starting at {@code reputFromOffset} and hands
 * each successfully parsed message to {@code doDispatch}, advancing
 * {@code reputFromOffset} as it goes. Parts of the original method are elided
 * ("......") in this excerpt.
 */
private void doReput() {
    // reputFromOffset is below the CommitLog's minimum offset: the data it points at
    // has already expired, so reset the offset to the minimum.
    if (this.reputFromOffset < DefaultMessageStore.this.commitLog.getMinOffset()) {
        log.warn("The reputFromOffset={} is smaller than minPyOffset={}, this usually indicate that the dispatch behind too much and the commitlog has expired.",
            this.reputFromOffset, DefaultMessageStore.this.commitLog.getMinOffset());
        this.reputFromOffset = DefaultMessageStore.this.commitLog.getMinOffset();
    }
    // Keep going while the CommitLog reports available data and the last round asked to continue.
    for (boolean doNext = true; this.isCommitLogAvailable() && doNext; ) {
​
        ......
        // Fetch data from the CommitLog; pay attention to how data is obtained
        // when the read crosses a file boundary.
        SelectMappedBufferResult result = DefaultMessageStore.this.commitLog.getData(reputFromOffset);
        if (result != null) {
            try {
                this.reputFromOffset = result.getStartOffset();
                // Process the messages in the returned buffer one by one.
                for (int readSize = 0; readSize < result.getSize() && doNext; ) {
                    DispatchRequest dispatchRequest =
                        DefaultMessageStore.this.commitLog.checkMessageAndReturnSize(result.getByteBuffer(), false, false);
                    // Use the buffer size unless it is -1, in which case fall back to the message size.
                    int size = dispatchRequest.getBufferSize() == -1 ? dispatchRequest.getMsgSize() : dispatchRequest.getBufferSize();
​
                    if (dispatchRequest.isSuccess()) {
                        if (size > 0) {
                            // Dispatch the message to the registered dispatchers.
                            DefaultMessageStore.this.doDispatch(dispatchRequest);
​
                            ......
                            // Advance reputFromOffset past the message just handled.
                            this.reputFromOffset += size;
                            ......
                        } else if (size == 0) {
                            // Size 0: move reputFromOffset to the next file and end the
                            // inner loop by marking the whole result as read.
                            this.reputFromOffset = DefaultMessageStore.this.commitLog.rollNextFile(this.reputFromOffset);
                            readSize = result.getSize();
                        }
                    } 
                    ......
                }
            } finally {
                // Always release the buffer result obtained from getData.
                result.release();
            }
        } else {
            // No data returned: stop the outer loop.
            doNext = false;
        }
    }
}

数据分发的处理类是在DefaultMessageStore构造器中初始化的

/**
 * Constructor excerpt: builds the list of CommitLog dispatchers. Other
 * initialization is elided ("......") in this excerpt.
 */
public DefaultMessageStore(final MessageStoreConfig messageStoreConfig, final BrokerStatsManager brokerStatsManager,
    final MessageArrivingListener messageArrivingListener, final BrokerConfig brokerConfig) throws IOException {
    ......
      
    this.dispatcherList = new LinkedList<>();
    // Dispatcher that builds the consume queue.
    this.dispatcherList.addLast(new CommitLogDispatcherBuildConsumeQueue());
    // Dispatcher that builds the index.
    this.dispatcherList.addLast(new CommitLogDispatcherBuildIndex());
​
    ......
}

此处不细讲ConsumeQueue和Index的分发过程,因为它们的存储过程跟CommitLog类似,下面重点讲讲刷盘策略

消息刷盘策略

当消息追加到MappedFile以后,这条消息实际上还是存储在内存中,所以RocketMQ还需要把这些消息刷到磁盘中,才算是真真正正地完成任务。这样一来,断电、进程挂掉这些异常情况就统统都不怕了

消息刷盘分为同步刷盘和异步刷盘,异步刷盘又分为开启堆外内存和未开启堆外内存两种方式

无论是同步刷盘还是异步刷盘,Rocketmq都是采用了开启另外一个线程来执行任务的方式

下图是三种刷盘方式分别使用的线程

  1. GroupCommitService:同步刷盘
  2. FlushRealTimeService:未开启堆外内存的异步刷盘
  3. CommitRealTimeService:开启堆外内存的异步刷盘

image.png

下面我们来看看具体的代码

/**
 * Constructor excerpt: selects the flush service according to the configured
 * flush disk type and creates the commit service. Other initialization is
 * elided ("......") in this excerpt.
 */
public CommitLog(final DefaultMessageStore defaultMessageStore) {
    ......
    // When CommitLog is instantiated, check whether flushing is synchronous or asynchronous.
    if (FlushDiskType.SYNC_FLUSH == defaultMessageStore.getMessageStoreConfig().getFlushDiskType()) {
        // Synchronous flush: use GroupCommitService.
        this.flushCommitLogService = new GroupCommitService();
    } else {
        // Asynchronous flush: use FlushRealTimeService.
        this.flushCommitLogService = new FlushRealTimeService();
    }
    // Always created here; per the article, it serves the asynchronous flush path
    // when off-heap memory is enabled.
    this.commitLogService = new CommitRealTimeService();
​
    ......
​
}
同步刷盘

GroupCommitService内部维护了两个队列:读队列requestsRead和写队列requestsWrite。之所以要维护两个队列,是为了读写分离,避免锁竞争。当消息到来时,并不是立刻执行刷盘,而是把刷盘请求放到requestsWrite队列中,并且唤醒刷盘线程。处理完成后,隔10ms,requestsRead和requestsWrite会互换一下角色,然后重新执行刷盘方法。上一次执行刷盘方法期间,requestsWrite中可能堆积了一些请求,互换之后就可以处理这些堆积的请求,周而复始

private volatile LinkedList<GroupCommitRequest> requestsWrite = new LinkedList<GroupCommitRequest>();
private volatile LinkedList<GroupCommitRequest> requestsRead = new LinkedList<GroupCommitRequest>();
private final PutMessageSpinLock lock = new PutMessageSpinLock();
​
public synchronized void putRequest(final GroupCommitRequest request) {
    lock.lock();
    try {
        // 将刷盘的请求放进requestsWrite队列中
        this.requestsWrite.add(request);
    } finally {
        lock.unlock();
    }
    // 通知新请求到来
    this.wakeup();
}
/**
 * Main loop of the group-commit service: until stopped, each round calls
 * waitForRunning(10) and then doCommit(). An exception thrown by a round is
 * logged as a warning and the loop continues. The shutdown tail of the
 * original method is elided ("......") in this excerpt.
 */
public void run() {
    CommitLog.log.info(this.getServiceName() + " service started");
​
    while (!this.isStopped()) {
        try {
            // Wait for 10 ms (presumably cut short by wakeup(); confirm against waitForRunning).
            this.waitForRunning(10);
            // Run the flush routine.
            this.doCommit();
        } catch (Exception e) {
            CommitLog.log.warn(this.getServiceName() + " service has exception. ", e);
        }
    }
​
    ......
}
​
private void swapRequests() {
    lock.lock();
    try {
      // 读写队列进行交换
      LinkedList<GroupCommitRequest> tmp = this.requestsWrite;
      this.requestsWrite = this.requestsRead;
      this.requestsRead = tmp;
    } finally {
      lock.unlock();
    }
}
​
private void doCommit() {
    if (!this.requestsRead.isEmpty()) {
      for (GroupCommitRequest req : this.requestsRead) {
        // 判断此前刷盘的位置是否大于现在提交的位置,如果大于,不需要执行刷盘操作
        boolean flushOK = CommitLog.this.mappedFileQueue.getFlushedWhere() >= req.getNextOffset();
        // 此处要进行两次刷盘,是因为有的消息可能横跨了两个CommitLog
        for (int i = 0; i < 2 && !flushOK; i++) {
          // 刷盘
          CommitLog.this.mappedFileQueue.flush(0);
          flushOK = CommitLog.this.mappedFileQueue.getFlushedWhere() >= req.getNextOffset();
        }
        // 唤醒对应的消费者
        req.wakeupCustomer(flushOK ? PutMessageStatus.PUT_OK : PutMessageStatus.FLUSH_DISK_TIMEOUT);
      }
      // 更新数据
      long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp();
      if (storeTimestamp > 0) {
        CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp);
      }
​
      this.requestsRead = new LinkedList<>();
    } else {
      // Because of individual messages is set to not sync flush, it
      // will come to this process
      CommitLog.this.mappedFileQueue.flush(0);
    }
}
异步刷盘

看完同步刷盘,再来看看异步刷盘是怎么做的

class FlushRealTimeService extends FlushCommitLogService {
    private long lastFlushTimestamp = 0;
    private long printTimes = 0;
​
    public void run() {
        CommitLog.log.info(this.getServiceName() + " service started");
​
        while (!this.isStopped()) {
            // 休眠策略,这个标记位在下方会使用到
            boolean flushCommitLogTimed = CommitLog.this.defaultMessageStore.getMessageStoreConfig().isFlushCommitLogTimed();
​
            ......
​
            try {
                // 判断是否为周期性刷盘
                if (flushCommitLogTimed) {
                    // 固定休眠interval时间间隔,默认是0.5秒
                    Thread.sleep(interval);
                } else {
                    // 被唤醒就刷盘,非周期性刷盘
                    this.waitForRunning(interval);
                }
​
                if (printFlushProgress) {
                    this.printFlushProgress();
                }
​
                long begin = System.currentTimeMillis();
                // 执行刷盘操作
                CommitLog.this.mappedFileQueue.flush(flushPhysicQueueLeastPages);
                long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp();
                if (storeTimestamp > 0) {
                    CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp);
                }
                long past = System.currentTimeMillis() - begin;
                // 刷盘时间超过500ms,会打印日志提示
                if (past > 500) {
                    log.info("Flush data to disk costs {} ms", past);
                }
            } catch (Throwable e) {
                CommitLog.log.warn(this.getServiceName() + " service has exception. ", e);
                this.printFlushProgress();
            }
        }
​
        // 如果程序shutdown了,尽量保证内存中的消息全部刷盘
        boolean result = false;
        for (int i = 0; i < RETRY_TIMES_OVER && !result; i++) {
            result = CommitLog.this.mappedFileQueue.flush(0);
            CommitLog.log.info(this.getServiceName() + " service shutdown, retry " + (i + 1) + " times " + (result ? "OK" : "Not OK"));
        }
​
        this.printFlushProgress();
​
        CommitLog.log.info(this.getServiceName() + " service end");
}

那开启堆外内存的异步刷盘是怎么做的呢?

其实做法跟未开启堆外内存的异步刷盘是相似的,不过多了一步:先将数据从堆外内存提交(commit)到page cache,再唤醒刷盘线程进行刷盘

/**
 * Commit service used when off-heap memory is enabled (per the surrounding
 * article). Its loop commits pending data and wakes up the flush service when
 * data was committed. Parts of the original class are elided ("......") in
 * this excerpt; begin, interval and commitDataLeastPages are defined in the
 * elided code.
 */
class CommitRealTimeService extends FlushCommitLogService {
​
    private long lastCommitTimestamp = 0;
​
    /** Returns the simple class name as the service name. */
    @Override
    public String getServiceName() {
        return CommitRealTimeService.class.getSimpleName();
    }
​
    /**
     * Runs the commit loop until the service is stopped. A Throwable from a
     * round is logged as an error and the loop continues.
     */
    @Override
    public void run() {
        CommitLog.log.info(this.getServiceName() + " service started");
        while (!this.isStopped()) {
            ......
​
            try {
                // Commit pending data. NOTE(review): the article describes this step as
                // moving data from off-heap memory into the page cache — confirm
                // against MappedFileQueue.commit.
                boolean result = CommitLog.this.mappedFileQueue.commit(commitDataLeastPages);
                long end = System.currentTimeMillis();
                if (!result) {
                    this.lastCommitTimestamp = end; // result = false means some data committed.
                    // Wake up flushCommitLogService so that it flushes the committed data.
                    flushCommitLogService.wakeup();
                }
​
                // Log a notice when the commit took longer than 500 ms.
                if (end - begin > 500) {
                    log.info("Commit data to file costs {} ms", end - begin);
                }
                this.waitForRunning(interval);
            } catch (Throwable e) {
                CommitLog.log.error(this.getServiceName() + " service has exception. ", e);
            }
        }
​
        ......
    }
}

刷盘策略方法入口

public CompletableFuture<PutMessageStatus> submitFlushRequest(AppendMessageResult result, MessageExt messageExt) {
    // 同步刷盘
    if (FlushDiskType.SYNC_FLUSH == this.defaultMessageStore.getMessageStoreConfig().getFlushDiskType()) {
        final GroupCommitService service = (GroupCommitService) this.flushCommitLogService;
        if (messageExt.isWaitStoreMsgOK()) {
            GroupCommitRequest request = new GroupCommitRequest(result.getWroteOffset() + result.getWroteBytes(),
                    this.defaultMessageStore.getMessageStoreConfig().getSyncFlushTimeout());
            service.putRequest(request);
            return request.future();
        } else {
            service.wakeup();
            return CompletableFuture.completedFuture(PutMessageStatus.PUT_OK);
        }
    }
    // 异步刷盘
    else {
        if (!this.defaultMessageStore.getMessageStoreConfig().isTransientStorePoolEnable()) {
            flushCommitLogService.wakeup();
        } else  {
            // 开启堆外内存的异步刷盘
            commitLogService.wakeup();
        }
        return CompletableFuture.completedFuture(PutMessageStatus.PUT_OK);
    }
}

具体的刷盘方法位于MappedFile中

public int flush(final int flushLeastPages) {
    // 校验是否达到刷盘条件
    if (this.isAbleToFlush(flushLeastPages)) {
        if (this.hold()) {
            int value = getReadPosition();
​
            try {
                // 数据从writeBuffer提交到fileChannel
                if (writeBuffer != null || this.fileChannel.position() != 0) {
                    this.fileChannel.force(false);
                } else {
                    // 从mmap刷新数据到磁盘
                    this.mappedByteBuffer.force();
                }
            } catch (Throwable e) {
                log.error("Error occurred when force data to disk.", e);
            }
​
            this.flushedPosition.set(value);
            this.release();
        } else {
            log.warn("in flush, hold failed, flush offset = " + this.flushedPosition.get());
            this.flushedPosition.set(getReadPosition());
        }
    }
    return this.getFlushedPosition();
}

参考资料

www.debugger.wiki/article/htm…