ConsumeQueue、Index
上篇文章说到,在RocketMQ中,CommitLog、ConsumeQueue、Index这些文件都被映射成存储对象MappedFile。消息到来时会先存储在CommitLog中,而ConsumeQueue和Index文件则由ReputMessageService这个异步线程根据CommitLog中的数据进行更新。
那么是怎么更新的呢?
通过ReputMessageService的类图,我们可以直接看它的run方法
/**
 * Main loop of the reput service: until the service is stopped, pause for
 * one millisecond and then run a single {@code doReput()} pass.
 * Any exception thrown inside an iteration is logged and the loop continues.
 */
@Override
public void run() {
    DefaultMessageStore.log.info(getServiceName() + " service started");

    while (!isStopped()) {
        try {
            // Dispatch data once every millisecond.
            Thread.sleep(1);
            doReput();
        } catch (Exception ex) {
            DefaultMessageStore.log.warn(getServiceName() + " service has exception. ", ex);
        }
    }

    DefaultMessageStore.log.info(getServiceName() + " service end");
}
/**
 * Reads data from the commit log starting at reputFromOffset and hands every
 * successfully parsed message to DefaultMessageStore.doDispatch, advancing
 * reputFromOffset as it goes. Parts of the method are elided in this excerpt
 * (the "......" lines).
 */
private void doReput() {
// reputFromOffset is below the commit log's minimum offset, i.e. that data has already expired: reset the offset
if (this.reputFromOffset < DefaultMessageStore.this.commitLog.getMinOffset()) {
log.warn("The reputFromOffset={} is smaller than minPyOffset={}, this usually indicate that the dispatch behind too much and the commitlog has expired.",
this.reputFromOffset, DefaultMessageStore.this.commitLog.getMinOffset());
this.reputFromOffset = DefaultMessageStore.this.commitLog.getMinOffset();
}
// Keep going while the commit log has data available and the previous pass asked to continue
for (boolean doNext = true; this.isCommitLogAvailable() && doNext; ) {
......
// Fetch data from the commit log; pay attention to how data is fetched when crossing a file boundary
SelectMappedBufferResult result = DefaultMessageStore.this.commitLog.getData(reputFromOffset);
if (result != null) {
try {
// Re-align the offset with the start offset of the returned buffer
this.reputFromOffset = result.getStartOffset();
// Process the messages in the buffer one by one
for (int readSize = 0; readSize < result.getSize() && doNext; ) {
DispatchRequest dispatchRequest =
DefaultMessageStore.this.commitLog.checkMessageAndReturnSize(result.getByteBuffer(), false, false);
// Use the buffer size unless it is -1, in which case fall back to the message size
int size = dispatchRequest.getBufferSize() == -1 ? dispatchRequest.getMsgSize() : dispatchRequest.getBufferSize();
if (dispatchRequest.isSuccess()) {
if (size > 0) {
// Dispatch the message to the registered dispatchers
DefaultMessageStore.this.doDispatch(dispatchRequest);
......
// Advance reputFromOffset past the message just handled
this.reputFromOffset += size;
......
} else if (size == 0) {
// A successful request of size 0: roll to the next file and end the inner loop for this buffer
this.reputFromOffset = DefaultMessageStore.this.commitLog.rollNextFile(this.reputFromOffset);
readSize = result.getSize();
}
}
......
}
} finally {
// Always release the buffer obtained from getData
result.release();
}
} else {
// No data returned: stop the outer loop
doNext = false;
}
}
}
数据分发的处理类是在DefaultMessageStore构造器中初始化的
/**
 * Constructor excerpt (most of the body is elided as "......"): the visible part
 * builds dispatcherList and registers the two commit-log dispatchers in order.
 */
public DefaultMessageStore(final MessageStoreConfig messageStoreConfig, final BrokerStatsManager brokerStatsManager,
final MessageArrivingListener messageArrivingListener, final BrokerConfig brokerConfig) throws IOException {
......
this.dispatcherList = new LinkedList<>();
// Dispatcher that builds the ConsumeQueue
this.dispatcherList.addLast(new CommitLogDispatcherBuildConsumeQueue());
// Dispatcher that builds the index
this.dispatcherList.addLast(new CommitLogDispatcherBuildIndex());
......
}
此处不细讲ConsumeQueue和Index的分发过程,因为与CommitLog的处理类似,下面重点讲讲刷盘策略。
消息刷盘策略
当消息追加到MappedFile以后,这条消息实际上还是存储在内存中,所以RocketMQ还需要把这些消息刷到磁盘中,才算是真真正正地完成任务。这样即使遇到断电、进程挂掉等异常情况,消息也不会丢失。
消息刷盘分为同步刷盘和异步刷盘,异步刷盘又分为开启堆外内存和未开启堆外内存两种方式
无论是同步刷盘还是异步刷盘,Rocketmq都是采用了开启另外一个线程来执行任务的方式
下图是三种刷盘方式分别使用的线程
- GroupCommitService:同步刷盘
- FlushRealTimeService:未开启堆外内存的异步刷盘
- CommitRealTimeService:开启堆外内存的异步刷盘
下面我们来看看具体的代码
/**
 * Constructor excerpt (parts elided as "......"): selects the flush service
 * implementation from the configured flush disk type and creates the commit service.
 */
public CommitLog(final DefaultMessageStore defaultMessageStore) {
......
// When CommitLog is instantiated, decide whether flushing is synchronous or asynchronous
if (FlushDiskType.SYNC_FLUSH == defaultMessageStore.getMessageStoreConfig().getFlushDiskType()) {
// Synchronous flush: the service is a GroupCommitService
this.flushCommitLogService = new GroupCommitService();
} else {
// Asynchronous flush: the service is a FlushRealTimeService
this.flushCommitLogService = new FlushRealTimeService();
}
// Created unconditionally; per the surrounding article this service backs asynchronous flush with off-heap memory enabled
this.commitLogService = new CommitRealTimeService();
......
}
同步刷盘
在GroupCommitService内部维护了两个队列:读队列requestsRead和写队列requestsWrite。之所以要维护两个队列,是为了读写分离,避免锁竞争。当消息到来时,并不是立刻执行刷盘,而是先把刷盘请求放到requestsWrite队列中,并唤醒刷盘线程。每一轮处理完成后,线程最多等待10ms,随后requestsRead和requestsWrite互换角色,再次执行刷盘方法。上一次执行刷盘期间,requestsWrite中可能堆积了一些请求,交换之后就可以处理这些堆积的请求,如此周而复始。
/** Queue that {@code putRequest} appends new flush requests to. */
private volatile LinkedList<GroupCommitRequest> requestsWrite = new LinkedList<>();

/** Queue that {@code doCommit} iterates over when flushing. */
private volatile LinkedList<GroupCommitRequest> requestsRead = new LinkedList<>();

/** Held while appending to {@code requestsWrite} and while swapping the two queues. */
private final PutMessageSpinLock lock = new PutMessageSpinLock();
/**
 * Queues a flush request on the write queue and then signals the service
 * thread that a new request has arrived.
 *
 * @param request the flush request to enqueue
 */
public synchronized void putRequest(final GroupCommitRequest request) {
    lock.lock();
    try {
        // Add the flush request to the write-side queue.
        requestsWrite.add(request);
    } finally {
        lock.unlock();
    }

    // Notify the service that a new request is pending.
    wakeup();
}
/**
 * Service loop of the synchronous flush thread: until stopped, wait (up to 10 ms
 * as passed to waitForRunning) and then run doCommit. Exceptions are logged and
 * the loop continues. The tail of the method is elided ("......").
 */
public void run() {
CommitLog.log.info(this.getServiceName() + " service started");
while (!this.isStopped()) {
try {
// Wait with a 10 ms argument
this.waitForRunning(10);
// Perform the flush
this.doCommit();
} catch (Exception e) {
CommitLog.log.warn(this.getServiceName() + " service has exception. ", e);
}
}
......
}
/**
 * Exchanges the read and write queues while holding the lock, so that the
 * requests accumulated on the write side become the ones to be flushed.
 */
private void swapRequests() {
    lock.lock();
    try {
        // Swap the roles of the two queues.
        final LinkedList<GroupCommitRequest> pendingWrites = requestsWrite;
        requestsWrite = requestsRead;
        requestsRead = pendingWrites;
    } finally {
        lock.unlock();
    }
}
/**
 * Flushes the commit log on behalf of every request currently in the read
 * queue and reports the outcome to each request; when the read queue is
 * empty it still performs one unconditional flush.
 */
private void doCommit() {
    if (this.requestsRead.isEmpty()) {
        // Because of individual messages is set to not sync flush, it
        // will come to this process
        CommitLog.this.mappedFileQueue.flush(0);
        return;
    }

    for (GroupCommitRequest request : this.requestsRead) {
        // Nothing to do if the flushed position already covers this request's offset.
        boolean flushed = CommitLog.this.mappedFileQueue.getFlushedWhere() >= request.getNextOffset();

        // Up to two flush attempts, because a message may span two commit log files.
        int attempt = 0;
        while (attempt < 2 && !flushed) {
            CommitLog.this.mappedFileQueue.flush(0);
            flushed = CommitLog.this.mappedFileQueue.getFlushedWhere() >= request.getNextOffset();
            attempt++;
        }

        // Report the result to whoever is waiting on this request.
        final PutMessageStatus status;
        if (flushed) {
            status = PutMessageStatus.PUT_OK;
        } else {
            status = PutMessageStatus.FLUSH_DISK_TIMEOUT;
        }
        request.wakeupCustomer(status);
    }

    // Record the store timestamp in the checkpoint when one is available.
    final long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp();
    if (storeTimestamp > 0) {
        CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp);
    }

    // Start the next round with a fresh, empty read queue.
    this.requestsRead = new LinkedList<>();
}
异步刷盘
看完同步刷盘,再来看看异步刷盘是怎么做的
/**
 * Flush service used for asynchronous flushing (see the CommitLog constructor).
 * Its run loop waits for an interval, flushes the mapped file queue and updates
 * the store checkpoint; on shutdown it retries a full flush. Parts of the class
 * are elided in this excerpt ("......").
 */
class FlushRealTimeService extends FlushCommitLogService {
private long lastFlushTimestamp = 0;
private long printTimes = 0;
public void run() {
CommitLog.log.info(this.getServiceName() + " service started");
while (!this.isStopped()) {
// Waiting strategy flag, used further below
boolean flushCommitLogTimed = CommitLog.this.defaultMessageStore.getMessageStoreConfig().isFlushCommitLogTimed();
......
try {
// Is flushing strictly periodic?
if (flushCommitLogTimed) {
// Sleep for the fixed interval (0.5 seconds by default according to the article)
Thread.sleep(interval);
} else {
// Not periodic: flush as soon as the thread is woken up, or after the interval elapses
this.waitForRunning(interval);
}
if (printFlushProgress) {
this.printFlushProgress();
}
long begin = System.currentTimeMillis();
// Perform the flush
CommitLog.this.mappedFileQueue.flush(flushPhysicQueueLeastPages);
long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp();
if (storeTimestamp > 0) {
CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp);
}
long past = System.currentTimeMillis() - begin;
// Log a notice when the flush took longer than 500 ms
if (past > 500) {
log.info("Flush data to disk costs {} ms", past);
}
} catch (Throwable e) {
CommitLog.log.warn(this.getServiceName() + " service has exception. ", e);
this.printFlushProgress();
}
}
// The service is shutting down: try to make sure everything still in memory gets flushed
boolean result = false;
for (int i = 0; i < RETRY_TIMES_OVER && !result; i++) {
result = CommitLog.this.mappedFileQueue.flush(0);
CommitLog.log.info(this.getServiceName() + " service shutdown, retry " + (i + 1) + " times " + (result ? "OK" : "Not OK"));
}
this.printFlushProgress();
CommitLog.log.info(this.getServiceName() + " service end");
}
那开启堆外内存的异步刷盘是怎么做的呢?
其实做法跟异步刷盘是相似的,不过多了一层将数据从堆外内存写入page cache的操作
/**
 * Commit service that, per the surrounding article, is used for asynchronous
 * flushing with off-heap memory enabled. Its loop calls mappedFileQueue.commit
 * and wakes the flush service when data was committed. Parts of the class are
 * elided in this excerpt ("......").
 */
class CommitRealTimeService extends FlushCommitLogService {
private long lastCommitTimestamp = 0;
@Override
public String getServiceName() {
return CommitRealTimeService.class.getSimpleName();
}
@Override
public void run() {
CommitLog.log.info(this.getServiceName() + " service started");
while (!this.isStopped()) {
......
try {
// Commit pending data (the article describes this as moving data from off-heap memory into the page cache)
boolean result = CommitLog.this.mappedFileQueue.commit(commitDataLeastPages);
long end = System.currentTimeMillis();
if (!result) {
this.lastCommitTimestamp = end; // result = false means some data committed.
// Wake up flushCommitLogService so that it flushes to disk
flushCommitLogService.wakeup();
}
// Log a notice when the commit took longer than 500 ms (begin is set in the elided part)
if (end - begin > 500) {
log.info("Commit data to file costs {} ms", end - begin);
}
this.waitForRunning(interval);
} catch (Throwable e) {
CommitLog.log.error(this.getServiceName() + " service has exception. ", e);
}
}
......
}
}
刷盘策略方法入口
/**
 * Entry point of the flush strategy: hands the just-appended message to the
 * flush mechanism that matches the configured flush disk type.
 *
 * @param result     append result; its wrote offset plus wrote bytes is the offset a synchronous flush must reach
 * @param messageExt the appended message; {@code isWaitStoreMsgOK()} decides whether a synchronous flush is awaited
 * @return the request's future when a synchronous flush is awaited, otherwise a
 *         future already completed with {@code PutMessageStatus.PUT_OK}
 */
public CompletableFuture<PutMessageStatus> submitFlushRequest(AppendMessageResult result, MessageExt messageExt) {
    final boolean syncFlush =
        FlushDiskType.SYNC_FLUSH == this.defaultMessageStore.getMessageStoreConfig().getFlushDiskType();

    if (!syncFlush) {
        // Asynchronous flush: wake the commit service when the transient store pool
        // (off-heap memory) is enabled, otherwise wake the flush service directly.
        if (this.defaultMessageStore.getMessageStoreConfig().isTransientStorePoolEnable()) {
            commitLogService.wakeup();
        } else {
            flushCommitLogService.wakeup();
        }
        return CompletableFuture.completedFuture(PutMessageStatus.PUT_OK);
    }

    // Synchronous flush.
    final GroupCommitService service = (GroupCommitService) this.flushCommitLogService;
    if (!messageExt.isWaitStoreMsgOK()) {
        // The message does not wait for the store result: just nudge the service.
        service.wakeup();
        return CompletableFuture.completedFuture(PutMessageStatus.PUT_OK);
    }

    final GroupCommitRequest request = new GroupCommitRequest(
        result.getWroteOffset() + result.getWroteBytes(),
        this.defaultMessageStore.getMessageStoreConfig().getSyncFlushTimeout());
    service.putRequest(request);
    return request.future();
}
具体的刷盘方法位于MappedFile中
/**
 * Forces this mapped file's data to disk when the flush condition is met and
 * records the new flushed position.
 *
 * @param flushLeastPages threshold passed to {@code isAbleToFlush}
 * @return the flushed position after the call
 */
public int flush(final int flushLeastPages) {
    // Flush condition not met: report the current flushed position unchanged.
    if (!this.isAbleToFlush(flushLeastPages)) {
        return this.getFlushedPosition();
    }

    if (!this.hold()) {
        log.warn("in flush, hold failed, flush offset = " + this.flushedPosition.get());
        this.flushedPosition.set(getReadPosition());
        return this.getFlushedPosition();
    }

    final int readPosition = getReadPosition();
    try {
        final boolean useFileChannel = writeBuffer != null || this.fileChannel.position() != 0;
        if (useFileChannel) {
            // A write buffer exists or the channel has been written to: force the file channel.
            this.fileChannel.force(false);
        } else {
            // Otherwise force the memory-mapped buffer to disk.
            this.mappedByteBuffer.force();
        }
    } catch (Throwable e) {
        log.error("Error occurred when force data to disk.", e);
    }

    // The flushed position is advanced even when forcing failed (the error is only logged).
    this.flushedPosition.set(readPosition);
    this.release();
    return this.getFlushedPosition();
}