入口DefaultMessageStore
写消息的入口
org.apache.rocketmq.store.DefaultMessageStore#putMessage
/**
 * Stores a message synchronously by delegating to {@code asyncPutMessage} and blocking on the
 * future it returns.
 *
 * @param msg the message to store
 * @return the result produced by {@code asyncPutMessage}; a result carrying
 *         {@code PutMessageStatus.UNKNOWN_ERROR} (with a null append result) when the wait is
 *         interrupted or the future completes exceptionally
 * @author javaself
 */
@Override
public PutMessageResult putMessage(MessageExtBrokerInner msg) {
    try {
        // Block until the asynchronous write has finished.
        return asyncPutMessage(msg).get();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        return new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, null);
    } catch (ExecutionException e) {
        return new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, null);
    }
}
异步写消息
/**
 * Stores a message asynchronously.
 *
 * <p>Runs three pre-checks (store status, message validity, LMQ limits) and returns an already
 * completed future when one of them rejects the message. Otherwise the write is delegated to
 * {@code commitLog.asyncPutMessage}, and a callback records the elapsed time and failure count.
 *
 * @param msg the message to store
 * @return a future of the put result; the future returned by the commit log is handed back
 *         unchanged when all pre-checks pass
 * @author javaself
 */
@Override
public CompletableFuture<PutMessageResult> asyncPutMessage(MessageExtBrokerInner msg) {
    PutMessageStatus checkStoreStatus = this.checkStoreStatus();
    if (checkStoreStatus != PutMessageStatus.PUT_OK) {
        return CompletableFuture.completedFuture(new PutMessageResult(checkStoreStatus, null));
    }

    PutMessageStatus msgCheckStatus = this.checkMessage(msg);
    if (msgCheckStatus == PutMessageStatus.MESSAGE_ILLEGAL) {
        return CompletableFuture.completedFuture(new PutMessageResult(msgCheckStatus, null));
    }

    PutMessageStatus lmqMsgCheckStatus = this.checkLmqMessage(msg);
    // Test the LMQ check's own result. The previous comparison used msgCheckStatus, so the
    // outcome of checkLmqMessage was never examined.
    if (lmqMsgCheckStatus == PutMessageStatus.LMQ_CONSUME_QUEUE_NUM_EXCEEDED) {
        return CompletableFuture.completedFuture(new PutMessageResult(lmqMsgCheckStatus, null));
    }

    long beginTime = this.getSystemClock().now();
    // Hand the actual write over to the commit log.
    CompletableFuture<PutMessageResult> putResultFuture = this.commitLog.asyncPutMessage(msg);

    // Statistics only: the callback's own future is discarded, so it cannot alter the result
    // that callers receive.
    putResultFuture.thenAccept((result) -> {
        long elapsedTime = this.getSystemClock().now() - beginTime;
        if (elapsedTime > 500) {
            log.warn("putMessage not in lock elapsed time(ms)={}, bodyLength={}", elapsedTime, msg.getBody().length);
        }
        this.storeStatsService.setPutMessageEntireTimeMax(elapsedTime);

        if (null == result || !result.isOk()) {
            this.storeStatsService.getPutMessageFailedTimes().add(1);
        }
    });

    return putResultFuture;
}
调用的是CommitLog的写消息的方法
// Message storage: the commit log that DefaultMessageStore hands message writes to
private final CommitLog commitLog;
小结
1、写消息的入口是DefaultMessageStore
/**
 * Entry-point class for writing messages: it effectively exposes the message-write API.
 *
 * @author gzh
 */
public class DefaultMessageStore implements MessageStore {
2、但是真正写消息的地方是CommitLog
DefaultMessageStore会调用CommitLog的写消息的方法。
CommitLog
org.apache.rocketmq.store.CommitLog#asyncPutMessage
/**
 * Appends a message to the commit log and returns a future for the overall put result.
 *
 * <p>Steps, in order: stamp the store timestamp and body CRC; for non-transactional or commit
 * messages with a positive delay level, redirect the message to the schedule topic while backing
 * up the real topic and queue id as properties; set the IPv6 flags; pre-encode the message with
 * the thread-local encoder; append to the last mapped file while holding {@code putMessageLock};
 * finally submit the flush and replica requests and combine their statuses.
 *
 * @param msg the message to store; this method modifies its store timestamp, body CRC, encoded
 *            buffer and, for delayed messages, its topic, queue id and properties
 * @return a future of the put result. It is already completed when encoding returns a non-null
 *         result, when a mapped file cannot be obtained, or when the first append reports a
 *         status other than PUT_OK / END_OF_FILE. Otherwise the result starts as PUT_OK and is
 *         overridden by a non-PUT_OK flush status and then by a non-PUT_OK replica status
 * @author gzh
 */
public CompletableFuture<PutMessageResult> asyncPutMessage(final MessageExtBrokerInner msg) {
// Set the storage time
msg.setStoreTimestamp(System.currentTimeMillis());
// Set the message body BODY CRC (consider the most appropriate setting
// on the client)
msg.setBodyCRC(UtilAll.crc32(msg.getBody()));
// Back to Results
AppendMessageResult result = null;
StoreStatsService storeStatsService = this.defaultMessageStore.getStoreStatsService();
String topic = msg.getTopic();
int queueId = msg.getQueueId();
// Only non-transactional and commit messages are considered for delayed delivery
final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag());
if (tranType == MessageSysFlag.TRANSACTION_NOT_TYPE
|| tranType == MessageSysFlag.TRANSACTION_COMMIT_TYPE) {
// Delay Delivery
if (msg.getDelayTimeLevel() > 0) {
// Clamp the delay level to the maximum reported by the schedule message service
if (msg.getDelayTimeLevel() > this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel()) {
msg.setDelayTimeLevel(this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel());
}
// Redirect to the schedule topic; the queue id is derived from the delay level
topic = TopicValidator.RMQ_SYS_SCHEDULE_TOPIC;
queueId = ScheduleMessageService.delayLevel2QueueId(msg.getDelayTimeLevel());
// Backup real topic, queueId
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_TOPIC, msg.getTopic());
MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_QUEUE_ID, String.valueOf(msg.getQueueId()));
msg.setPropertiesString(MessageDecoder.messageProperties2String(msg.getProperties()));
msg.setTopic(topic);
msg.setQueueId(queueId);
}
}
// Flag IPv6 born/store host addresses on the message
InetSocketAddress bornSocketAddress = (InetSocketAddress) msg.getBornHost();
if (bornSocketAddress.getAddress() instanceof Inet6Address) {
msg.setBornHostV6Flag();
}
InetSocketAddress storeSocketAddress = (InetSocketAddress) msg.getStoreHost();
if (storeSocketAddress.getAddress() instanceof Inet6Address) {
msg.setStoreHostAddressV6Flag();
}
// Pre-encode before taking the lock; a non-null result from encode() is returned to the caller as-is
PutMessageThreadLocal putMessageThreadLocal = this.putMessageThreadLocal.get();
PutMessageResult encodeResult = putMessageThreadLocal.getEncoder().encode(msg);
if (encodeResult != null) {
return CompletableFuture.completedFuture(encodeResult);
}
msg.setEncodedBuff(putMessageThreadLocal.getEncoder().encoderBuffer);
PutMessageContext putMessageContext = new PutMessageContext(generateKey(putMessageThreadLocal.getKeyBuilder(), msg));
long elapsedTimeInLock = 0;
MappedFile unlockMappedFile = null;
putMessageLock.lock(); //spin or ReentrantLock ,depending on store config. Why lock? There is only one CommitLog, so appends run serially under this lock. The backing files are multiple, though: per the original author's note each file is 1G and the next one is created once the current one is full (file size not visible here - confirm against the store config).
try {
// Pick the mapped file to write to
MappedFile mappedFile = this.mappedFileQueue.getLastMappedFile();
long beginLockTimestamp = this.defaultMessageStore.getSystemClock().now();
this.beginTimeInLock = beginLockTimestamp;
// Here settings are stored timestamp, in order to ensure an orderly
// global
msg.setStoreTimestamp(beginLockTimestamp);
// No file yet, or the current one is full: request one via getLastMappedFile(0)
if (null == mappedFile || mappedFile.isFull()) {
mappedFile = this.mappedFileQueue.getLastMappedFile(0); // Mark: NewFile may be cause noise
}
if (null == mappedFile) {
log.error("create mapped file1 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, null));
}
// Append the message
result = mappedFile.appendMessage(msg, this.appendMessageCallback, putMessageContext);
switch (result.getStatus()) {
case PUT_OK:
break;
case END_OF_FILE:
// Remember the exhausted file; it is handed to unlockMappedFile after the lock is released
unlockMappedFile = mappedFile;
// Create a new file, re-write the message
mappedFile = this.mappedFileQueue.getLastMappedFile(0);
if (null == mappedFile) {
// XXX: warn and notify me
log.error("create mapped file2 error, topic: " + msg.getTopic() + " clientAddr: " + msg.getBornHostString());
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, result));
}
// NOTE(review): the status of this second append is not inspected before PUT_OK is reported below
result = mappedFile.appendMessage(msg, this.appendMessageCallback, putMessageContext);
break;
case MESSAGE_SIZE_EXCEEDED:
case PROPERTIES_SIZE_EXCEEDED:
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.MESSAGE_ILLEGAL, result));
case UNKNOWN_ERROR:
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result));
default:
return CompletableFuture.completedFuture(new PutMessageResult(PutMessageStatus.UNKNOWN_ERROR, result));
}
elapsedTimeInLock = this.defaultMessageStore.getSystemClock().now() - beginLockTimestamp;
} finally {
// Runs on every exit path, including the early returns above: reset the in-lock marker, then release the lock
beginTimeInLock = 0;
putMessageLock.unlock();
}
if (elapsedTimeInLock > 500) {
log.warn("[NOTIFYME]putMessage in lock cost time(ms)={}, bodyLength={} AppendMessageResult={}", elapsedTimeInLock, msg.getBody().length, result);
}
if (null != unlockMappedFile && this.defaultMessageStore.getMessageStoreConfig().isWarmMapedFileEnable()) {
this.defaultMessageStore.unlockMappedFile(unlockMappedFile);
}
PutMessageResult putMessageResult = new PutMessageResult(PutMessageStatus.PUT_OK, result);
// Statistics
storeStatsService.getSinglePutMessageTopicTimesTotal(msg.getTopic()).add(1);
storeStatsService.getSinglePutMessageTopicSizeTotal(topic).add(result.getWroteBytes());
// Submit the flush and replica requests, then merge their statuses into the final result
CompletableFuture<PutMessageStatus> flushResultFuture = submitFlushRequest(result, msg);
CompletableFuture<PutMessageStatus> replicaResultFuture = submitReplicaRequest(result, msg);
return flushResultFuture.thenCombine(replicaResultFuture, (flushStatus, replicaStatus) -> {
if (flushStatus != PutMessageStatus.PUT_OK) {
putMessageResult.setPutMessageStatus(flushStatus);
}
// A non-PUT_OK replica status is applied last, so it overrides the flush status
if (replicaStatus != PutMessageStatus.PUT_OK) {
putMessageResult.setPutMessageStatus(replicaStatus);
if (replicaStatus == PutMessageStatus.FLUSH_SLAVE_TIMEOUT) {
log.error("do sync transfer other node, wait return, but failed, topic: {} tags: {} client address: {}",
msg.getTopic(), msg.getTags(), msg.getBornHostNameString());
}
}
return putMessageResult;
});
}
调用的是MappedFile的写消息的方法。
MappedFile
MappedFile作用是什么?为什么要多搞一个类出来?
MappedFileQueue可以看作是${ROCKET_HOME}/store/commitlog文件夹,而MappedFile则对应该文件夹下一个个的文件。
org.apache.rocketmq.store.MappedFile#appendMessage
/**
 * Appends a single broker-inner message to this mapped file.
 *
 * <p>This is a thin entry point; the work is done by {@code appendMessagesInner}.
 *
 * @param msg the message to append
 * @param cb callback that performs the actual append into the buffer
 * @param putMessageContext context object passed through to the callback
 * @return the append result reported by {@code appendMessagesInner}
 * @author javaself
 */
public AppendMessageResult appendMessage(final MessageExtBrokerInner msg, final AppendMessageCallback cb,
    PutMessageContext putMessageContext) {
    final AppendMessageResult appendResult = this.appendMessagesInner(msg, cb, putMessageContext);
    return appendResult;
}
/**
 * Appends a message (single or batch) into this file's buffer through the supplied callback.
 *
 * <p>The target buffer is a slice of {@code writeBuffer} when that is non-null, otherwise a slice
 * of {@code mappedByteBuffer}, positioned at the current write position. After a callback-driven
 * append, the write position is advanced by the bytes the callback reports and the store
 * timestamp is refreshed from the result.
 *
 * @param messageExt the message to append; must be a {@code MessageExtBrokerInner} or a
 *                   {@code MessageExtBatch}
 * @param cb callback that writes the message into the buffer
 * @param putMessageContext context object passed through to the callback
 * @return the callback's append result, or a result with status {@code UNKNOWN_ERROR} when the
 *         write position has already reached the file size or the message type is unsupported
 * @author gzh
 */
public AppendMessageResult appendMessagesInner(final MessageExt messageExt, final AppendMessageCallback cb,
    PutMessageContext putMessageContext) {
    assert messageExt != null;
    assert cb != null;

    final int writeStart = this.wrotePosition.get();

    // Guard: nothing can be appended once the write position has reached the file size.
    if (writeStart >= this.fileSize) {
        log.error("MappedFile.appendMessage return null, wrotePosition: {} fileSize: {}", writeStart, this.fileSize);
        return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
    }

    // Prefer the write buffer when one is present; otherwise write into the mapped buffer.
    final ByteBuffer target;
    if (writeBuffer != null) {
        target = writeBuffer.slice();
    } else {
        target = this.mappedByteBuffer.slice();
    }
    target.position(writeStart);

    final AppendMessageResult appendResult;
    if (messageExt instanceof MessageExtBrokerInner) {
        // Single message
        final MessageExtBrokerInner single = (MessageExtBrokerInner) messageExt;
        appendResult = cb.doAppend(this.getFileFromOffset(), target, this.fileSize - writeStart,
            single, putMessageContext);
    } else if (messageExt instanceof MessageExtBatch) {
        // Batch of messages
        final MessageExtBatch batch = (MessageExtBatch) messageExt;
        appendResult = cb.doAppend(this.getFileFromOffset(), target, this.fileSize - writeStart,
            batch, putMessageContext);
    } else {
        return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
    }

    // Advance the write position by whatever the callback reports as written.
    this.wrotePosition.addAndGet(appendResult.getWroteBytes());
    this.storeTimestamp = appendResult.getStoreTimestamp();
    return appendResult;
}
接着会通过回调参数 cb 调用 CommitLog 的内部类 DefaultAppendMessageCallback 的 doAppend 方法。
MappedFile作用
MappedFile
是RocketMQ中的一个重要概念,用于管理消息存储的物理文件。它的作用是提供了一种映射文件到内存的方式,用于将消息存储在磁盘上,并允许快速的随机访问和读写。
具体来说,MappedFile
的作用如下:
- 消息存储:MappedFile 用于将消息数据存储在磁盘上。当消息被写入到 CommitLog 时,实际上是将消息写入到 MappedFile 中。消息存储文件会按固定大小切分成多个 MappedFile,每个 MappedFile 存储一部分消息。
- 随机访问和读写:MappedFile 通过将文件映射到内存中,实现了对消息数据的快速随机访问和读写。这对于消息的追加写入和查询都提供了高效的方式,避免了频繁的磁盘寻址操作。
- 文件管理:MappedFile 还负责管理消息存储文件的状态、元数据和清理等操作。它会跟踪文件的写入位置、已使用空间等信息,以及维护文件的刷盘(Flush)和删除等操作。
- 文件切换:一条消息不会被拆分到多个 MappedFile 中。当前文件的剩余空间放不下整条消息时,doAppend 会在文件末尾写入空白标记(BLANK_MAGIC_CODE)并返回 END_OF_FILE,随后 CommitLog 获取下一个 MappedFile,把整条消息重新写入新文件。
总的来说,MappedFile
在RocketMQ中扮演了消息物理存储和访问的桥梁,提供了高效的文件管理和读写能力,用于支持消息的持久化存储和快速检索。
CommitLog.doAppend方法
org.apache.rocketmq.store.CommitLog.DefaultAppendMessageCallback#doAppend
/**
 * Writes one pre-encoded message into the supplied buffer. Nothing is flushed to disk here;
 * flushing is requested later by the caller, outside this method.
 *
 * <p>If the message plus {@code END_FILE_MIN_BLANK_LENGTH} does not fit into {@code maxBlank},
 * an 8-byte end-of-file marker (total size + blank magic code) is written instead and
 * {@code END_OF_FILE} is returned. Otherwise the queue offset, physical offset and store
 * timestamp are patched into the pre-encoded buffer, which is then copied into
 * {@code byteBuffer}.
 *
 * @param fileFromOffset offset of the mapped file, added to the buffer position to obtain the physical write offset
 * @param byteBuffer target buffer, already positioned at the write position by the caller
 * @param maxBlank number of bytes still available for this append
 * @param msgInner the message; its encoded buffer is read here and reset to null after the write
 * @param putMessageContext supplies the key under which the queue offset is tracked in {@code topicQueueTable}
 * @return {@code PUT_OK} with the write offset and message length, {@code END_OF_FILE} when the
 *         message does not fit, or {@code UNKNOWN_ERROR} when {@code wrapMultiDispatch} returns false
 * @author gzh
 */
public AppendMessageResult doAppend(final long fileFromOffset, final ByteBuffer byteBuffer, final int maxBlank,
final MessageExtBrokerInner msgInner, PutMessageContext putMessageContext) {
// STORETIMESTAMP + STOREHOSTADDRESS + OFFSET <br>
// PHY OFFSET
long wroteOffset = fileFromOffset + byteBuffer.position();
// The message id is built lazily: store host address bytes followed by the 8-byte write offset
Supplier<String> msgIdSupplier = () -> {
int sysflag = msgInner.getSysFlag();
int msgIdLen = (sysflag & MessageSysFlag.STOREHOSTADDRESS_V6_FLAG) == 0 ? 4 + 4 + 8 : 16 + 4 + 8;
ByteBuffer msgIdBuffer = ByteBuffer.allocate(msgIdLen);
MessageExt.socketAddress2ByteBuffer(msgInner.getStoreHost(), msgIdBuffer);
msgIdBuffer.clear();//because socketAddress2ByteBuffer flip the buffer
msgIdBuffer.putLong(msgIdLen - 8, wroteOffset);
return UtilAll.bytes2string(msgIdBuffer.array());
};
// Record ConsumeQueue information
String key = putMessageContext.getTopicQueueTableKey();
Long queueOffset = CommitLog.this.topicQueueTable.get(key);
// First message seen for this key: start its queue offset at 0
if (null == queueOffset) {
queueOffset = 0L;
CommitLog.this.topicQueueTable.put(key, queueOffset);
}
boolean multiDispatchWrapResult = CommitLog.this.multiDispatch.wrapMultiDispatch(msgInner);
if (!multiDispatchWrapResult) {
return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR);
}
// Transaction messages that require special handling
final int tranType = MessageSysFlag.getTransactionValue(msgInner.getSysFlag());
switch (tranType) {
// Prepared and Rollback message is not consumed, will not enter the
// consumer queue
case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
queueOffset = 0L;
break;
case MessageSysFlag.TRANSACTION_NOT_TYPE:
case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
default:
break;
}
// Pre-encoded message data; the int at index 0 is read as the total message length
ByteBuffer preEncodeBuffer = msgInner.getEncodedBuff();
final int msgLen = preEncodeBuffer.getInt(0);
// Determines whether there is sufficient free space
if ((msgLen + END_FILE_MIN_BLANK_LENGTH) > maxBlank) {
this.msgStoreItemMemory.clear();
// 1 TOTALSIZE
this.msgStoreItemMemory.putInt(maxBlank);
// 2 MAGICCODE
this.msgStoreItemMemory.putInt(CommitLog.BLANK_MAGIC_CODE);
// 3 The remaining space may be any value
// Here the length of the specially set maxBlank
final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
byteBuffer.put(this.msgStoreItemMemory.array(), 0, 8);
return new AppendMessageResult(AppendMessageStatus.END_OF_FILE, wroteOffset,
maxBlank, /* only wrote 8 bytes, but declare wrote maxBlank for compute write position */
msgIdSupplier, msgInner.getStoreTimestamp(),
queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);
}
// Skip the first five 4-byte fields (fields 1-5 in the numbering used by the comments below)
int pos = 4 + 4 + 4 + 4 + 4;
// 6 QUEUEOFFSET
preEncodeBuffer.putLong(pos, queueOffset);
pos += 8;
// 7 PHYSICALOFFSET
preEncodeBuffer.putLong(pos, fileFromOffset + byteBuffer.position());
// Born host takes 4 + 4 bytes unless the born-host V6 flag is set, in which case 16 + 4
int ipLen = (msgInner.getSysFlag() & MessageSysFlag.BORNHOST_V6_FLAG) == 0 ? 4 + 4 : 16 + 4;
// 8 SYSFLAG, 9 BORNTIMESTAMP, 10 BORNHOST, 11 STORETIMESTAMP
pos += 8 + 4 + 8 + ipLen;
// refresh store time stamp in lock
preEncodeBuffer.putLong(pos, msgInner.getStoreTimestamp());
final long beginTimeMills = CommitLog.this.defaultMessageStore.now();
// Write messages to the queue buffer
// The message goes into the buffer only; it has not reached the disk at this point
byteBuffer.put(preEncodeBuffer); // written to the buffer first; flushing to disk happens later
msgInner.setEncodedBuff(null);
AppendMessageResult result = new AppendMessageResult(AppendMessageStatus.PUT_OK, wroteOffset, msgLen, msgIdSupplier,
msgInner.getStoreTimestamp(), queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);
switch (tranType) {
case MessageSysFlag.TRANSACTION_PREPARED_TYPE:
case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE:
break;
case MessageSysFlag.TRANSACTION_NOT_TYPE:
case MessageSysFlag.TRANSACTION_COMMIT_TYPE:
// The next update ConsumeQueue information
CommitLog.this.topicQueueTable.put(key, ++queueOffset);
// Let multiDispatch update its queue offsets for this message as well
CommitLog.this.multiDispatch.updateMultiQueueOffset(msgInner);
break;
default:
break;
}
return result;
}
这里才是真正写消息的地方。具体的话是,先写到缓冲区(MappedFile.缓冲区),然后再异步刷盘。
所谓缓冲区,其实就是 ByteBuffer:writeBuffer 不为 null 时写入 writeBuffer,否则直接写入内存映射缓冲区 mappedByteBuffer。
public class MappedFile extends ReferenceResource {
/**
 * Message will put to here first, and then reput to FileChannel if writeBuffer is not null.
 */
protected ByteBuffer writeBuffer = null;
// Memory-mapped buffer of the file; appends go here when writeBuffer is null
private MappedByteBuffer mappedByteBuffer;
rocketmq有没有使用jdk标准队列类?
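在RocketMQ的源代码中,消息存储这一核心部分并不是用JDK标准队列类实现的:RocketMQ是一个分布式消息中间件,需要满足高性能、可靠性、持久化等要求,因此使用了基于文件的自定义数据结构。不过在其他地方,RocketMQ也会直接使用JDK的并发容器,例如线程池的任务队列使用LinkedBlockingQueue,MappedFileQueue内部用CopyOnWriteArrayList保存MappedFile。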
RocketMQ的内部实现通常会使用自定义的队列、缓存、数据结构等,以满足其作为消息中间件的需求。例如,前面提到的CommitLog
、ConsumeQueue
、MessageQueue
等都是RocketMQ内部自定义的类,用于实现特定的功能。
虽然RocketMQ的底层实现可能会使用一些JDK提供的基础类库,但通常会进行封装或者结合自定义的实现,以满足其独特的需求。
那CommitLog到底是不是队列呢?
虽然在RocketMQ中,CommitLog在某种程度上与消息队列有关,但它本身并不是传统意义上的队列,而是一种用于存储消息数据的物理文件。因此,可以说CommitLog不是队列,而是RocketMQ消息存储的一部分。
在RocketMQ中,消息的传递和消费涉及多个组件,包括Broker、NameServer、Consumer等,它们共同协作实现了消息的分发、存储和消费。CommitLog是其中的一环,它负责持久化消息到磁盘,确保消息不会因为Broker的重启或故障而丢失。然后,消费者根据订阅关系和消息的偏移量来消费消息,而非直接从CommitLog中出队。
因此,虽然CommitLog在RocketMQ的消息传递流程中起到了重要的作用,但它与传统的队列在使用方式上存在一些不同。它更像是消息存储的一部分,而非严格意义上的队列。
参考
《rocketmq技术内幕》