整体结构
生产者
主要结构
消息发送的入口
1.KafkaProducer.send(),并唤醒sender
// Excerpt from KafkaProducer.doSend(): the five steps of producing a record.
// 1. Block until cluster metadata for the topic is available (bounded by max.block.ms).
ClusterAndWaitTime clusterAndWaitTime = waitOnMetadata(record.topic(), record.partition(), maxBlockTimeMs);
// 2. Serialize key and value with the configured serializers.
...
serializedKey = keySerializer.serialize(record.topic(),record.headers(), record.key());
serializedValue = valueSerializer.serialize(record.topic(), record.headers(), record.value());
...
// 3. Choose the target partition (default partitioner hashes the serialized key).
int partition = partition(record, serializedKey, serializedValue, cluster);
tp = new TopicPartition(record.topic(), partition);
...
// 4. Append the record to the RecordAccumulator, batched per TopicPartition.
RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey,
serializedValue, headers, interceptCallback, remainingWaitMs);
// 5. Wake up the Sender thread and return the future used by the send callback.
return result.future;
--确定好发送的partition后,消息以 TopicPartition 为维度封装成批次,放入消息记录收集器(RecordAccumulator)
// RecordAccumulator buffers records per TopicPartition until the Sender drains them.
public final class RecordAccumulator {
// One deque of in-progress batches per partition.
private final ConcurrentMap<TopicPartition, Deque<ProducerBatch>> batches;
// Partitions temporarily excluded from draining (used when ordering must be guaranteed).
private final Set<TopicPartition> muted;
public RecordAppendResult append(...){
// check if we have an in-progress batch and try to append to it first
Deque<ProducerBatch> dq = getOrCreateDeque(tp);
synchronized (dq) {
RecordAppendResult appendResult = tryAppend(timestamp, key, value, headers, callback, dq);
if (appendResult != null)
return appendResult;
}
}
}
2.Sender.run()轮询执行消息发送
// Sender is the producer's I/O thread: it repeatedly drains the accumulator
// and polls the network client.
// NOTE(review): braces in this excerpt are unbalanced (run()'s while loop is
// not closed) — the missing code was elided in the original notes.
public class Sender implements Runnable {
public void run() {
while (running) {
...
long pollTimeout = sendProducerData(now);
client.poll(pollTimeout, now);
...
}
// Builds and sends produce requests for all partitions that are ready.
private long sendProducerData(long now) {
Cluster cluster = metadata.fetch();
// Find the partitions that are ready to send (enough queued data or the
// linger interval elapsed) and resolve the leader node for each of them.
RecordAccumulator.ReadyCheckResult result = this.accumulator.ready(cluster, now);
// Drain batches grouped by node: key is the node id, value is the list of
// batches to send to that node.
Map<Integer, List<ProducerBatch>> batches = this.accumulator.drain(cluster, result.readyNodes,
this.maxRequestSize, now);
// If ordering must be guaranteed, mute each drained partition so no second
// in-flight batch is created for it.
if (guaranteeMessageOrder) {
// Mute all the partitions drained
for (List<ProducerBatch> batchList : batches.values()) {
for (ProducerBatch batch : batchList)
this.accumulator.mutePartition(batch.topicPartition);
}
}
sendProduceRequests(batches, now);
return pollTimeout;
}
}
3.调用NetworkClient.doSend发送
// Builds an InFlightRequest for the already-built request and hands the
// serialized Send to the selector. Excerpt from NetworkClient.
// Fixed: the excerpt used nodeId (in the canSendRequest check) before its
// declaration; the declaration must come first.
private void doSend(ClientRequest clientRequest, boolean isInternalRequest, long now, AbstractRequest request) {
    String nodeId = clientRequest.destination();
    // Fail fast if the connection cannot accept another request
    // (see canSendRequest below).
    if (!canSendRequest(nodeId))
        throw new IllegalStateException("Attempt to send a request to node " + nodeId + " which is not ready.");
    RequestHeader header = clientRequest.makeHeader(request.version());
    Send send = request.toSend(nodeId, header);
    // Requests that were sent but not yet answered are tracked in inFlightRequests.
    InFlightRequest inFlightRequest = new InFlightRequest(
            header,
            clientRequest.createdTimeMs(),
            clientRequest.destination(),
            clientRequest.callback(),
            clientRequest.expectResponse(),
            isInternalRequest,
            request,
            send,
            now);
    this.inFlightRequests.add(inFlightRequest);
    selector.send(inFlightRequest.send);
}
// Returns true when another request may be sent to the given node: the node's
// in-flight queue is empty, or the oldest request has been fully written to the
// socket and the queue is below maxInFlightRequestsPerConnection (producer
// config max.in.flight.requests.per.connection; ordering is only guaranteed
// when it is 1).
public boolean canSendRequest(String node) {
Deque<NetworkClient.InFlightRequest> queue = requests.get(node);
return queue == null || queue.isEmpty() ||
(queue.peekFirst().send.completed() && queue.size() < this.maxInFlightRequestsPerConnection);
}
4.发送后处理 NetworkClient.poll()
// NetworkClient.poll(): performs the actual socket I/O and converts completed
// sends/receives into ClientResponses.
public List<ClientResponse> poll(long timeout, long now) {
// Block on the selector until there is I/O activity or the timeout expires.
this.selector.poll(Utils.min(timeout, metadataTimeout, requestTimeoutMs));
...
List<ClientResponse> responses = new ArrayList<>();
handleCompletedSends(responses, updatedNow);
completeResponses(responses);
...
return responses;
}
// Handles sends that finished in the last poll. Requests that expect no broker
// response are removed from the in-flight queue immediately and completed with
// a synthetic (null-body) ClientResponse.
private void handleCompletedSends(List<ClientResponse> responses, long now) {
for (Send send : this.selector.completedSends()) {
// Peek at the most recently sent in-flight request for this node.
InFlightRequest request = this.inFlightRequests.lastSent(send.destination());
if (!request.expectResponse) {
// No response expected: remove the InFlightRequest and complete it now.
this.inFlightRequests.completeLastSent(send.destination());
responses.add(request.completed(null, now));
}
}
}
5.重试机制,Sender.SendProduceRequest中的callback方法
// Completion handler installed by Sender.sendProduceRequest(): routes the
// broker's produce response (success, error, retry) to handleProduceResponse.
RequestCompletionHandler callback = new RequestCompletionHandler() {
public void onComplete(ClientResponse response) {
handleProduceResponse(response, recordsByPartition, time.milliseconds());
}
}
// A batch may be retried when it has attempts left (bounded by the producer
// config "retries") and the error is retriable, or the transaction manager
// explicitly allows the retry.
private boolean canRetry(ProducerBatch batch, ProduceResponse.PartitionResponse response) {
return batch.attempts() < this.retries &&
((response.error.exception() instanceof RetriableException) ||
(transactionManager != null && transactionManager.canRetry(response, batch)));
}
// Re-queues a failed batch for retry. It goes to the FRONT of the partition's
// deque so it is sent before newer batches of the same partition.
public void reenqueue(ProducerBatch batch, long now) {
batch.reenqueued(now);
Deque<ProducerBatch> deque = getOrCreateDeque(batch.topicPartition);
synchronized (deque) {
if (transactionManager != null)
// Idempotent/transactional producer: keep sequence-number order.
insertInSequenceOrder(deque, batch);
else
deque.addFirst(batch);
}
}
重试异常的种类
消费者客户端
主要方法
// Main KafkaConsumer public API (signatures only):
// subscribe to topics, with a listener notified on rebalances
public void subscribe(Collection<String> topics, ConsumerRebalanceListener listener)
// cancel the current subscription
public void unsubscribe() {
// fetch records from the brokers
public ConsumerRecords<K, V> poll(long timeout)
// commit offsets synchronously
public void commitSync(final Map<TopicPartition, OffsetAndMetadata> offsets)
// commit offsets asynchronously with a completion callback
public void commitAsync(final Map<TopicPartition, OffsetAndMetadata> offsets, OffsetCommitCallback callback)
position
1. poll()方法
// One iteration of KafkaConsumer.poll(): coordinator housekeeping, position
// initialization, fetch dispatch, and collection of completed fetches.
private Map<TopicPartition, List<ConsumerRecord<K, V>>> pollOnce(long timeout) {
client.maybeTriggerWakeup();
// Coordinator poll; the automatic offset commit happens inside this call.
coordinator.poll(time.milliseconds(), timeout);
// If we lack a valid fetch position for some assigned partition
// (e.g. the group consumes a partition for the first time), resolve it.
if (!subscriptions.hasAllFetchPositions())
// Reset positions according to the configured strategy (auto.offset.reset).
updateFetchPositions(this.subscriptions.missingFetchPositions());
// Send the fetch requests that actually retrieve records.
fetcher.sendFetches();
// Block until at least one fetch has completed (or the poll timeout expires).
client.poll(pollTimeout, now, new PollCondition() {
@Override
public boolean shouldBlock() {
return !fetcher.hasCompletedFetches();
}
});
// Return the fetched records, running them through interceptors when present.
if (this.interceptors == null)
return new ConsumerRecords<>(records);
else
return this.interceptors.onConsume(new ConsumerRecords<>(records));
}
// Maps a partition to the reset timestamp implied by the consumer config
// auto.offset.reset:
//   EARLIEST - start consuming from the oldest available message
//   LATEST   - start consuming from the newest message
// otherwise the partition is reported as having no valid offset.
private void offsetResetStrategyTimestamp(
final TopicPartition partition,
final Map<TopicPartition, Long> output,
final Set<TopicPartition> partitionsWithNoOffsets) {
OffsetResetStrategy strategy = subscriptions.resetStrategy(partition);
if (strategy == OffsetResetStrategy.EARLIEST)
output.put(partition, ListOffsetRequest.EARLIEST_TIMESTAMP);
else if (strategy == OffsetResetStrategy.LATEST)
output.put(partition, endTimestamp());
else
partitionsWithNoOffsets.add(partition);
}
2.Fetcher.sendFetches() 发送请求
// Builds one fetch request per broker node (carrying max wait time, byte
// limits, etc.) and sends each; returns the number of requests sent.
public int sendFetches() {
Map<Node, FetchRequest.Builder> fetchRequestMap = createFetchRequests();
for (Map.Entry<Node, FetchRequest.Builder> fetchEntry : fetchRequestMap.entrySet()) {
final FetchRequest.Builder request = fetchEntry.getValue();
final Node fetchTarget = fetchEntry.getKey();
// Send asynchronously; the listener records the completed fetch (elided).
client.send(fetchTarget, request)
.addListener(new RequestFutureListener<ClientResponse>() {
@Override
public void onSuccess(ClientResponse resp) {
...
}
}
}
return fetchRequestMap.size();
}
// Drains up to maxRecords records from a completed fetch and advances the
// locally tracked position of the partition.
private List<ConsumerRecord<K, V>> fetchRecords(PartitionRecords partitionRecords, int maxRecords) {
// Current consume position for this partition.
long position = subscriptions.position(partitionRecords.partition);
// Only use the fetched data if it still matches the current position
// (a seek may have happened after the fetch was issued).
if (partitionRecords.nextFetchOffset == position) {
List<ConsumerRecord<K, V>> partRecords = partitionRecords.fetchRecords(maxRecords);
long nextOffset = partitionRecords.nextFetchOffset;
// Advance the local position to the next offset to fetch.
subscriptions.position(partitionRecords.partition, nextOffset);
return partRecords;
}
return emptyList();
}
位移提交
自动提交位移ConsumerCoordinator#poll
ConsumerCoordinator是和消费者协调broker交互的工具类
// ConsumerCoordinator.poll(): group/commit housekeeping run from every
// KafkaConsumer.poll() (ConsumerCoordinator talks to the coordinator broker).
public void poll(long now, long remainingMs) {
// Deliver callbacks for commits that completed since the last poll.
invokeCompletedOffsetCommitCallbacks();
...
// Trigger an asynchronous auto-commit when it is due.
maybeAutoCommitOffsetsAsync(now);
}
// Commits all consumed offsets asynchronously when the auto-commit deadline
// has passed.
private void maybeAutoCommitOffsetsAsync(long now) {
// Only when auto-commit is enabled (consumer config enable.auto.commit).
if (autoCommitEnabled) {
// If the deadline has passed, commit now and schedule the next commit
// auto.commit.interval.ms from now.
if (now >= nextAutoCommitDeadline) {
this.nextAutoCommitDeadline = now + autoCommitIntervalMs;
Map<TopicPartition, OffsetAndMetadata> allConsumedOffsets = subscriptions.allConsumed();
commitOffsetsAsync(allConsumedOffsets, new OffsetCommitCallback() {
@Override
public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
if (exception != null) {
log.warn("Asynchronous auto-commit of offsets {} failed: {}", offsets, exception.getMessage());
// On a retriable failure, pull the deadline forward so the
// commit is retried after retryBackoffMs.
if (exception instanceof RetriableException)
nextAutoCommitDeadline = Math.min(time.milliseconds() + retryBackoffMs, nextAutoCommitDeadline);
} else {
log.debug("Completed asynchronous auto-commit of offsets {}", offsets);
}
}
});
}
}
}
// Builds an OffsetCommitRequest from the given offsets and sends it to the
// group coordinator broker.
private RequestFuture<Void> sendOffsetCommitRequest(final Map<TopicPartition, OffsetAndMetadata> offsets) {
// The broker node currently acting as this group's coordinator.
Node coordinator = checkAndGetCoordinator();
// Convert the offsets into per-partition request data (details elided).
Map<TopicPartition, OffsetCommitRequest.PartitionData> offsetData = new HashMap<>(offsets.size());
for (Map.Entry<TopicPartition, OffsetAndMetadata> entry : offsets.entrySet()) {
...
}
...
return client.send(coordinator, builder)
.compose(new OffsetCommitResponseHandler(offsets));
}
消费者rebalance
触发场景: 1.消费者加入 2.消费者下线 3.消费者退出 4.partition数量变化 ...
消费者通过心跳感知rebalance
ConsumerCoordinator.HeartbeatResponseHandler
// Consumers learn about a rebalance through the heartbeat: the coordinator
// answers REBALANCE_IN_PROGRESS and the handler sets rejoinNeeded (via
// requestRejoin) so the next poll re-joins the group.
private class HeartbeatResponseHandler extends CoordinatorResponseHandler<HeartbeatResponse, Void> {
@Override
public void handle(HeartbeatResponse heartbeatResponse, RequestFuture<Void> future) {
sensors.heartbeatLatency.record(response.requestLatencyMs());
Errors error = heartbeatResponse.error();
...
else if (error == Errors.REBALANCE_IN_PROGRESS) {
log.debug("Attempt to heartbeat failed since group is rebalancing");
requestRejoin();
future.raise(Errors.REBALANCE_IN_PROGRESS);
}
...
}
}
// Marks this member as needing to re-join the group on the next poll.
protected synchronized void requestRejoin() {
this.rejoinNeeded = true;
}
// Called at the start of Consumer.poll(): drives the join-group protocol
// for as long as a rebalance is needed.
void joinGroupIfNeeded() {
// rejoinNeeded (set by requestRejoin) decides whether a rebalance starts.
while (needRejoin() || rejoinIncomplete()) {
// Make sure the group coordinator is known and reachable.
ensureCoordinatorReady();
if (needsJoinPrepare) {
// Pre-join work (offset commit / onPartitionsRevoked), done once per rejoin.
onJoinPrepare(generation.generationId, generation.memberId);
needsJoinPrepare = false;
}
// Send the JoinGroup request.
RequestFuture<ByteBuffer> future = initiateJoinGroup();
client.poll(future);
if (future.succeeded()) {
onJoinComplete(generation.generationId, generation.memberId, generation.protocol, future.value());
resetJoinGroupFuture();
needsJoinPrepare = true;
}
}
}
消费者发送加入请求 ConsumerCoordinator.SendJoinGroupRequest
private synchronized RequestFuture<ByteBuffer> initiateJoinGroup() {
// 状态更新
state = MemberState.REBALANCING;
joinFuture = sendJoinGroupRequest();
joinFuture.addListener(new RequestFutureListener<ByteBuffer>() {
@Override
public void onSuccess(ByteBuffer value) {
synchronized (AbstractCoordinator.this) {
// 状态变更为STABLE 启动心跳
state = MemberState.STABLE;
rejoinNeeded = false;
if (heartbeatThread != null)
heartbeatThread.enable();
}
}
});
}
return joinFuture;
}
// Sends a JoinGroupRequest to the coordinator; the response is handled by
// JoinGroupResponseHandler.
private RequestFuture<ByteBuffer> sendJoinGroupRequest() {
JoinGroupRequest.Builder requestBuilder = new JoinGroupRequest.Builder(
groupId,
this.sessionTimeoutMs,
this.generation.memberId,
protocolType(),
metadata()).setRebalanceTimeout(this.rebalanceTimeoutMs);
return client.send(coordinator, requestBuilder)
.compose(new JoinGroupResponseHandler());
}
GroupCoordinator返回JoinGroupResponse
// Handles the JoinGroupResponse sent back by the GroupCoordinator: the elected
// group leader computes the assignment (onJoinLeader); every other member just
// asks the coordinator for its share (onJoinFollower).
private class JoinGroupResponseHandler extends CoordinatorResponseHandler<JoinGroupResponse, ByteBuffer> {
@Override
public void handle(JoinGroupResponse joinResponse, RequestFuture<ByteBuffer> future) {
if (joinResponse.isLeader()) {
onJoinLeader(joinResponse).chain(future);
} else {
onJoinFollower().chain(future);
}
}
}
客户端发送SyncGroupRequest
// Group-leader callback: compute the partition assignment for the whole group
// and send it to the coordinator in a SyncGroupRequest.
private RequestFuture<ByteBuffer> onJoinLeader(JoinGroupResponse joinResponse) {
try {
// Run the strategy configured via partition.assignment.strategy.
Map<String, ByteBuffer> groupAssignment = performAssignment(joinResponse.leaderId(), joinResponse.groupProtocol(),
joinResponse.members());
SyncGroupRequest.Builder requestBuilder =
new SyncGroupRequest.Builder(groupId, generation.generationId, generation.memberId, groupAssignment);
return sendSyncGroupRequest(requestBuilder);
} catch (RuntimeException e) {
return RequestFuture.failure(e);
}
}
// Ordinary-member callback: send an empty assignment map; the coordinator
// replies with the assignment computed by the leader.
private RequestFuture<ByteBuffer> onJoinFollower() {
SyncGroupRequest.Builder requestBuilder =
new SyncGroupRequest.Builder(groupId, generation.generationId, generation.memberId,
Collections.<String, ByteBuffer>emptyMap());
return sendSyncGroupRequest(requestBuilder);
}
@Override
protected void onJoinComplete(int generation,
String memberId,
String assignmentStrategy,
ByteBuffer assignmentBuffer) {
// only the leader is responsible for monitoring for metadata changes (i.e. partition changes)
if (!isLeader)
assignmentSnapshot = null;
// 根据配置获取partition的分配策略
PartitionAssignor assignor = lookupAssignor(assignmentStrategy);
//根据返回内容获取所有的partition信息
Assignment assignment = ConsumerProtocol.deserializeAssignment(assignmentBuffer);
// 将是否刷新offset设为true
subscriptions.needRefreshCommits();
// 重新订阅分配到的partition
subscriptions.assignFromSubscribed(assignment.partitions());
// 根据新的parttition获取之前没有订阅的topic
Set<String> addedTopics = new HashSet<>();
for (TopicPartition tp : subscriptions.assignedPartitions()) {
if (!joinedSubscription.contains(tp.topic()))
addedTopics.add(tp.topic());
}
if (!addedTopics.isEmpty()) {
Set<String> newSubscription = new HashSet<>(subscriptions.subscription());
Set<String> newJoinedSubscription = new HashSet<>(joinedSubscription);
newSubscription.addAll(addedTopics);
newJoinedSubscription.addAll(addedTopics);
this.subscriptions.subscribeFromPattern(newSubscription);
this.joinedSubscription = newJoinedSubscription;
}
// 更新元数据
this.metadata.setTopics(subscriptions.groupSubscription());
client.ensureFreshMetadata();
// give the assignor a chance to update internal state based on the received assignment
assignor.onAssignment(assignment);
// 更新自动提交位移的时间
this.nextAutoCommitDeadline = time.milliseconds() + autoCommitIntervalMs;
// 执行设置的ConsumerRebalanceListener
ConsumerRebalanceListener listener = subscriptions.listener();」
listener.onPartitionsAssigned(assigned);
}
spring-kafka 消费者客户端
org.springframework.kafka.listener.KafkaMessageListenerContainer.ListenerConsumer#run
// Main loop of spring-kafka's ListenerConsumer: commit, seek, poll, dispatch.
public void run() {
while (isRunning()) {
try {
// Manual commit modes (neither auto-commit nor per-record ack):
if (!this.autoCommit && !this.isRecordAck) {
// flush the accumulated acknowledgements as offset commits.
processCommits();
}
// Apply any pending seek requests to the consumer's local positions.
processSeeks();
// Delegate to the native client's poll().
ConsumerRecords<K, V> records = this.consumer.poll(this.containerProperties.getPollTimeout());
if (records != null && records.count() > 0) {
// Invoke the annotated listener method(s).
invokeListener(records);
}
} catch (Exception e) {
handleConsumerException(e);
}
}
this.consumer.close();
}
位移提交策略
// spring-kafka offset commit strategies.
public enum AckMode {
// Commit after each record is processed by the listener (ListenerConsumer).
RECORD,
// Commit after each batch returned by poll() is processed by the listener.
BATCH,
// Like BATCH, but only when more than TIME ms have passed since the last commit.
TIME,
// Like BATCH, but only when at least COUNT records have been processed.
COUNT,
// Commit when either the TIME or the COUNT condition is met.
COUNT_TIME,
// Commit after the batch is processed AND Acknowledgment.acknowledge() was called.
MANUAL,
// Commit immediately when Acknowledgment.acknowledge() is called.
MANUAL_IMMEDIATE,
}
消息处理完后会调用ackCurrent
// Called after each record is processed: commits or accumulates the offset
// according to the configured AckMode.
public void ackCurrent(final ConsumerRecord<K, V> record,
@SuppressWarnings(RAW_TYPES) @Nullable Producer producer) {
// RECORD mode: commit this record's offset (+1) straight away.
if (this.isRecordAck) {
Map<TopicPartition, OffsetAndMetadata> offsetsToCommit =
Collections.singletonMap(new TopicPartition(record.topic(), record.partition()),
new OffsetAndMetadata(record.offset() + 1));
// Sync or async commit depending on the container configuration.
if (this.containerProperties.isSyncCommits()) {
this.consumer.commitSync(offsetsToCommit);
}
else {
this.consumer.commitAsync(offsetsToCommit, this.commitCallback);
}
}
// Other non-auto, non-MANUAL modes: remember the record; processCommits()
// turns the accumulated acks into commits later.
else if (!this.isAnyManualAck && !this.autoCommit) {
this.acks.add(record);
}
}
消费异常处理
// Invokes the listener for a single record; on failure, delegates to the
// configured errorHandler (e.g. SeekToCurrentErrorHandler below).
// NOTE(review): this excerpt is heavily elided (the try block and the Chinese
// placeholder "调用onMessage();" stand in for the actual listener invocation).
private RuntimeException doInvokeRecordListener(final ConsumerRecord<K, V> record,
@SuppressWarnings("rawtypes") Producer producer,
Iterator<ConsumerRecord<K, V>> iterator) throws Error {
调用onMessage();
...
// Commit/accumulate the offset according to the AckMode.
ackCurrent(record, producer);
}
catch (RuntimeException e) {
...
this.errorHandler.handle(e, record, this.consumer);
}
SeekToCurrentErrorHandler
// SeekToCurrentErrorHandler: after a listener failure, seek each affected
// partition back to the failed record's offset so the records are re-polled.
// NOTE(review): 'first' and 'skipped' (atomic flags) are declared in code
// elided from this excerpt.
public static boolean doSeeks(List<ConsumerRecord<?, ?>> records, Consumer<?, ?> consumer, Exception exception,
boolean recoverable, BiPredicate<ConsumerRecord<?, ?>, Exception> skipper, Log logger) {
Map<TopicPartition, Long> partitions = new LinkedHashMap<>();
records.forEach(record -> {
// For the first (failed) record, ask the skipper whether it should be
// skipped instead of retried.
if (recoverable && first.get()) {
boolean test = skipper.test(record, exception);
skipped.set(test);
}
// Remember the earliest offset per partition to seek back to.
if (!recoverable || !first.get() || !skipped.get()) {
partitions.computeIfAbsent(new TopicPartition(record.topic(), record.partition()),
offset -> record.offset());
}
first.set(false);
});
// Reset the consumer position to each remembered offset.
partitions.forEach((topicPartition, offset) -> {
consumer.seek(topicPartition, offset);
});
return skipped.get();
}
spring-kafka的rebalance ConsumerRebalanceListener
public ConsumerRebalanceListener createRebalanceListener(final Consumer<K, V> consumer) {
return new ConsumerRebalanceListener() {
final ConsumerRebalanceListener userListener = getContainerProperties().getConsumerRebalanceListener();
final ConsumerAwareRebalanceListener consumerAwareListener =
userListener instanceof ConsumerAwareRebalanceListener
? (ConsumerAwareRebalanceListener) userListener : null;
@Override
//调用时机是consumer停止拉取数据,rebalance开始之前
public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
//执行用户的rebalance
this.userListener.onPartitionsRevoked(partitions);
//提交位移
commitPendingAcks();
}
@Override
//rebalance 完成后
public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
ListenerConsumer.this.assignedPartitions = partitions;
//如果没有开启自动提交,从coordinator获取所有partition的offset
if (!ListenerConsumer.this.autoCommit) {
Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
for (TopicPartition partition : partitions) offsets.put(partition, new OffsetAndMetadata(consumer.position(partition)));
// 重新设置本地的position信息
if (ListenerConsumer.this.genericListener instanceof ConsumerSeekAware) {
seekPartitions(partitions, false);
}
// 根据配置决定是同步提交和异步提交
if (KafkaMessageListenerContainer.this.getContainerProperties().isSyncCommits()) {
ListenerConsumer.this.consumer.commitSync(offsetsToCommit);
}
else {
ListenerConsumer.this.consumer.commitAsync(offsetsToCommit,
KafkaMessageListenerContainer.this.getContainerProperties().getCommitCallback());
}
//执行用户定义的rebalanceListener
this.userListener.onPartitionsAssigned(partitions);
}
};
}