1. Using spring-kafka
We simulate a producer producing messages and a consumer listening for and consuming them.
(1) Producer
1) Add the spring-kafka dependency
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
2) Kafka configuration
@Configuration
public class KafkaConfiguration {
// Kafka server addresses
@Value("${spring.kafka.bootstrap-servers}")
private String servers;
// ack type: 0, 1, or all/-1
@Value("${spring.kafka.producer.acks}")
private String ack;
// Number of retries
@Value("${spring.kafka.producer.retries}")
private String retries;
// Batch size (bytes)
@Value("${spring.kafka.producer.batch-size}")
private String batchSize;
// Linger time before sending (ms)
@Value("${spring.kafka.producer.linger}")
private String linger;
// Buffer memory (default 32 MB)
@Value("${spring.kafka.producer.buffer-memory}")
private String bufferMemory;
/**
* Custom producer configuration
*/
public Map<String, Object> producerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, servers);
props.put(ProducerConfig.ACKS_CONFIG, ack);
props.put(ProducerConfig.RETRIES_CONFIG, retries);
props.put(ProducerConfig.BATCH_SIZE_CONFIG, batchSize);
props.put(ProducerConfig.LINGER_MS_CONFIG, linger);
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, bufferMemory);
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
return props;
}
/**
* Producer factory
*/
@Bean
public ProducerFactory<String, Object> producerFactory() {
return new DefaultKafkaProducerFactory<>(producerConfigs());
}
/**
* Register the KafkaTemplate bean
*
* @return KafkaTemplate
*/
@Bean
public KafkaTemplate<String, Object> kafkaTemplate() {
return new KafkaTemplate<>(producerFactory());
}
/**
* Create a topic at startup
*
* @return NewTopic
*/
@Bean
public NewTopic initialTopic() {
// Topic name, number of partitions, replication factor; 3 partitions are created here
return new NewTopic("test_topic", 3, (short) 1);
}
}
# Web server port
server:
  port: 58080
spring:
  # Kafka configuration
  kafka:
    # Cluster (broker) addresses
    bootstrap-servers: 192.168.31.138:9092
    # Producer settings
    producer:
      # Number of retries
      retries: 3
      # acks: 0, 1, or all/-1
      acks: 1
      # Batch size (bytes)
      batch-size: 1024
      # Linger time (ms)
      linger: 1000
      # Buffer memory (default 32 MB)
      buffer-memory: 40960
3) Producing messages
@Slf4j
@RestController
public class ProducerController {
@Resource
private KafkaTemplate<String, Object> kafkaTemplate;
@RequestMapping("/send")
public String sendMessage(@RequestParam("key") String key, @RequestParam("value") String value) {
try {
for (int i = 0; i < 3; i++) {
kafkaTemplate.send("test_topic", 0, key + i, value + i);
}
return "发送成功";
} catch (Exception e) {
log.error("Failed to send message caused by {}", e.getMessage());
return "发送失败";
}
}
}
Now, by calling the producer's sendMessage endpoint, we can produce messages. Each call here produces 3 messages, all sent to the same partition (partition 0); for example, requesting /send?key=k&value=v produces k0/v0, k1/v1 and k2/v2.
(2) Consumer
1) Add the spring-kafka dependency
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
2) Kafka configuration
@Configuration
public class KafkaConfiguration {
@Value("${spring.kafka.bootstrap-servers}")
private String servers;
@Value("${spring.kafka.consumer.group-id}")
private String groupId;
@Value("${spring.kafka.consumer.enable-auto-commit}")
private boolean autoCommit;
@Value("${spring.kafka.consumer.auto-offset-reset}")
private String latest;
@Value("${spring.kafka.consumer.max-poll-records}")
private String maxPollRecords;
@Value("${spring.kafka.consumer.listener.batch}")
private boolean type;
@Value("${spring.kafka.consumer.listener.missing-topics-fatal}")
private boolean missingTopicsFatal;
@Value("${spring.kafka.consumer.listener.concurrency}")
private int concurrency;
/**
* Custom consumer configuration
*/
public Map<String, Object> consumerConfigs() {
Map<String, Object> props = new HashMap<>();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, servers);
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, autoCommit);
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, latest);
props.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, maxPollRecords);
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
return props;
}
/**
* Consumer factory
*/
@Bean
public ConsumerFactory<String, Object> consumerFactory() {
return new DefaultKafkaConsumerFactory<>(consumerConfigs());
}
/**
* Register the KafkaListenerContainerFactory bean
*
* @return KafkaListenerContainerFactory
*/
@Bean
public KafkaListenerContainerFactory<ConcurrentMessageListenerContainer<String, Object>> kafkaListenerContainerFactory() {
ConcurrentKafkaListenerContainerFactory<String, Object> factory =
new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory());
// Number of concurrent consumer threads; must not exceed the number of partitions
factory.setConcurrency(concurrency);
// Whether to consume in batches
factory.setBatchListener(batch);
// Poll timeout (ms)
factory.getContainerProperties().setPollTimeout(30000);
// Ack mode
factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
// Whether startup should fail if the topic is missing
factory.setMissingTopicsFatal(missingTopicsFatal);
return factory;
}
}
# Web server port
server:
  port: 58081
# Kafka configuration
spring:
  kafka:
    # Cluster (broker) addresses
    bootstrap-servers: 192.168.31.138:9092
    # Consumer settings
    consumer:
      # Consumer group
      group-id: groupA
      # Auto commit
      enable-auto-commit: false
      # What to do when there is no initial offset or the offset is out of range:
      # latest consumes only newly produced data, earliest starts from the beginning, none throws an exception
      auto-offset-reset: latest
      # Maximum number of records returned per poll
      max-poll-records: 5
      listener:
        # Batch consumption
        batch: true
        # Fail startup if the topic does not exist (disabled here)
        missing-topics-fatal: false
        # Number of concurrent consumer threads; must not exceed the number of partitions
        concurrency: 3
3) Consuming messages
@Slf4j
@Component
public class ConsumerListener {
@KafkaListener(topics = {"test_topic"})
public void consumer(String record, Acknowledgment ack) {
String threadName = Thread.currentThread().getName();
try {
log.info("线程: {} 接收到消息: {}", threadName, record);
} finally {
log.info("线程: {} 异步提交", threadName);
ack.acknowledge();
}
}
}
Using the @KafkaListener annotation, we consume messages. Since the container factory above enables batch listening, the listener can also be written to receive the whole batch returned by one poll, as sketched below.
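A minimal sketch of that batch variant, assuming the same test_topic and the MANUAL ack mode configured above (the class and method names are just illustrative):
@Slf4j
@Component
public class BatchConsumerListener {
    @KafkaListener(topics = {"test_topic"})
    public void consume(List<String> records, Acknowledgment ack) {
        try {
            // the whole batch returned by a single poll is delivered at once
            records.forEach(record -> log.info("received: {}", record));
        } finally {
            // with AckMode.MANUAL the acknowledged offsets are committed before the next poll
            ack.acknowledge();
        }
    }
}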
(3) Testing
Call the producer's endpoint and look at the consumer's log output.
The first three lines show the partition assignment: we created 3 partitions and set concurrency to 3, so each consumer thread is assigned exactly one partition. With concurrency set to 2, one consumer would be assigned an extra partition; with concurrency greater than 3, the extra consumers would sit idle. The following six lines show the messages being consumed and the offsets being committed.
Why were the messages consumed in three separate calls? Because max-poll-records was initially set to 1, so each poll returns at most one record.
After setting max-poll-records to 5 and calling the endpoint again, the consumer handles all 3 messages in a single poll.
If we modify the producer so that the 3 messages are sent to different partitions:
@RequestMapping("/send")
public String sendMessage(@RequestParam("key") String key, @RequestParam("value") String value) {
try {
for (int i = 0; i < 3; i++) {
kafkaTemplate.send("test_topic", i, key + i, value + i);
}
return "发送成功";
} catch (Exception e) {
log.error("Failed to send message caused by {}", e.getMessage());
return "发送失败";
}
}
Calling the endpoint again, the log shows that this time the messages are consumed by three different consumer threads.
2. Some thoughts
(1) Why can the consumer pull data just by configuring @KafkaListener?
First, KafkaListenerAnnotationBeanPostProcessor implements the BeanPostProcessor interface. In its postProcessAfterInitialization method, every method annotated with @KafkaListener is wrapped by processKafkaListener into a MethodKafkaListenerEndpoint object and registered.
protected void processKafkaListener(KafkaListener kafkaListener, Method method, Object bean, String beanName) {
Method methodToUse = this.checkProxy(method, bean);
// build the endpoint
MethodKafkaListenerEndpoint<K, V> endpoint = new MethodKafkaListenerEndpoint();
endpoint.setMethod(methodToUse);
String beanRef = kafkaListener.beanRef();
this.listenerScope.addListener(beanRef, bean);
String[] topics = this.resolveTopics(kafkaListener);
TopicPartitionOffset[] tps = this.resolveTopicPartitions(kafkaListener);
if (!this.processMainAndRetryListeners(kafkaListener, bean, beanName, methodToUse, endpoint, topics, tps)) {
// register the endpoint
this.processListener(endpoint, kafkaListener, bean, beanName, topics, tps);
}
this.listenerScope.removeListener(beanRef);
}
Registration actually calls KafkaListenerEndpointRegistrar's registerEndpoint method, which puts the endpoint into the endpointDescriptors list.
public void registerEndpoint(KafkaListenerEndpoint endpoint, @Nullable KafkaListenerContainerFactory<?> factory) {
Assert.notNull(endpoint, "Endpoint must be set");
Assert.hasText(endpoint.getId(), "Endpoint id must be set");
KafkaListenerEndpointRegistrar.KafkaListenerEndpointDescriptor descriptor = new KafkaListenerEndpointRegistrar.KafkaListenerEndpointDescriptor(endpoint, factory);
synchronized(this.endpointDescriptors) {
if (this.startImmediately) {
this.endpointRegistry.registerListenerContainer(descriptor.endpoint, this.resolveContainerFactory(descriptor), true);
} else {
// add to the endpointDescriptors list
this.endpointDescriptors.add(descriptor);
}
}
}
KafkaListenerEndpointRegistrar's registerAllEndpoints method is where registration really happens: it iterates over every endpoint in the endpointDescriptors list, and registerListenerContainer creates the listener container (a ConcurrentMessageListenerContainer) by calling createListenerContainer.
protected void registerAllEndpoints() {
synchronized(this.endpointDescriptors) {
KafkaListenerEndpointRegistrar.KafkaListenerEndpointDescriptor descriptor;
for(Iterator var2 = this.endpointDescriptors.iterator(); var2.hasNext(); this.endpointRegistry.registerListenerContainer(descriptor.endpoint, this.resolveContainerFactory(descriptor))) {
descriptor = (KafkaListenerEndpointRegistrar.KafkaListenerEndpointDescriptor)var2.next();
if (descriptor.endpoint instanceof MultiMethodKafkaListenerEndpoint && this.validator != null) {
((MultiMethodKafkaListenerEndpoint)descriptor.endpoint).setValidator(this.validator);
}
}
this.startImmediately = true;
}
}
public void registerListenerContainer(KafkaListenerEndpoint endpoint, KafkaListenerContainerFactory<?> factory, boolean startImmediately) {
Assert.notNull(endpoint, "Endpoint must not be null");
Assert.notNull(factory, "Factory must not be null");
String id = endpoint.getId();
Assert.hasText(id, "Endpoint id must not be empty");
synchronized(this.listenerContainers) {
Assert.state(!this.listenerContainers.containsKey(id), "Another endpoint is already registered with id '" + id + "'");
// create the MessageListenerContainer
MessageListenerContainer container = this.createListenerContainer(endpoint, factory);
this.listenerContainers.put(id, container);
ConfigurableApplicationContext appContext = this.applicationContext;
String groupName = endpoint.getGroup();
if (StringUtils.hasText(groupName) && appContext != null) {
Object containerGroup;
ContainerGroup group;
if (appContext.containsBean(groupName)) {
containerGroup = (List)appContext.getBean(groupName, List.class);
group = (ContainerGroup)appContext.getBean(groupName + ".group", ContainerGroup.class);
} else {
containerGroup = new ArrayList();
appContext.getBeanFactory().registerSingleton(groupName, containerGroup);
group = new ContainerGroup(groupName);
appContext.getBeanFactory().registerSingleton(groupName + ".group", group);
}
((List)containerGroup).add(container);
group.addContainers(new MessageListenerContainer[]{container});
}
if (startImmediately) {
this.startIfNecessary(container);
}
}
}
When the listener container is created, its containerProperties are populated. Only when the container's doStart method runs are the actual KafkaMessageListenerContainer instances created from those properties, and each one starts its internal listenerConsumer (creating a KafkaMessageListenerContainer also creates a KafkaConsumer). Note that the concurrency in the code below is the configured number of concurrent threads.
protected void doStart() {
if (!this.isRunning()) {
this.checkTopics();
ContainerProperties containerProperties = this.getContainerProperties();
TopicPartitionOffset[] topicPartitions = containerProperties.getTopicPartitions();
if (topicPartitions != null && this.concurrency > topicPartitions.length) {
this.logger.warn(() -> {
return "When specific partitions are provided, the concurrency must be less than or equal to the number of partitions; reduced from " + this.concurrency + " to " + topicPartitions.length;
});
this.concurrency = topicPartitions.length;
}
this.setRunning(true);
for(int i = 0; i < this.concurrency; ++i) {
// actually create the KafkaMessageListenerContainer
KafkaMessageListenerContainer<K, V> container = this.constructContainer(containerProperties, topicPartitions, i);
this.configureChildContainer(i, container);
if (this.isPaused()) {
container.pause();
}
// start the container
container.start();
this.containers.add(container);
}
}
}
The listenerConsumer implements the Runnable interface, so its run method is invoked when the container starts, and it then pulls data from Kafka for consumption.
(2) How exactly does the listenerConsumer pull data?
In the listenerConsumer's run method, as long as the listener container is still running and no exception escapes, pollAndInvoke is called in a loop.
public void run() {
// omitted
while(KafkaMessageListenerContainer.this.isRunning()) {
try {
this.pollAndInvoke();
} catch (Exception e) {
// exception handling omitted
}
// omitted
}
this.wrapUp((Throwable)exitThrowable);
}
In the pollConsumer method there is a pollTimeout, which is the 30-second timeout (setPollTimeout(30000)) we configured.
private ConsumerRecords<K, V> pollConsumer() {
this.beforePoll();
try {
return this.consumer.poll(this.pollTimeout);
} catch (WakeupException var2) {
return ConsumerRecords.empty();
}
}
KafkaConsumer's poll method contains a do-while loop that keeps running until the timer expires. Inside the loop, pollForFetches is called to fetch messages. If records are fetched, they are wrapped into messages and the listener method is eventually invoked via reflection to consume them; if nothing is fetched, the loop continues until the timeout expires, after which the listenerConsumer's run method calls pollAndInvoke again.
private ConsumerRecords<K, V> poll(final Timer timer, final boolean includeMetadataInTimeout) {
acquireAndEnsureOpen();
try {
this.kafkaConsumerMetrics.recordPollStart(timer.currentTimeMs());
if (this.subscriptions.hasNoSubscriptionOrUserAssignment()) {
throw new IllegalStateException("Consumer is not subscribed to any topics or assigned any partitions");
}
// a do-while loop that keeps running until the timer expires
do {
client.maybeTriggerWakeup();
if (includeMetadataInTimeout) {
// try to update assignment metadata BUT do not need to block on the timer for join group
updateAssignmentMetadataIfNeeded(timer, false);
} else {
while (!updateAssignmentMetadataIfNeeded(time.timer(Long.MAX_VALUE), true)) {
log.warn("Still waiting for metadata");
}
}
// fetch the records here
final Map<TopicPartition, List<ConsumerRecord<K, V>>> records = pollForFetches(timer);
// if records were fetched, return them
if (!records.isEmpty()) {
// before returning the fetched records, we can send off the next round of fetches
// and avoid block waiting for their responses to enable pipelining while the user
// is handling the fetched records.
//
// NOTE: since the consumed position has already been updated, we must not allow
// wakeups or any other errors to be triggered prior to returning the fetched records.
if (fetcher.sendFetches() > 0 || client.hasPendingRequests()) {
client.transmitSends();
}
return this.interceptors.onConsume(new ConsumerRecords<>(records));
}
} while (timer.notExpired());
return ConsumerRecords.empty();
} finally {
release();
this.kafkaConsumerMetrics.recordPollEnd(timer.currentTimeMs());
}
}
Now look at pollForFetches: it first calls fetcher.fetchedRecords() to take records from the completedFetches queue and returns them immediately if non-empty; if they are empty, it calls sendFetches, polls the network client, and finally calls fetchedRecords once more.
private Map<TopicPartition, List<ConsumerRecord<K, V>>> pollForFetches(Timer timer) {
long pollTimeout = coordinator == null ? timer.remainingMs() :
Math.min(coordinator.timeToNextPoll(timer.currentTimeMs()), timer.remainingMs());
// first try fetchedRecords(); if data is already buffered, return it immediately
// if data is available already, return it immediately
final Map<TopicPartition, List<ConsumerRecord<K, V>>> records = fetcher.fetchedRecords();
if (!records.isEmpty()) {
return records;
}
// send any new fetches (won't resend pending fetches)
fetcher.sendFetches();
// We do not want to be stuck blocking in poll if we are missing some positions
// since the offset lookup may be backing off after a failure
// NOTE: the use of cachedSubscriptionHashAllFetchPositions means we MUST call
// updateAssignmentMetadataIfNeeded before this method.
if (!cachedSubscriptionHashAllFetchPositions && pollTimeout > retryBackoffMs) {
pollTimeout = retryBackoffMs;
}
log.trace("Polling for fetches with timeout {}", pollTimeout);
// poll the network client to fetch data
Timer pollTimer = time.timer(pollTimeout);
client.poll(pollTimer, () -> {
// since a fetch might be completed by the background thread, we need this poll condition
// to ensure that we do not block unnecessarily in poll()
return !fetcher.hasAvailableFetches();
});
timer.update(pollTimer.currentTimeMs());
return fetcher.fetchedRecords();
}
The sendFetches method sends the fetch requests and registers callback listeners that add the results to completedFetches.
public synchronized int sendFetches() {
// Update metrics in case there was an assignment change
sensors.maybeUpdateAssignment(subscriptions);
Map<Node, FetchSessionHandler.FetchRequestData> fetchRequestMap = prepareFetchRequests();
for (Map.Entry<Node, FetchSessionHandler.FetchRequestData> entry : fetchRequestMap.entrySet()) {
final Node fetchTarget = entry.getKey();
final FetchSessionHandler.FetchRequestData data = entry.getValue();
final FetchRequest.Builder request = FetchRequest.Builder
.forConsumer(this.maxWaitMs, this.minBytes, data.toSend())
.isolationLevel(isolationLevel)
.setMaxBytes(this.maxBytes)
.metadata(data.metadata())
.toForget(data.toForget())
.rackId(clientRackId);
if (log.isDebugEnabled()) {
log.debug("Sending {} {} to broker {}", isolationLevel, data.toString(), fetchTarget);
}
// send the fetch request
RequestFuture<ClientResponse> future = client.send(fetchTarget, request);
// We add the node to the set of nodes with pending fetch requests before adding the
// listener because the future may have been fulfilled on another thread (e.g. during a
// disconnection being handled by the heartbeat thread) which will mean the listener
// will be invoked synchronously.
this.nodesWithPendingFetchRequests.add(entry.getKey().id());
// register the callback listener
future.addListener(new RequestFutureListener<ClientResponse>() {
@Override
public void onSuccess(ClientResponse resp) {
synchronized (Fetcher.this) {
try {
FetchResponse response = (FetchResponse) resp.responseBody();
FetchSessionHandler handler = sessionHandler(fetchTarget.id());
if (handler == null) {
log.error("Unable to find FetchSessionHandler for node {}. Ignoring fetch response.",
fetchTarget.id());
return;
}
if (!handler.handleResponse(response)) {
return;
}
Set<TopicPartition> partitions = new HashSet<>(response.responseData().keySet());
FetchResponseMetricAggregator metricAggregator = new FetchResponseMetricAggregator(sensors, partitions);
for (Map.Entry<TopicPartition, FetchResponseData.PartitionData> entry : response.responseData().entrySet()) {
TopicPartition partition = entry.getKey();
FetchRequest.PartitionData requestData = data.sessionPartitions().get(partition);
if (requestData == null) {
String message;
if (data.metadata().isFull()) {
message = MessageFormatter.arrayFormat(
"Response for missing full request partition: partition={}; metadata={}",
new Object[]{partition, data.metadata()}).getMessage();
} else {
message = MessageFormatter.arrayFormat(
"Response for missing session request partition: partition={}; metadata={}; toSend={}; toForget={}",
new Object[]{partition, data.metadata(), data.toSend(), data.toForget()}).getMessage();
}
// Received fetch response for missing session partition
throw new IllegalStateException(message);
} else {
long fetchOffset = requestData.fetchOffset;
FetchResponseData.PartitionData partitionData = entry.getValue();
log.debug("Fetch {} at offset {} for partition {} returned fetch data {}",
isolationLevel, fetchOffset, partition, partitionData);
Iterator<? extends RecordBatch> batches = FetchResponse.recordsOrFail(partitionData).batches().iterator();
short responseVersion = resp.requestHeader().apiVersion();
// add the result to completedFetches
completedFetches.add(new CompletedFetch(partition, partitionData,
metricAggregator, batches, fetchOffset, responseVersion));
}
}
sensors.fetchLatency.record(resp.requestLatencyMs());
} finally {
nodesWithPendingFetchRequests.remove(fetchTarget.id());
}
}
}
@Override
public void onFailure(RuntimeException e) {
synchronized (Fetcher.this) {
try {
FetchSessionHandler handler = sessionHandler(fetchTarget.id());
if (handler != null) {
handler.handleError(e);
}
} finally {
nodesWithPendingFetchRequests.remove(fetchTarget.id());
}
}
}
});
}
return fetchRequestMap.size();
}
(3) Why do the thread names look so odd?
Why are the thread names in this form? Let's follow the source code.
First, as described above, KafkaListenerAnnotationBeanPostProcessor registers every @KafkaListener-annotated method as an endpoint via processKafkaListener in its postProcessAfterInitialization method. In KafkaListenerAnnotationBeanPostProcessor, the endpoint's id is set with this.getEndpointId(kafkaListener):
private String getEndpointId(KafkaListener kafkaListener) {
return StringUtils.hasText(kafkaListener.id()) ? this.resolveExpressionAsString(kafkaListener.id(), "id") : "org.springframework.kafka.KafkaListenerEndpointContainer#" + this.counter.getAndIncrement();
}
In other words, the first number in the thread name is the endpoint's index.
In ConcurrentMessageListenerContainer's doStart method, there is the following piece of code:
for(int i = 0; i < this.concurrency; ++i) {
KafkaMessageListenerContainer<K, V> container = this.constructContainer(containerProperties, topicPartitions, i);
this.configureChildContainer(i, container);
if (this.isPaused()) {
container.pause();
}
container.start();
this.containers.add(container);
}
Here concurrency is the configured number of concurrent threads. The configureChildContainer method looks like this:
private void configureChildContainer(int index, KafkaMessageListenerContainer<K, V> container) {
String beanName = this.getBeanName();
// set the bean name: "<containerBeanName>-<index>"
beanName = (beanName == null ? "consumer" : beanName) + "-" + index;
container.setBeanName(beanName);
// omitted
AsyncListenableTaskExecutor exec = container.getContainerProperties().getConsumerTaskExecutor();
if (exec == null) {
if (this.executors.size() > index) {
exec = (AsyncListenableTaskExecutor) this.executors.get(index);
} else {
// create a SimpleAsyncTaskExecutor whose thread-name prefix is derived from the bean name
exec = new SimpleAsyncTaskExecutor(beanName + "-C-");
this.executors.add(exec);
}
container.getContainerProperties().setConsumerTaskExecutor(exec);
}
}
As you can see, the bean name is used as the prefix when creating the SimpleAsyncTaskExecutor, so the second number in the thread name is the index of the SimpleAsyncTaskExecutor (i.e. of the child container).
In SimpleAsyncTaskExecutor's doExecute method, a new thread is created and its name is generated by nextThreadName:
protected String nextThreadName() {
return getThreadNamePrefix() + this.threadCount.incrementAndGet();
}
So the third number in the thread name is the thread counter.
This explains why the thread names take the form org.springframework.kafka.KafkaListenerEndpointContainer#0-1-C-1.
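A related note: if @KafkaListener is given an explicit id, getEndpointId uses that id instead of the KafkaListenerEndpointContainer#N default, so the thread names become easier to read. A small sketch (the id value is made up; by default the id also becomes the consumer group id unless idIsGroup = false):
// threads are then named like "myListener-0-C-1" instead of
// "org.springframework.kafka.KafkaListenerEndpointContainer#0-1-C-1"
@KafkaListener(id = "myListener", topics = {"test_topic"})
public void consumer(String record, Acknowledgment ack) {
    log.info("Thread {} received message: {}", Thread.currentThread().getName(), record);
    ack.acknowledge();
}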
(4) What is ackMode?
When auto commit is disabled (enable-auto-commit is false), offsets must be committed manually, and ackMode selects how that happens:
- BATCH: the default. After each batch returned by poll has been processed by the ListenerConsumer, Spring commits for us;
- RECORD: after each individual record returned by poll has been processed by the ListenerConsumer, Spring commits for us;
- TIME: after each batch has been processed, Spring commits if more than TIME has elapsed since the last commit;
- COUNT: after each batch has been processed, Spring commits if at least COUNT records have been processed since the last commit;
- COUNT_TIME: Spring commits when either the TIME or the COUNT condition is met;
- MANUAL: the listener method takes an Acknowledgment parameter; calling acknowledge stores the offset in a local cache map, and the cached offsets are committed in a batch before the next poll (i.e. after all records from the previous poll have been processed);
- MANUAL_IMMEDIATE: the listener method takes an Acknowledgment parameter; calling acknowledge commits the offset immediately.
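For the threshold-based modes, ContainerProperties also exposes the thresholds. A minimal sketch, assuming the same factory as above, of switching to COUNT_TIME (the threshold values are illustrative):
// commit whenever 10 records have been processed or 5 seconds have passed
// since the last commit, whichever comes first
factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.COUNT_TIME);
factory.getContainerProperties().setAckCount(10);
factory.getContainerProperties().setAckTime(5000);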
(5) What happens if offsets are not committed after consuming?
- If offsets are never committed while consuming, the running process will not re-consume the data; but after a restart, the messages whose offsets were not committed will be consumed again.
- If a few records (or one batch) are consumed without committing their offsets, but later messages are consumed and committed normally, the broker updates the group's offset to the latest committed position; the earlier records will not be re-consumed, even after a restart.
- Not committing an offset does not block the consumer or cause duplicate consumption within the running process. The client keeps its own in-memory position: after every poll it records how far it has read, independently of the offsets committed to the broker, so it simply keeps consuming forward. Only when a new (or restarted) consumer is initialized does it fetch the latest committed offset from the broker, so it starts again from the first uncommitted message. A small sketch after this list illustrates the difference between the client-side position and the broker-side committed offset.
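To make the last point concrete, here is a hedged sketch with a plain KafkaConsumer, reusing the broker address and group from the configuration above: the client-side position advances after a poll even without a commit, while the broker-side committed offset only changes when we actually commit, and a restarted consumer resumes from the committed offset.
Properties props = new Properties();
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.31.138:9092");
props.put(ConsumerConfig.GROUP_ID_CONFIG, "groupA");
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
    TopicPartition tp = new TopicPartition("test_topic", 0);
    consumer.assign(Collections.singletonList(tp));
    consumer.poll(Duration.ofSeconds(1));
    // the client-side position has advanced past the records just polled...
    System.out.println("position  = " + consumer.position(tp));
    // ...but the broker-side committed offset stays unchanged until we commit
    System.out.println("committed = " + consumer.committed(Collections.singleton(tp)));
}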
(6) How does the producer set a callback?
If the producer wants a callback after sending a message, there are two options:
1. Add a callback to the returned future
@RequestMapping("/send")
public String sendMessage(@RequestParam("key") String key, @RequestParam("value") String value) {
try {
for (int i = 0; i < 3; i++) {
ListenableFuture<SendResult<String, Object>> future = kafkaTemplate.send("test_topic", 0, key + i
, value + i);
future.addCallback(new ListenableFutureCallback<SendResult<String, Object>>() {
@Override
public void onFailure(Throwable ex) {
log.error("Failed to send message caused by {}", ex.getMessage());
}
@Override
public void onSuccess(SendResult<String, Object> result) {
log.info("Succeed to send message {}", result.toString());
}
});
}
return "发送成功";
} catch (Exception e) {
log.error("Failed to send message caused by {}", e.getMessage());
return "发送失败";
}
}
2. Set a ProducerListener on the KafkaTemplate
@Bean
public KafkaTemplate<String, Object> kafkaTemplate() {
KafkaTemplate<String, Object> kafkaTemplate = new KafkaTemplate<>(producerFactory());
// set the producer listener callback
kafkaTemplate.setProducerListener(new ProducerListener<String, Object>() {
public void onSuccess(ProducerRecord<String, Object> producerRecord,
RecordMetadata recordMetadata) {
log.info("Succeed to send message {} with offset {}", producerRecord.toString(),
recordMetadata.offset());
}
public void onError(ProducerRecord<String, Object> producerRecord,
@Nullable RecordMetadata recordMetadata, Exception exception) {
log.info("Failed to send message {} with offset {} caused by {}", producerRecord.toString(),
recordMetadata == null ? null : recordMetadata.offset(), exception.toString());
}
});
return kafkaTemplate;
}
(7) What if sending fails or times out?
spring-kafka has a built-in retry mechanism, but what if sending still fails after several retries?
One workable approach is to use the callback to write failed messages to a table, and have a separate thread read that table and re-send them, as sketched below.
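A hedged sketch of that idea; FailedMessageStore and its methods are a hypothetical table-backed DAO, and the scheduling interval is arbitrary:
@Slf4j
@Component
public class ResendableProducer {
    @Resource
    private KafkaTemplate<String, Object> kafkaTemplate;
    @Resource
    private FailedMessageStore failedMessageStore; // hypothetical table-backed store

    public void send(String key, String value) {
        kafkaTemplate.send("test_topic", key, value)
                .addCallback(
                        result -> log.info("Succeed to send message {}", result),
                        ex -> failedMessageStore.save(key, value)); // persist the failure for later retry
    }

    // periodically re-send whatever previously failed
    @Scheduled(fixedDelay = 60000)
    public void resendFailedMessages() {
        failedMessageStore.findAll().forEach(msg -> send(msg.getKey(), msg.getValue()));
    }
}
Note that a re-sent message may be a duplicate if the original actually reached the broker before the callback reported a failure, so consumers should be prepared to de-duplicate.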
(8) How does spring-kafka use transactions?
There are two ways to use transactions:
- Use the @Transactional annotation: add @Transactional to the method that sends the messages (see the sketch after this list).
- Use a local transaction:
kafkaTemplate.executeInTransaction((KafkaOperations.OperationsCallback) kafkaOperations -> {
kafkaOperations.send("test_topic", 0, key, value);
throw new RuntimeException("fail");
});
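A minimal sketch of the @Transactional approach, assuming the transaction prerequisites described below are met (the endpoint path is made up): if the method throws after sending, the Kafka transaction is rolled back and consumers using isolation.level=read_committed will not see the messages.
@Transactional
@RequestMapping("/sendInTransaction")
public String sendInTransaction(@RequestParam("key") String key, @RequestParam("value") String value) {
    kafkaTemplate.send("test_topic", 0, key, value);
    // any exception thrown here rolls the Kafka transaction back
    if (value.contains("error")) {
        throw new RuntimeException("fail");
    }
    return "sent successfully";
}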
Both approaches share several prerequisites:
- spring.kafka.producer.transaction-id-prefix must be configured, because KafkaAutoConfiguration only registers the KafkaTransactionManager when that property is present:
@Bean
@ConditionalOnProperty(name = "spring.kafka.producer.transaction-id-prefix")
@ConditionalOnMissingBean
public KafkaTransactionManager<?, ?> kafkaTransactionManager(ProducerFactory<?, ?> producerFactory) {
return new KafkaTransactionManager<>(producerFactory);
}
- Idempotence must be enabled: enable.idempotence: true (enabled by default in newer versions).
- acks must be -1/all: acks: all
- The retry count must be greater than 0.
- max.in.flight.requests.per.connection must not be greater than 5 (the default is 5); this value controls how many requests the producer may have in flight before receiving a response from the server.
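Putting those prerequisites together, a hedged sketch of a producer factory configured for transactions (it reuses the servers field from the configuration class above; the bean name, prefix and values are illustrative):
@Bean
public ProducerFactory<String, Object> transactionalProducerFactory() {
    Map<String, Object> props = new HashMap<>();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, servers);
    props.put(ProducerConfig.ACKS_CONFIG, "all");                       // acks must be all/-1
    props.put(ProducerConfig.RETRIES_CONFIG, 3);                        // retries must be > 0
    props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);          // idempotence must be enabled
    props.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 5); // must not exceed 5
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
    DefaultKafkaProducerFactory<String, Object> factory = new DefaultKafkaProducerFactory<>(props);
    // equivalent to setting spring.kafka.producer.transaction-id-prefix
    factory.setTransactionIdPrefix("tx-");
    return factory;
}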