KafkaConsumer是非线程安全的,在poll()调用中都会执行acquireAndEnsureOpen()来确保当前消费者只有一个线程在操作.
注意到acquire()和release()都是私有方法,因此在实际应用中不需要我们显式调用.下面是源码
// Id of the thread currently holding the consumer; -1 means no owner.
private final AtomicLong currentThread;
// Re-entrancy counter: number of acquire() calls not yet matched by release().
private final AtomicInteger refcount;
// refcount is a member field; every subscribe/consume method calls acquireAndEnsureOpen() first to enforce single-threaded access
// Acquires single-thread ownership and verifies the consumer is still open;
// if it has been closed, releases ownership again before throwing.
private void acquireAndEnsureOpen() {
this.acquire();
if (this.closed) {
this.release();
throw new IllegalStateException("This consumer has already been closed.");
}
}
// Claims ownership of the consumer for the calling thread (re-entrant for the owner).
private void acquire() {
//get the id of the calling thread
long threadId = Thread.currentThread().getId();
//throw if a DIFFERENT thread already owns the consumer (currentThread is neither
//this thread's id nor -1); otherwise CAS currentThread from -1 to this thread's id
if (threadId != this.currentThread.get() && !this.currentThread.compareAndSet(-1L, threadId)) {
throw new ConcurrentModificationException("KafkaConsumer is not safe for multi-threaded access");
} else {
//increment refcount — supports nested acquire() calls by the owning thread
this.refcount.incrementAndGet();
}
}
// Releases one level of ownership; when the count reaches 0, marks the consumer unowned.
private void release() {
// decrement refcount; when it drops to 0, reset currentThread to -1 (no owner)
if (this.refcount.decrementAndGet() == 0) {
this.currentThread.set(-1L);
}
}
多线程消费(一)
第一种,也是最常见的方式:线程封闭,即为每个线程都实例化一个KafkaConsumer对象.但是这种方式的消费并发度受限于实际的分区数. 当设置的线程数大于分区数时, 就会有部分消费线程一直处于空闲状态,代码见下面.这种方式和开启多个消费进程没有本质上区别.
优点
每个线程可以按照顺序消费各个分区中的消息 如果对消息的处理非常迅速,那么poll()拉取的频次会更高,进而整体消费性能得到提高
缺点
每个消费线程都要维护一个独立的TCP连接,如果线程数比较大,会造成不小的系统开销 如果对消息的处理缓慢,比如进行了事务性操作、等待RPC的同步响应,整体消费性能会比较差
改进
性能的瓶颈在于消息处理
代码演示
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.serialization.StringDeserializer;
public class MultiConsumerDemo1 {
    /** Broker list; separate multiple addresses with commas. */
    public static final String brokerList = "47.93.121.123:9092";
    public static final String topic = "kafka_demo_analysis";
    /** Name of the consumer group. */
    public static final String groupId = "kafka-learner";

    /**
     * Builds the consumer configuration shared by every consumer thread.
     *
     * @return a fresh {@link Properties} with bootstrap servers, deserializers,
     *         group id and client id set
     */
    public static Properties initConfig() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); // consumer group
        properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "0");
        return properties;
    }

    public static void main(String[] args) {
        Properties properties = initConfig();
        // BUG FIX: the original rebuilt the config via MultiConsumerDemo1.initConfig()
        // instead of reusing the properties it had just created.
        // This throwaway consumer exists only to discover the partition count; close it
        // even if partitionsFor() throws.
        final int partitionCount;
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        try {
            partitionCount = consumer.partitionsFor(topic).size();
        } finally {
            consumer.close();
        }
        // Stay within [1, partitionCount]: size/2 alone would be 0 for a single-partition
        // topic and no consumer thread would ever start.
        final int consumerThreadNum = Math.max(1, partitionCount / 2);
        for (int i = 0; i < consumerThreadNum; i++) {
            new KafkaConsumerThread(properties, topic).start();
        }
    }
}
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
/**
 * Thread-confined consumer: each thread owns exactly one KafkaConsumer instance,
 * because KafkaConsumer is not safe for multi-threaded access.
 */
public class KafkaConsumerThread extends Thread {
    // final: the consumer reference never changes after construction
    private final KafkaConsumer<String, String> consumer;

    /**
     * Creates and subscribes a dedicated consumer for this thread.
     *
     * @param properties consumer configuration (bootstrap servers, group id, ...)
     * @param topic      topic to subscribe to
     */
    public KafkaConsumerThread(Properties properties, String topic) {
        this.consumer = new KafkaConsumer<String, String>(properties);
        this.consumer.subscribe(Collections.singletonList(topic));
    }

    @Override
    public void run() {
        try {
            // Poll forever; the loop ends only via an exception (e.g. wakeup/close).
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    // TODO process the record
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the TCP connection and heartbeat resources.
            consumer.close();
        }
    }
}
多线程消费(二)
代码演示
难点在于对消息的顺序处理
public class MultiConsumerDemo2 {
    /** Broker list; separate multiple addresses with commas. */
    public static final String brokerList = "****";
    public static final String topic = "kafka_demo_analysis";
    /** Name of the consumer group. */
    public static final String groupId = "kafka-learner";

    /**
     * Builds the consumer configuration. Auto-commit is disabled because offsets
     * are committed manually after the worker pool has processed each batch.
     */
    public static Properties initConfig() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); // consumer group
        properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "0");
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        return properties;
    }

    public static void main(String[] args) {
        Properties properties = initConfig();
        // BUG FIX: the original built this consumer with MultiConsumerDemo1.initConfig(),
        // which silently re-enabled auto-commit; use this class's own config.
        // BUG FIX: the metadata consumer was never closed; close it even on failure.
        final int partitionCount;
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        try {
            partitionCount = consumer.partitionsFor(topic).size();
        } finally {
            consumer.close();
        }
        // Guard against 0: ThreadPoolExecutor rejects a zero maximum pool size.
        final int consumerThreadNum = Math.max(1, partitionCount / 4);
        for (int i = 0; i < 2; i++) {
            new KafkaConsumerThread2(properties, topic, consumerThreadNum).start();
        }
    }
}
/**
 * Single polling thread + worker pool: this thread polls and commits, while record
 * processing is handed off to an executor, one task per polled batch.
 */
public class KafkaConsumerThread2 extends Thread {
    // Confined to this thread: only poll() and commitSync() are called on it here.
    private final KafkaConsumer<String, String> consumer;
    // Pool that processes the partitions of each polled batch.
    private final ExecutorService executorService;
    // Highest processed offset + 1 per partition, pending commit; guarded by its own monitor.
    private final Map<TopicPartition, OffsetAndMetadata> offsets;

    /**
     * @param properties consumer configuration (auto-commit must be disabled)
     * @param topic      topic to subscribe to
     * @param threadNum  fixed size of the processing pool; must be &gt;= 1
     */
    public KafkaConsumerThread2(Properties properties, String topic, int threadNum) {
        this.consumer = new KafkaConsumer<>(properties);
        this.consumer.subscribe(Collections.singletonList(topic));
        // CallerRunsPolicy applies back-pressure to the polling thread when the queue fills up.
        this.executorService = new ThreadPoolExecutor(threadNum, threadNum, 0L, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(2000), new ThreadPoolExecutor.CallerRunsPolicy());
        this.offsets = new HashMap<>();
    }

    @Override
    public void run() {
        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                // BUG FIX: the original tested records.isEmpty() — it submitted work only
                // for EMPTY batches and silently dropped every non-empty batch.
                if (!records.isEmpty()) {
                    executorService.submit(() -> {
                        for (TopicPartition topicPartition : records.partitions()) {
                            List<ConsumerRecord<String, String>> recordList = records.records(topicPartition);
                            //TODO 处理recordList
                            long lastConsumedOffset = recordList.get(recordList.size() - 1).offset();
                            synchronized (offsets) {
                                // Only advance the pending commit position, never move it back,
                                // in case batches for the same partition finish out of order.
                                if (!offsets.containsKey(topicPartition)) {
                                    offsets.put(topicPartition, new OffsetAndMetadata(lastConsumedOffset + 1));
                                } else {
                                    long position = offsets.get(topicPartition).offset();
                                    if (position < lastConsumedOffset + 1) {
                                        offsets.put(topicPartition, new OffsetAndMetadata(lastConsumedOffset + 1));
                                    }
                                }
                            }
                        }
                    });
                    // NOTE(review): offsets recorded by still-running tasks may be committed
                    // before processing finishes, so a crash can lose messages — known
                    // trade-off of this pattern, acceptable for the demo.
                    synchronized (offsets) {
                        if (!offsets.isEmpty()) {
                            consumer.commitSync(offsets);
                            offsets.clear();
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            consumer.close();
        }
    }
}