consumer多线程

701 阅读3分钟

KafkaConsumer是非线程安全的,在poll()调用中都会执行acquireAndEnsureOpen()来确保当前消费者只有一个线程在操作.
注意到acquire()和release()都是私有方法,因此在实际应用中不需要我们显式调用.下面是源码

    // Id of the thread currently holding the consumer "lock"; -1 means unowned.
    private final AtomicLong currentThread;
    // Re-entrancy counter: how many times the owning thread has acquired.
    private final AtomicInteger refcount;
    // refcount is a member field; every subscribe/consume method first calls
    // acquireAndEnsureOpen() to guarantee single-threaded access to the consumer.
    private void acquireAndEnsureOpen() {
        this.acquire();
        if (this.closed) {
            // Undo the acquire before failing so the owner slot is released.
            this.release();
            throw new IllegalStateException("This consumer has already been closed.");
        }
    }

    private void acquire() {
    	// Get the current thread id.
        long threadId = Thread.currentThread().getId();
        // If the calling thread differs from currentThread AND the CAS from -1
        // (unowned) fails, another thread already owns the consumer -> throw.
        // Otherwise currentThread is (or becomes) this thread.
        if (threadId != this.currentThread.get() && !this.currentThread.compareAndSet(-1L, threadId)) {
            throw new ConcurrentModificationException("KafkaConsumer is not safe for multi-threaded access");
        } else {
        	// Increment refcount (supports re-entrant acquire by the owner).
            this.refcount.incrementAndGet();
        }
    }

    private void release() {
    	// Decrement refcount; when it reaches 0 the owner fully released the
    	// consumer, so reset currentThread to -1 (unowned).
        if (this.refcount.decrementAndGet() == 0) {
            this.currentThread.set(-1L);
        }

    }

多线程消费(一)

第一种,也是最常见的方式:线程封闭,即为每个线程都实例化一个KafkaConsumer对象.但是这种方式的消费并发度受限于实际的分区数. 当设置的线程数大于分区数时, 就会有部分消费线程一直处于空闲状态,代码见下面.这种方式和开启多个消费进程没有本质上区别.

优点

每个线程可以按照顺序消费各个分区中的消息. 如果对消息的处理非常迅速,那么poll()拉取的频次会更高,进而整体消费性能得到提高

缺点

每个消费线程都要维护一个独立的TCP连接,如果线程数比较大,会造成不小的系统开销. 如果对消息的处理缓慢,比如进行了事务性操作、等待RPC的同步响应,整体消费性能会比较差

改进

性能的瓶颈在于消息处理

代码演示

import java.util.List;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.serialization.StringDeserializer;

public class MultiConsumerDemo1 {
    /** Broker address list; separate multiple brokers with commas. */
    public static final String brokerList = "47.93.121.123:9092";
    public static final String topic = "kafka_demo_analysis";
    /** Name of the consumer group that all consumer threads join. */
    public static final String groupId = "kafka-learner";

    /**
     * Builds the consumer configuration shared by every consumer thread.
     *
     * @return a fresh {@link Properties} with connection and deserializer settings
     */
    public static Properties initConfig() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); // consumer group
        properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "0");
        return properties;
    }

    public static void main(String[] args) {
        Properties properties = initConfig();
        // Use a short-lived consumer only to discover the partition count;
        // try-with-resources guarantees it is closed even if partitionsFor()
        // throws (the original leaked it on failure, and also rebuilt the config
        // a second time instead of reusing `properties`).
        final int partitionCount;
        try (KafkaConsumer<String, String> metadataConsumer = new KafkaConsumer<>(properties)) {
            partitionCount = metadataConsumer.partitionsFor(topic).size();
        }
        // Keep the thread count within [1, partitionCount]: threads beyond the
        // partition count would sit idle, and a 1-partition topic must still get
        // one consumer thread (size / 2 alone would be 0 there).
        final int consumerThreadNum = Math.max(1, partitionCount / 2);
        for (int i = 0; i < consumerThreadNum; i++) {
            new KafkaConsumerThread(properties, topic).start();
        }
    }
}
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

/**
 * "Thread-confined" consumer: each thread owns a dedicated KafkaConsumer,
 * since KafkaConsumer is not safe for multi-threaded access.
 */
public class KafkaConsumerThread extends Thread {
    /** Exclusively owned by this thread; never shared. Made final (was mutable). */
    private final KafkaConsumer<String, String> consumer;

    /**
     * Creates the thread's private consumer and subscribes it to the topic.
     *
     * @param properties consumer configuration (bootstrap servers, group id, ...)
     * @param topic      topic to subscribe to
     */
    public KafkaConsumerThread(Properties properties, String topic) {
        this.consumer = new KafkaConsumer<>(properties);
        this.consumer.subscribe(Collections.singletonList(topic));
    }

    @Override
    public void run() {
        try {
            // Standard poll loop: only this thread ever touches `consumer`.
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    //TODO process record
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always release the TCP connection and leave the consumer group.
            consumer.close();
        }
    }
}

多线程消费(二)

代码演示

难点在于对消息的顺序处理

public class MultiConsumerDemo2 {
    /** Broker address list; separate multiple brokers with commas. */
    public static final String brokerList = "****";
    public static final String topic = "kafka_demo_analysis";
    /** Name of the consumer group. */
    public static final String groupId = "kafka-learner";

    /**
     * Builds the consumer configuration. Auto-commit is disabled because the
     * poll thread commits offsets manually after handing batches to the pool.
     */
    public static Properties initConfig() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); // consumer group
        properties.put(ConsumerConfig.CLIENT_ID_CONFIG, "0");
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        return properties;
    }

    public static void main(String[] args) {
        Properties properties = initConfig();
        // BUG FIX: the original built the probe consumer from
        // MultiConsumerDemo1.initConfig(), silently dropping
        // ENABLE_AUTO_COMMIT=false; use this class's own config. Also close the
        // probe consumer (the original never did), via try-with-resources.
        final int partitionCount;
        try (KafkaConsumer<String, String> metadataConsumer = new KafkaConsumer<>(properties)) {
            partitionCount = metadataConsumer.partitionsFor(topic).size();
        }
        // ThreadPoolExecutor rejects a maximum pool size of 0, so a topic with
        // fewer than 4 partitions needs the Math.max(1, ...) guard.
        final int consumerThreadNum = Math.max(1, partitionCount / 4);
        for (int i = 0; i < 2; i++) {
            new KafkaConsumerThread2(properties, topic, consumerThreadNum).start();
        }
    }
}
/**
 * Poll thread + worker pool: this thread is the only one calling poll() and
 * commitSync(); record processing is fanned out to an executor. The shared
 * {@code offsets} map is guarded by synchronizing on the map itself.
 */
public class KafkaConsumerThread2 extends Thread {
    /** Private consumer of the poll thread; never touched by workers. */
    private final KafkaConsumer<String, String> consumer;
    /** Worker pool that processes the batches fetched by the poll loop. */
    private final ExecutorService executorService;
    /** Per-partition next-offset-to-commit, pending manual commit. */
    private final Map<TopicPartition, OffsetAndMetadata> offsets;

    public KafkaConsumerThread2(Properties properties, String topic, int threadNum) {
        this.consumer = new KafkaConsumer<>(properties);
        this.consumer.subscribe(Collections.singletonList(topic));
        // CallerRunsPolicy throttles the poll thread instead of dropping
        // batches when the 2000-slot queue fills up.
        this.executorService = new ThreadPoolExecutor(threadNum, threadNum, 0L, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(2000), new ThreadPoolExecutor.CallerRunsPolicy());
        this.offsets = new HashMap<>();
    }

    @Override
    public void run() {
        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                // BUG FIX: the original tested `records.isEmpty()` and therefore
                // only submitted work (and committed) when the batch was EMPTY;
                // the condition must be negated.
                if (!records.isEmpty()) {
                    executorService.submit(() -> {
                        for (TopicPartition topicPartition : records.partitions()) {
                            List<ConsumerRecord<String, String>> recordList = records.records(topicPartition);
                            //TODO process recordList
                            long lastConsumedOffset = recordList.get(recordList.size() - 1).offset();
                            synchronized (offsets) {
                                // Only ever move the pending commit position forward.
                                if (!offsets.containsKey(topicPartition)) {
                                    offsets.put(topicPartition, new OffsetAndMetadata(lastConsumedOffset + 1));
                                } else {
                                    long position = offsets.get(topicPartition).offset();
                                    if (position < lastConsumedOffset + 1) {
                                        offsets.put(topicPartition, new OffsetAndMetadata(lastConsumedOffset + 1));
                                    }
                                }
                            }
                        }
                    });
                    // NOTE(review): committing here can run ahead of workers still
                    // processing earlier batches, so a crash may lose messages —
                    // a known trade-off of this pattern (the article's "难点").
                    synchronized (offsets) {
                        if (!offsets.isEmpty()) {
                            consumer.commitSync(offsets);
                            offsets.clear();
                        }
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            consumer.close();
        }
    }
}