1. 准备过程
创建 Topic
kafka-topics.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--topic t1 \
--partitions 3 \
--replication-factor 3 \
--create
向 Topic 发送 9 条数据(key 为 0~8,按 key 的哈希值分布到各个 Partition,实际分布为 4/2/3 条),发送间隔 5 秒,代码如下:
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
/**
 * Demo producer: sends 9 keyed messages ("0".."8") to topic {@code t1},
 * one every 5 seconds. No explicit timestamp is set on the records, so the
 * broker assigns the append time (see the timezone discussion below).
 */
public class MyProducer {
    public static void main(String[] args) throws Exception {
        Properties producerConfig = new Properties();
        producerConfig.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "adp-01:9092,adp-02:9092,adp-03:9092");
        producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // try-with-resources guarantees the producer is flushed and closed
        // even if send() or sleep() throws.
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(producerConfig)) {
            String targetTopic = "t1";
            for (int i = 0; i < 9; i++) {
                // Key decides the target partition (hash of the key), so the
                // 9 messages are NOT spread evenly across the 3 partitions.
                producer.send(new ProducerRecord<>(targetTopic, Integer.toString(i), "hello kafka"));
                Thread.sleep(5000);
            }
        }
    }
}
使用消费者查看每条数据的时间戳:
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
public class AutoCommitDemo {
public static void main(String[] args) {
Properties consumerConfig = new Properties();
consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "adp-01:9092,adp-02:9092,adp-03:9092");
consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, "g1");
consumerConfig.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerConfig);
List<String> topics = new ArrayList<>();
topics.add("t1");
consumer.subscribe(topics);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
// 持续消费数据
while (true) {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.printf("{"topic":"%s","partition":%s,"offset":%s,"key":"%s","value":"%s","timestamp":"%s"}%n",
record.topic(),
record.partition(),
record.offset(),
record.key(),
record.value(),
sdf.format(new Date(record.timestamp()))
);
}
}
}
}
根据输出得到:
- partition 2 offset 0 timestamp 2024-01-11T18:59:56.641
- partition 2 offset 1 timestamp 2024-01-11T19:00:06.656
- partition 2 offset 2 timestamp 2024-01-11T19:00:11.660
- partition 1 offset 0 timestamp 2024-01-11T19:00:16.665
- partition 1 offset 1 timestamp 2024-01-11T19:00:26.675
- partition 0 offset 0 timestamp 2024-01-11T19:00:01.650
- partition 0 offset 1 timestamp 2024-01-11T19:00:21.669
- partition 0 offset 2 timestamp 2024-01-11T19:00:31.681
- partition 0 offset 3 timestamp 2024-01-11T19:00:36.688
查看目前消费组 g1 的消费情况:
kafka-consumer-groups.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--group g1 \
--describe
GROUP TOPIC PARTITION CURRENT-OFFSET LOG-END-OFFSET LAG CONSUMER-ID HOST CLIENT-ID
g1 t1 2 3 3 0 - - -
g1 t1 1 2 2 0 - - -
g1 t1 0 4 4 0 - - -
2. 重置消费偏移量测试
重置消费偏移量:
kafka-consumer-groups.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--topic t1 \
--group g1 \
--reset-offsets \
--to-datetime 2024-01-11T19:00:00.000 \
--execute
# 期望输出
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 0
g1 t1 1 0
g1 t1 2 1
# 实际输出,不符合预期
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 4
g1 t1 1 2
g1 t1 2 3
问题分析:Kafka 的每条消息都带有一个时间戳,它是一个 epoch 毫秒值,本身不携带任何时区信息。该时间戳可以由生产者程序设置;如果生产者没有设置,则服务端将消息写入 Kafka 的时刻填入。问题出在 kafka-consumer-groups.sh 对 `--to-datetime` 参数的解析上:当传入的时间字符串不带时区偏移量时,工具会按 UTC 时区来解释它。
在本文的测试中,服务器时区是 "Asia/Shanghai"(UTC+8),消息写入的本地时间约为 19:00,对应的 UTC 时间是 11:00。而 `--to-datetime 2024-01-11T19:00:00.000` 被按 UTC 解析,相当于本地时间次日 03:00,晚于所有消息的时间戳,因此每个分区的新偏移量都被重置到了 LOG-END-OFFSET,与预期不符。
解决办法:设置正确的时区
# 解决时区问题:在时间字符串末尾显式指定 +08:00 时区偏移量
kafka-consumer-groups.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--topic t1 \
--group g1 \
--reset-offsets \
--to-datetime 2024-01-11T19:00:00.000+08:00 \
--execute
# 输出,符合预期
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 0
g1 t1 1 0
g1 t1 2 1
kafka-consumer-groups.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--topic t1 \
--group g1 \
--reset-offsets \
--to-datetime 2024-01-11T19:00:20.000+08:00 \
--execute
# 期望输出
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 1
g1 t1 1 1
g1 t1 2 3
# 实际输出,符合预期
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 1
g1 t1 1 1
g1 t1 2 3
或者将重置的时间减8个小时:
kafka-consumer-groups.sh \
--bootstrap-server adp-01:9092,adp-02:9092,adp-03:9092 \
--topic t1 \
--group g1 \
--reset-offsets \
--to-datetime 2024-01-11T11:00:21.669 \
--execute
# 输出符合预期
GROUP TOPIC PARTITION NEW-OFFSET
g1 t1 0 1
g1 t1 1 1
g1 t1 2 3
3. 总结
- 设置重置的时间戳时要注意时区问题
- 设置重置到时间 t1 后,每个分区的新偏移量被设置为时间戳 >= t1 的消息中的最小偏移量;若分区中不存在时间戳 >= t1 的消息,则该分区被重置到 LOG-END-OFFSET(这正是第一次测试中看到的现象)