My personal GitHub: github.com/AmbitionTn/… . I hope the article below is helpful; if you find it interesting, feel free to follow — I keep updating and keep learning.
The Kafka source code in this series is based on Kafka 3.3.
Next article: Kafka Source Code Analysis 2 — The Producer Sends Messages to the Buffer
1. Basic Producer Usage
Below is a simple example of using the Kafka producer:
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

public class KafkaTest {
    private static String topicName;

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.1.5:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        topicName = "ambition";
        Producer<String, String> producer = new KafkaProducer<>(props);
        for (int i = 0; i < 20; i++) {
            String msg = i + " This is Ambition's blog.";
            producer.send(new ProducerRecord<>(topicName, msg));
        }
        producer.close();
    }
}
- As the example shows, Kafka exposes a very simple API; using it takes only two steps:
- Initialize a KafkaProducer
- Call send() to deliver records to the broker (a callback variant is sketched below)
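The example above is fire-and-forget. send() also accepts a Callback that is invoked once the broker acknowledges the record or the send fails. A minimal sketch, reusing topicName, i, and msg from the loop above (the "key-" prefix is just an illustration):

producer.send(new ProducerRecord<>(topicName, "key-" + i, msg), (metadata, exception) -> {
    if (exception != null) {
        exception.printStackTrace(); // the send failed after exhausting retries
    } else {
        System.out.printf("sent to %s-%d@%d%n",
                metadata.topic(), metadata.partition(), metadata.offset());
    }
});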
2. Producer Fields
public class KafkaProducer<K, V> implements Producer<K, V> {
    private final Logger log;
    // Prefix shown in JMX
    private static final String JMX_PREFIX = "kafka.producer";
    // Prefix for the network thread name
    public static final String NETWORK_THREAD_PREFIX = "kafka-producer-network-thread";
    // Metric group name for producer metrics
    public static final String PRODUCER_METRIC_GROUP_NAME = "producer-metrics";
    // Producer client id
    private final String clientId;
    // Visible for testing
    // Metrics registry
    final Metrics metrics;
    // Producer-specific metrics
    private final KafkaProducerMetrics producerMetrics;
    // Partitioner
    private final Partitioner partitioner;
    // max.request.size: maximum size in bytes of a single produce request (default: 1 MB).
    // The broker enforces its own size limit as well (applied to the compressed size when
    // compression is enabled), which is separate from this field.
    private final int maxRequestSize;
    // buffer.memory: maximum bytes of not-yet-sent records the producer caches in memory (default: 32 MB)
    private final long totalMemorySize;
    // Cluster metadata, including nodes, partitions, and so on
    private final ProducerMetadata metadata;
    // Local buffer that accumulates records before they are sent
    private final RecordAccumulator accumulator;
    // Runnable that sends records asynchronously
    private final Sender sender;
    // Background I/O thread that runs the Sender
    private final Thread ioThread;
    // Compression type for record batches
    private final CompressionType compressionType;
    // Sensor that records errors
    private final Sensor errors;
    // Clock abstraction
    private final Time time;
    // Key serializer
    private final Serializer<K> keySerializer;
    // Value serializer
    private final Serializer<V> valueSerializer;
    // Producer configuration
    private final ProducerConfig producerConfig;
    // max.block.ms: maximum time to wait for cluster metadata or buffer allocation (default: 1 min)
    private final long maxBlockTimeMs;
    // true: the record key is not used to choose a partition; records are distributed as if they had no key
    // false: the partitioner hashes the record key to choose the partition
    private final boolean partitionerIgnoreKeys;
    // Producer interceptors
    private final ProducerInterceptors<K, V> interceptors;
    // Broker API versions
    private final ApiVersions apiVersions;
    // Transaction manager
    private final TransactionManager transactionManager;
}
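Most of these fields are filled in directly from producer configs. As a quick cross-reference, here is a sketch (reusing the imports from the example in section 1; the values shown are simply the documented defaults written out explicitly) of which config key feeds which field:

Properties props = new Properties();
props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 1048576);       // -> maxRequestSize (1 MB)
props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432L);        // -> totalMemorySize (32 MB)
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "none");        // -> compressionType
props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 60000L);            // -> maxBlockTimeMs (1 min)
props.put(ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG, false);  // -> partitionerIgnoreKeys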
3. Producer Initialization
Below is the KafkaProducer initialization code:
public KafkaProducer(Map<String, Object> configs, Serializer<K> keySerializer, Serializer<V> valueSerializer) {
this(new ProducerConfig(ProducerConfig.appendSerializerToConfig(configs, keySerializer, valueSerializer)),
keySerializer, valueSerializer, null, null, null, Time.SYSTEM);
}
KafkaProducer(ProducerConfig config,
Serializer<K> keySerializer,
Serializer<V> valueSerializer,
ProducerMetadata metadata,
KafkaClient kafkaClient,
ProducerInterceptors<K, V> interceptors,
Time time) {
try {
this.producerConfig = config;
this.time = time;
// Transactional id (if configured)
String transactionalId = config.getString(ProducerConfig.TRANSACTIONAL_ID_CONFIG);
// Client id
this.clientId = config.getString(ProducerConfig.CLIENT_ID_CONFIG);
// Logging context
LogContext logContext;
if (transactionalId == null)
logContext = new LogContext(String.format("[Producer clientId=%s] ", clientId));
else
logContext = new LogContext(String.format("[Producer clientId=%s, transactionalId=%s] ", clientId, transactionalId));
log = logContext.logger(KafkaProducer.class);
log.trace("Starting the Kafka producer");
// Metrics setup
Map<String, String> metricTags = Collections.singletonMap("client-id", clientId);
MetricConfig metricConfig = new MetricConfig().samples(config.getInt(ProducerConfig.METRICS_NUM_SAMPLES_CONFIG))
.timeWindow(config.getLong(ProducerConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS)
.recordLevel(Sensor.RecordingLevel.forName(config.getString(ProducerConfig.METRICS_RECORDING_LEVEL_CONFIG)))
.tags(metricTags);
List<MetricsReporter> reporters = config.getConfiguredInstances(ProducerConfig.METRIC_REPORTER_CLASSES_CONFIG,
MetricsReporter.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
JmxReporter jmxReporter = new JmxReporter();
jmxReporter.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)));
reporters.add(jmxReporter);
MetricsContext metricsContext = new KafkaMetricsContext(JMX_PREFIX,
config.originalsWithPrefix(CommonClientConfigs.METRICS_CONTEXT_PREFIX));
this.metrics = new Metrics(metricConfig, reporters, time, metricsContext);
this.producerMetrics = new KafkaProducerMetrics(metrics);
// Partitioner
this.partitioner = config.getConfiguredInstance(
ProducerConfig.PARTITIONER_CLASS_CONFIG,
Partitioner.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
// Warn if a deprecated built-in partitioner is configured
warnIfPartitionerDeprecated();
this.partitionerIgnoreKeys = config.getBoolean(ProducerConfig.PARTITIONER_IGNORE_KEYS_CONFIG);
long retryBackoffMs = config.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG);
// Key serializer
if (keySerializer == null) {
this.keySerializer = config.getConfiguredInstance(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.keySerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), true);
} else {
config.ignore(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG);
this.keySerializer = keySerializer;
}
// Value serializer
if (valueSerializer == null) {
this.valueSerializer = config.getConfiguredInstance(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
Serializer.class);
this.valueSerializer.configure(config.originals(Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId)), false);
} else {
config.ignore(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG);
this.valueSerializer = valueSerializer;
}
// Interceptors
List<ProducerInterceptor<K, V>> interceptorList = (List) config.getConfiguredInstances(
ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,
ProducerInterceptor.class,
Collections.singletonMap(ProducerConfig.CLIENT_ID_CONFIG, clientId));
if (interceptors != null)
this.interceptors = interceptors;
else
this.interceptors = new ProducerInterceptors<>(interceptorList);
ClusterResourceListeners clusterResourceListeners = configureClusterResourceListeners(keySerializer,
valueSerializer, interceptorList, reporters);
// max.request.size: maximum request size the producer will send (default: 1 MB)
this.maxRequestSize = config.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG);
// buffer.memory: maximum bytes the producer buffers locally (default: 32 MB)
this.totalMemorySize = config.getLong(ProducerConfig.BUFFER_MEMORY_CONFIG);
/**
 * Kafka can compress record batches; compression shrinks the
 * payload and improves overall throughput.
 */
this.compressionType = CompressionType.forName(config.getString(ProducerConfig.COMPRESSION_TYPE_CONFIG));
// max.block.ms: maximum wait for cluster metadata or buffer allocation (default: 1 min)
this.maxBlockTimeMs = config.getLong(ProducerConfig.MAX_BLOCK_MS_CONFIG);
// delivery.timeout.ms (default: 2 min); must be at least linger.ms + request.timeout.ms
int deliveryTimeoutMs = configureDeliveryTimeout(config, log);
this.apiVersions = new ApiVersions();
// Transaction manager
this.transactionManager = configureTransactionState(config, logContext);
// There is no need to do work required for adaptive partitioning, if we use a custom partitioner.
boolean enableAdaptivePartitioning = partitioner == null &&
config.getBoolean(ProducerConfig.PARTITIONER_ADPATIVE_PARTITIONING_ENABLE_CONFIG);
RecordAccumulator.PartitionerConfig partitionerConfig = new RecordAccumulator.PartitionerConfig(
enableAdaptivePartitioning,
config.getLong(ProducerConfig.PARTITIONER_AVAILABILITY_TIMEOUT_MS_CONFIG)
);
/**
 * Create the RecordAccumulator, the buffer that holds records before they are sent.
 * batch.size (bytes, default: 16 KB): a batch is ready to send once it reaches this size.
 * linger.ms (default: 0): maximum time a record may wait in a batch before it is sent anyway.
 */
this.accumulator = new RecordAccumulator(logContext,
config.getInt(ProducerConfig.BATCH_SIZE_CONFIG),
this.compressionType,
lingerMs(config),
retryBackoffMs,
deliveryTimeoutMs,
partitionerConfig,
metrics,
PRODUCER_METRIC_GROUP_NAME,
time,
apiVersions,
transactionManager,
new BufferPool(this.totalMemorySize, config.getInt(ProducerConfig.BATCH_SIZE_CONFIG), metrics, time, PRODUCER_METRIC_GROUP_NAME));
// Parse and validate the bootstrap server addresses
List<InetSocketAddress> addresses = ClientUtils.parseAndValidateAddresses(
config.getList(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG),
config.getString(ProducerConfig.CLIENT_DNS_LOOKUP_CONFIG));
/**
 * Create the ProducerMetadata object (unless one was injected for testing)
 * and bootstrap it; the actual cluster metadata is fetched over the network later.
 */
if (metadata != null) {
this.metadata = metadata;
} else {
this.metadata = new ProducerMetadata(retryBackoffMs,
config.getLong(ProducerConfig.METADATA_MAX_AGE_CONFIG),
config.getLong(ProducerConfig.METADATA_MAX_IDLE_CONFIG),
logContext,
clusterResourceListeners,
Time.SYSTEM);
this.metadata.bootstrap(addresses);
}
// Create the error sensor
this.errors = this.metrics.sensor("errors");
// Create the Sender runnable
this.sender = newSender(logContext, kafkaClient, this.metadata);
String ioThreadName = NETWORK_THREAD_PREFIX + " | " + clientId;
/**
 * Create a Kafka I/O thread and hand it the Sender runnable.
 * Keeping the business logic separate from the thread setup
 * makes the code easier to manage and maintain.
 */
this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
this.ioThread.start();
config.logUnused();
AppInfoParser.registerAppInfo(JMX_PREFIX, clientId, metrics, time.milliseconds());
log.debug("Kafka producer started");
} catch (Throwable t) {
// call close methods if internal objects are already constructed this is to prevent resource leak. see KAFKA-2121
close(Duration.ofMillis(0), true);
// now propagate the exception
throw new KafkaException("Failed to construct kafka producer", t);
}
}
// visible for testing
Sender newSender(LogContext logContext, KafkaClient kafkaClient, ProducerMetadata metadata) {
int maxInflightRequests = producerConfig.getInt(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION);
// request.timeout.ms (default: 30 s)
int requestTimeoutMs = producerConfig.getInt(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG);
ChannelBuilder channelBuilder = ClientUtils.createChannelBuilder(producerConfig, time, logContext);
ProducerMetrics metricsRegistry = new ProducerMetrics(this.metrics);
Sensor throttleTimeSensor = Sender.throttleTimeSensor(metricsRegistry.senderMetrics);
/**
 * Create the KafkaClient.
 * connections.max.idle.ms: close idle channels after this long (default: 9 min)
 */
KafkaClient client = kafkaClient != null ? kafkaClient : new NetworkClient(
new Selector(producerConfig.getLong(ProducerConfig.CONNECTIONS_MAX_IDLE_MS_CONFIG),
this.metrics, time, "producer", channelBuilder, logContext),
metadata,
clientId,
maxInflightRequests,
producerConfig.getLong(ProducerConfig.RECONNECT_BACKOFF_MS_CONFIG),
producerConfig.getLong(ProducerConfig.RECONNECT_BACKOFF_MAX_MS_CONFIG),
producerConfig.getInt(ProducerConfig.SEND_BUFFER_CONFIG),
producerConfig.getInt(ProducerConfig.RECEIVE_BUFFER_CONFIG),
requestTimeoutMs,
producerConfig.getLong(ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG),
producerConfig.getLong(ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG),
time,
true,
apiVersions,
throttleTimeSensor,
logContext);
/**
 * Create the Sender.
 * acks=0: the producer does not wait for any broker response (fire and forget)
 * acks=1: the leader acknowledges once it has written to its local log,
 *         without waiting for followers
 * acks=-1 or acks=all: the leader waits for all in-sync replicas, the
 *         strongest durability guarantee (the default since Kafka 3.0)
 *
 * max.request.size: maximum request size (default: 1 MB)
 * retries: number of retries (default: Integer.MAX_VALUE, bounded by delivery.timeout.ms)
 * retry.backoff.ms: interval between retries (default: 100 ms)
 */
short acks = Short.parseShort(producerConfig.getString(ProducerConfig.ACKS_CONFIG));
return new Sender(logContext,
client,
metadata,
this.accumulator,
maxInflightRequests == 1,
producerConfig.getInt(ProducerConfig.MAX_REQUEST_SIZE_CONFIG),
acks,
producerConfig.getInt(ProducerConfig.RETRIES_CONFIG),
metricsRegistry.senderMetrics,
time,
requestTimeoutMs,
producerConfig.getLong(ProducerConfig.RETRY_BACKOFF_MS_CONFIG),
this.transactionManager,
apiVersions);
}
- Let's review the core components and parameters initialized in the KafkaProducer constructor:
- Partitioner (core component): decides which partition a record is assigned to
- keySerializer (core component): serializes the record key (configured via key.serializer)
- valueSerializer (core component): serializes the record value (configured via value.serializer)
- interceptors (core component): pre-process records and observe send results
- RecordAccumulator (core component): the producer-side buffer that holds records not yet sent to the broker, organized into batches
- batch.size: 16 KB by default, meaning a batch is sent once 16 KB of records have accumulated. On its own this has a problem: if you send one record and no further records follow, the batch never reaches 16 KB and would never be sent. Setting linger.ms solves this
- linger.ms: an upper bound on how long a record may wait in an incomplete batch. If, say, linger.ms=5 and the batch still has not filled after 5 ms, the record is sent anyway (see the tuning sketch after this list)
- metadata (core component): cluster metadata pulled from the brokers over the network (Node, partition, ISR, and so on), used by the Sender thread when sending
- metadata.max.age.ms: interval at which the metadata is forcibly refreshed (default: 5 min)
- sender (core component): the Sender thread that sends the batched records from the accumulator in the background
- acks: 0 = don't wait for any acknowledgement; 1 = success once the leader has written locally; -1/all = wait until the leader and all in-sync replicas have the data, guaranteeing no data loss (default: all since Kafka 3.0)
- retries: number of retries (default: Integer.MAX_VALUE, bounded in practice by delivery.timeout.ms)
- NetworkClient (core component): the network communication component
- connections.max.idle.ms: how long idle connections are kept open (default: 9 min)
- reconnect.backoff.ms: backoff between reconnect attempts (default: 50 ms)
- max.in.flight.requests.per.connection: maximum unacknowledged requests per connection (default: 5)
- send.buffer.bytes: TCP send buffer size (default: 128 KB)
- receive.buffer.bytes: TCP receive buffer size (default: 32 KB for the producer)
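As referenced in the list above, here is a short tuning sketch for the batching and durability configs (the concrete values are illustrative, not recommendations):

Properties props = new Properties();
props.put(ProducerConfig.BATCH_SIZE_CONFIG, 32 * 1024);   // larger batches amortize per-request overhead
props.put(ProducerConfig.LINGER_MS_CONFIG, 5);            // wait up to 5 ms for a batch to fill
props.put(ProducerConfig.ACKS_CONFIG, "all");             // wait for all in-sync replicas
props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "lz4"); // trade CPU for smaller requests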
4. Sender Thread Initialization
/**
 * Create a Kafka I/O thread and hand it the Sender runnable.
 * Keeping the business logic separate from the thread setup
 * makes the code easier to manage and maintain.
 */
this.ioThread = new KafkaThread(ioThreadName, this.sender, true);
public KafkaThread(final String name, Runnable runnable, boolean daemon) {
super(runnable, name);
configureThread(name, daemon);
}
private void configureThread(final String name, boolean daemon) {
setDaemon(daemon);
setUncaughtExceptionHandler((t, e) -> log.error("Uncaught exception in thread '{}':", name, e));
}
- Thread name: kafka-producer-network-thread | clientId
- As the source above shows, the Sender runnable is passed into KafkaThread, which configures it as a daemon thread. This pattern is worth borrowing whenever you design daemon threads, because it decouples the business logic from the thread definition; a minimal sketch follows.
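A minimal sketch of that pattern (my own illustration, not Kafka's code): thread concerns such as the name, the daemon flag, and uncaught-exception logging are configured in one reusable place, while the business logic stays in a plain Runnable.

public final class DaemonThreads {
    public static Thread start(String name, Runnable task) {
        Thread t = new Thread(task, name);
        t.setDaemon(true); // the JVM can exit without waiting for this thread
        t.setUncaughtExceptionHandler((thread, e) ->
                System.err.printf("Uncaught exception in thread '%s': %s%n", name, e));
        t.start();
        return t;
    }
}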
5. How Properties Are Converted into Config Objects
ProducerConfig extends AbstractConfig:
public AbstractConfig(ConfigDef definition, Map<?, ?> originals, Map<String, ?> configProviderProps, boolean doLog) {
    /* check that all the keys are really strings */
    for (Map.Entry<?, ?> entry : originals.entrySet())
        if (!(entry.getKey() instanceof String))
            throw new ConfigException(entry.getKey().toString(), entry.getValue(), "Key must be a string.");
    // Resolve config-provider variables, then parse the raw values against the ConfigDef
    this.originals = resolveConfigVariables(configProviderProps, (Map<String, Object>) originals);
    this.values = definition.parse(this.originals);
    // Let subclasses (such as ProducerConfig) post-process the parsed values, then re-validate
    Map<String, Object> configUpdates = postProcessParsedConfig(Collections.unmodifiableMap(this.values));
    for (Map.Entry<String, Object> update : configUpdates.entrySet()) {
        this.values.put(update.getKey(), update.getValue());
    }
    definition.parse(this.values);
    this.definition = definition;
    if (doLog)
        logAll();
}
Call chain: AbstractConfig --> parse --> parseValue --> parseType. parseType is the final parsing step: it takes the config name, the raw value, and the declared type, and returns the value converted to an instance of that type:
public static Object parseType(String name, Object value, Type type) {
try {
if (value == null) return null;
String trimmed = null;
if (value instanceof String)
trimmed = ((String) value).trim();
switch (type) {
case BOOLEAN:
if (value instanceof String) {
if (trimmed.equalsIgnoreCase("true"))
return true;
else if (trimmed.equalsIgnoreCase("false"))
return false;
else
throw new ConfigException(name, value, "Expected value to be either true or false");
} else if (value instanceof Boolean)
return value;
else
throw new ConfigException(name, value, "Expected value to be either true or false");
case PASSWORD:
if (value instanceof Password)
return value;
else if (value instanceof String)
return new Password(trimmed);
else
throw new ConfigException(name, value, "Expected value to be a string, but it was a " + value.getClass().getName());
case STRING:
if (value instanceof String)
return trimmed;
else
throw new ConfigException(name, value, "Expected value to be a string, but it was a " + value.getClass().getName());
case INT:
if (value instanceof Integer) {
return value;
} else if (value instanceof String) {
return Integer.parseInt(trimmed);
} else {
throw new ConfigException(name, value, "Expected value to be a 32-bit integer, but it was a " + value.getClass().getName());
}
case SHORT:
if (value instanceof Short) {
return value;
} else if (value instanceof String) {
return Short.parseShort(trimmed);
} else {
throw new ConfigException(name, value, "Expected value to be a 16-bit integer (short), but it was a " + value.getClass().getName());
}
case LONG:
if (value instanceof Integer)
return ((Integer) value).longValue();
if (value instanceof Long)
return value;
else if (value instanceof String)
return Long.parseLong(trimmed);
else
throw new ConfigException(name, value, "Expected value to be a 64-bit integer (long), but it was a " + value.getClass().getName());
case DOUBLE:
if (value instanceof Number)
return ((Number) value).doubleValue();
else if (value instanceof String)
return Double.parseDouble(trimmed);
else
throw new ConfigException(name, value, "Expected value to be a double, but it was a " + value.getClass().getName());
case LIST:
if (value instanceof List)
return value;
else if (value instanceof String)
if (trimmed.isEmpty())
return Collections.emptyList();
else
return Arrays.asList(COMMA_WITH_WHITESPACE.split(trimmed, -1));
else
throw new ConfigException(name, value, "Expected a comma separated list.");
case CLASS:
if (value instanceof Class)
return value;
else if (value instanceof String) {
ClassLoader contextOrKafkaClassLoader = Utils.getContextOrKafkaClassLoader();
// Use loadClass here instead of Class.forName because the name we use here may be an alias
// and not match the name of the class that gets loaded. If that happens, Class.forName can
// throw an exception.
Class<?> klass = contextOrKafkaClassLoader.loadClass(trimmed);
// Invoke forName here with the true name of the requested class to cause class
// initialization to take place.
return Class.forName(klass.getName(), true, contextOrKafkaClassLoader);
} else
throw new ConfigException(name, value, "Expected a Class instance or class name.");
default:
throw new IllegalStateException("Unknown type.");
}
} catch (NumberFormatException e) {
throw new ConfigException(name, value, "Not a number of type " + type);
} catch (ClassNotFoundException e) {
throw new ConfigException(name, value, "Class " + value + " could not be found.");
}
}
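Since ConfigDef.parseType is public static, its behavior is easy to observe directly. A small usage sketch (import org.apache.kafka.common.config.ConfigDef; the config names are just examples):

Object lingerMs = ConfigDef.parseType("linger.ms", " 5 ", ConfigDef.Type.LONG);
// -> 5L: string values are trimmed and parsed as a long
Object acks = ConfigDef.parseType("acks", " all ", ConfigDef.Type.STRING);
// -> "all": strings are returned trimmed
Object serializer = ConfigDef.parseType("key.serializer",
        "org.apache.kafka.common.serialization.StringSerializer", ConfigDef.Type.CLASS);
// -> the StringSerializer Class object, loaded via the context or Kafka classloader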
6. Summary
- This article is only a brief walkthrough of KafkaProducer initialization. To understand the parameters listed above in more depth, read the Kafka source code alongside the official Kafka documentation.