Kafka Connect常用于Kafka与外界之间的各种数据传输。Source将第三方数据读取并写入Kafka topic中,Sink则是将topic中的数据写出到第三方。
Kafka Connect能保证Exactly Once语义,且它提供了轻量级的各种组件,用户可以非常方便地自定义功能。
Transform是connect中的组件,可以用来转换数据类型,增减字段等。
自定义的Transform需要实现Transformation接口。
/**
 * Single message transformation for Kafka Connect record types.
 *
 * Connectors can be configured with transformations to make lightweight message-at-a-time modifications.
 */
public interface Transformation<R extends ConnectRecord<R>> extends Configurable, Closeable {
/**
 * 1. Receives the Kafka Connect record, may inspect/modify it, and returns the
 *    (possibly new) record to forward down the pipeline.
 * 2. Implementations must be thread-safe.
 */
R apply(R record);
/** Returns the ConfigDef describing the configuration options this transformation accepts. **/
ConfigDef config();
/** Releases any resources held by this transformation. **/
@Override
void close();
}
public interface Configurable {
/**
 * Called once with the configuration map (keys as declared in the ConfigDef)
 * before the component is used.
 */
void configure(Map<String, ?> configs);
}
比如获取Kafka元数据信息,并将它们加入到消息体(body)中:
public class FetchKafkaMetaData<R extends ConnectRecord<R>> implements Transformation<R> {
private String key = null;
private String timestamp = null;
private String topic = null;
private interface ConfigName {
String CONFIG_NAME_KEY = "key.name";
String CONFIG_NAME_TIMESTAMP = "timestamp.name";
String CONFIG_NAME_TOPIC = "topic.name";
}
public static final ConfigDef CONFIG_DEF = new ConfigDef()
.define(ConfigName.CONFIG_NAME_KEY, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for key")
.define(ConfigName.CONFIG_NAME_TIMESTAMP, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for timestamp");
.define(ConfigName.CONFIG_NAME_TOPIC, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for topic");
@Override
public R apply(R r) {
if (r.valueSchema() == null) {
return applySchemaless(r);
} else {
return r;
}
}
private R applySchemaless(R r) {
try {
ObjectMapper mapper = new ObjectMapper();
Map<String, Object> value = new HashMap<>();
if (r.value() != null) {
//要根据数据类型convert
value = mapper.convertValue(r.value(), Map.class);
}
ObjectWriter writer = mapper.writerFor(Map.class);
value.put(key, r.key());
value.put(timestamp, r.timestamp());
value.put(topic, r.topic());
byte[] bytes = writer.writeValueAsBytes(value);
return r.newRecord(r.topic(), r.kafkaPartition(), r.keySchema(), r.key(), r.valueSchema(), bytes, r.timestamp());
} catch (Exception e) {
return r;
}
}
@Override
public ConfigDef config() {
return CONFIG_DEF;
}
@Override
public void close() {
}
@Override
public void configure(Map<String, ?> map) {
final SimpleConfig config = new SimpleConfig(CONFIG_DEF, map);
key = config.getString(ConfigName.CONFIG_NAME_KEY);
timestamp = config.getString(ConfigName.CONFIG_NAME_TIMESTAMP);
topic = config.getString(ConfigName.CONFIG_NAME_TOPIC);
}
}