Custom Kafka Connect Transform

785 阅读1分钟

Kafka Connect常用于Kafka与外界系统之间的数据传输。Source负责将第三方系统的数据写入Kafka topic中,Sink则是将topic中的数据写出到第三方系统。
Kafka Connect能够提供Exactly Once语义,且它提供了各种轻量级的组件,用户可以非常方便地自定义功能。

Transform是connect中的组件,可以用来转换数据类型,增减字段等。

自定义的Transform需要实现Transformation接口。

/**
 * Single message transformation for Kafka Connect record types.
 *
 * Connectors can be configured with transformations to make lightweight message-at-a-time modifications.
 */
public interface Transformation<R extends ConnectRecord<R>> extends Configurable, Closeable {

    /**
     * 1. Receives a Kafka Connect record, applies any modifications, and
     *    returns the (possibly new) record.
     * 2. Implementations must be thread-safe.
     */
    R apply(R record);

    /** Returns the ConfigDef describing the configuration this transform accepts. **/
    ConfigDef config();

    /** Releases any resources held by this transform. **/
    @Override
    void close();
}

public interface Configurable {

    /**
     * Configures this instance from the settings parsed against its ConfigDef.
     */
    void configure(Map<String, ?> configs);

}

比如获取Kafka元数据信息,并将它们加入到body中:

public class FetchKafkaMetaData<R extends ConnectRecord<R>> implements Transformation<R> {
    private String key = null;
    private String timestamp = null;
    private String topic = null;

    private interface ConfigName {
        String CONFIG_NAME_KEY = "key.name";
        String CONFIG_NAME_TIMESTAMP = "timestamp.name";
        String CONFIG_NAME_TOPIC = "topic.name";
    }

    public static final ConfigDef CONFIG_DEF = new ConfigDef()
            .define(ConfigName.CONFIG_NAME_KEY, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for key")
            .define(ConfigName.CONFIG_NAME_TIMESTAMP, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for timestamp");
            .define(ConfigName.CONFIG_NAME_TOPIC, ConfigDef.Type.STRING, ConfigDef.Importance.HIGH, "Field name for topic");

    @Override
    public R apply(R r) {
        if (r.valueSchema() == null) {
            return applySchemaless(r);
        } else {
            return r;
        }
    }
    private R applySchemaless(R r) {
        try {
            ObjectMapper mapper = new ObjectMapper();
            Map<String, Object> value = new HashMap<>();
            if (r.value() != null) {
                //要根据数据类型convert
                value = mapper.convertValue(r.value(), Map.class);
            }
            ObjectWriter writer = mapper.writerFor(Map.class);
            value.put(key, r.key());
            value.put(timestamp, r.timestamp());
            value.put(topic, r.topic());
            byte[] bytes = writer.writeValueAsBytes(value);
            return r.newRecord(r.topic(), r.kafkaPartition(), r.keySchema(), r.key(), r.valueSchema(), bytes, r.timestamp());
        } catch (Exception e) {
            return r;
        }
    }
    @Override
    public ConfigDef config() {
        return CONFIG_DEF;
    }
    @Override
    public void close() {

    }
    @Override
    public void configure(Map<String, ?> map) {
        final SimpleConfig config = new SimpleConfig(CONFIG_DEF, map);
        key = config.getString(ConfigName.CONFIG_NAME_KEY);
        timestamp = config.getString(ConfigName.CONFIG_NAME_TIMESTAMP);
        topic = config.getString(ConfigName.CONFIG_NAME_TOPIC);
    }
}