Requirements:
- Implement a custom Source that generates random UUIDs.
- The Source supports three properties read from the configuration file:
  - prefix: a string prepended to the generated UUID; prefix and UUID are joined by ":::"
  - isUpper: if set to true, the returned UUID string is converted to upper case
  - length: the length of the generated UUID string; defaults to 36 (the length of a full UUID string)
- Implement a custom Sink that logs the events taken from the channel to the console.
- The Sink supports two properties read from the configuration file:
  - prefix: a string prepended to the data; prefix and data are joined by ":::"
  - suffix: a string appended to the data; data and suffix are joined by ":::"
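As a concrete illustration (values taken from the sample run at the end of this article): with prefix = MS, isUpper = true and length = 8, the Source produces event bodies such as MS:::1C67ED04; with the Sink configured with prefix = KM and suffix = BH, the logged line becomes KM:::{}:::MS:::1C67ED04:::BH, where {} is the event's (empty) header map.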
Custom Source code:
package com.arc.flume.source;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.PollableSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractSource;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
/**
* @Description: Custom Source that generates random UUID strings
*/
public class MySource extends AbstractSource implements Configurable, PollableSource {
// Prefix prepended to the generated data
private String prefix;
// Whether the UUID string is returned in upper case
private boolean isUpper;
// Length of the generated UUID string
private int length;
private Event getSomeData() {
Event event = new SimpleEvent();
// A full UUID string is 36 characters long, so length must not exceed 36
String uuid = UUID.randomUUID().toString().substring(0, length);
if (isUpper) {
event.setBody((prefix + ":::" + uuid.toUpperCase()).getBytes(StandardCharsets.UTF_8));
} else {
event.setBody((prefix + ":::" + uuid.toLowerCase()).getBytes(StandardCharsets.UTF_8));
}
return event;
}
/**
* Core Source method: collect data and wrap it into an Event. Called repeatedly after the Flume agent starts.
* @return
* @throws EventDeliveryException
*/
@Override
public Status process() throws EventDeliveryException {
Status status = null;
try {
// This try clause includes whatever Channel/Event operations you want to do
// Receive new data
// Generate one event containing a random UUID
Event e = getSomeData();
// Throttle to roughly one event per second
Thread.sleep(1000);
// Store the Event into this Source's associated Channel(s)
// Hand the Event to the ChannelProcessor, which puts it into the configured channel(s)
getChannelProcessor().processEvent(e);
status = Status.READY;
} catch (Throwable t) {
// Log exception, handle individual exceptions as needed
status = Status.BACKOFF;
// re-throw all Errors
if (t instanceof Error) {
throw (Error)t;
}
}
return status;
}
/**
* If no data was fetched from the data source, the runner thread should sleep for a while
* before trying again. getBackOffSleepIncrement() and getMaxBackOffSleepInterval()
* control how long that sleep lasts. The relevant runner code looks like this:
*
* if (source.process().equals(PollableSource.Status.BACKOFF)) {
* counterGroup.incrementAndGet("runner.backoffs");
* Thread.sleep(Math.min(
* counterGroup.incrementAndGet("runner.backoffs.consecutive") * source.getBackOffSleepIncrement(),
* source.getMaxBackOffSleepInterval()));
* } else {
* counterGroup.set("runner.backoffs.consecutive", 0L);
* }
*
* When data is fetched, the status is READY and runner.backoffs.consecutive is reset to 0.
* Otherwise the status is BACKOFF:
* first sleep:
* counterGroup.incrementAndGet("runner.backoffs.consecutive") * source.getBackOffSleepIncrement()
* = 1 * source.getBackOffSleepIncrement()
* second sleep:
* counterGroup.incrementAndGet("runner.backoffs.consecutive") * source.getBackOffSleepIncrement()
* = 2 * source.getBackOffSleepIncrement()
* ....
* but the sleep time never exceeds getMaxBackOffSleepInterval().
* (A small standalone demo of this progression is shown right after this class.)
*
*/
@Override
public long getBackOffSleepIncrement() {
// Back-off sleep increment per consecutive BACKOFF, in milliseconds
return 1000;
}
@Override
public long getMaxBackOffSleepInterval() {
// Upper bound on the back-off sleep, in milliseconds
return 10000;
}
/**
* Reads the Flume configuration:
* a1.sources.r1.<property> = value
* String field = context.getString(property)
*/
@Override
public void configure(Context context) {
prefix = context.getString("prefix", "mySource");
isUpper = context.getBoolean("isUpper", false);
// Default 36 is the length of a full UUID string
length = context.getInteger("length", 36);
}
}
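To make the back-off behaviour described in the Javadoc above concrete, here is a minimal standalone sketch (the BackOffSleepDemo class is illustrative only and not part of Flume or of this project) that reproduces the sleep-time progression the runner computes from the two values MySource returns:

public class BackOffSleepDemo {
    // Same values MySource returns from getBackOffSleepIncrement() / getMaxBackOffSleepInterval()
    private static final long INCREMENT = 1000L;
    private static final long MAX_INTERVAL = 10000L;

    public static void main(String[] args) {
        long consecutiveBackoffs = 0;
        for (int i = 1; i <= 12; i++) {
            consecutiveBackoffs++;
            // Same formula as the runner: min(consecutiveBackoffs * increment, maxInterval)
            long sleepMillis = Math.min(consecutiveBackoffs * INCREMENT, MAX_INTERVAL);
            System.out.println("backoff #" + i + " -> sleep " + sleepMillis + " ms");
        }
        // Prints 1000, 2000, ... up to 10000 and then stays capped at 10000
    }
}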
Custom Sink code:
package com.arc.flume.sink;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
* @Description: Custom Sink that writes events to the console through the logger
*/
public class MySink extends AbstractSink implements Configurable {
private static final Logger LOGGER = LoggerFactory.getLogger(MySink.class);
// Custom property: prefix
private String prefix;
// Custom property: suffix
private String suffix;
private void storeSomeData(Event event) {
Map<String, String> headers = event.getHeaders();
String body = new String(event.getBody(), StandardCharsets.UTF_8);
// Resulting format: prefix:::headers:::body:::suffix
String data = prefix + ":::" + headers + ":::" + body + ":::" + suffix;
LOGGER.info(data);
}
/**
* Core method that processes Events; called in a loop by the Flume sink runner.
* @return
* @throws EventDeliveryException
*/
@Override
public Status process() throws EventDeliveryException {
Status status = null;
// Start transaction
// Get the Channel; each Sink reads from exactly one Channel
Channel ch = getChannel();
// Get a transaction object
Transaction txn = ch.getTransaction();
// Begin the transaction
txn.begin();
try {
// This try clause includes whatever Channel operations you want to do
// Take an Event from the Channel; take() returns null when the channel is empty
Event event = ch.take();
if (event != null) {
// Send the Event to the external repository (here: log it to the console)
storeSomeData(event);
status = Status.READY;
} else {
// Nothing to take; tell the sink runner to back off for a while
status = Status.BACKOFF;
}
// Commit the transaction
txn.commit();
} catch (Throwable t) {
// Something went wrong: roll the transaction back
txn.rollback();
// Log exception, handle individual exceptions as needed
// Mark the status as BACKOFF
status = Status.BACKOFF;
// re-throw all Errors
if (t instanceof Error) {
throw (Error)t;
}
} finally {
// Close the transaction
txn.close();
}
return status;
}
/**
* Reads the configuration and assigns the custom properties.
* @param context
*/
@Override
public void configure(Context context) {
// Process the property values (e.g. validation)
// Store them for later retrieval by the process() method
this.prefix = context.getString("prefix", "MySink-prefix");
this.suffix = context.getString("suffix", "MySink-suffix");
}
@Override
public synchronized void start() {
// Initialize the connection to the external repository (e.g. HDFS) that
// this Sink will forward Events to ..
super.start();
}
@Override
public synchronized void stop() {
// Disconnect from the external repository and do any
// additional cleanup (e.g. releasing resources or nulling-out
// field values) ..
super.stop();
}
}
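Before packaging, both classes can be smoke-tested from a plain main method without a running agent. The wiring below mirrors what the agent builds from the configuration file further down; it is only a rough sketch (the LocalSmokeTest class is hypothetical, no interceptors are configured, and an SLF4J binding such as the one shipped with Flume must be on the classpath for the log line to appear):

import com.arc.flume.sink.MySink;
import com.arc.flume.source.MySource;
import org.apache.flume.Channel;
import org.apache.flume.ChannelSelector;
import org.apache.flume.Context;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.channel.MemoryChannel;
import org.apache.flume.channel.ReplicatingChannelSelector;
import org.apache.flume.conf.Configurables;
import java.util.Collections;

public class LocalSmokeTest {
    public static void main(String[] args) throws Exception {
        // Memory channel, playing the role of a1.channels.c1
        Channel channel = new MemoryChannel();
        Configurables.configure(channel, new Context());
        channel.start();

        // Source configured like a1.sources.r1
        MySource source = new MySource();
        Context sourceCtx = new Context();
        sourceCtx.put("prefix", "MS");
        sourceCtx.put("isUpper", "true");
        sourceCtx.put("length", "8");
        Configurables.configure(source, sourceCtx);
        ChannelSelector selector = new ReplicatingChannelSelector();
        selector.setChannels(Collections.singletonList(channel));
        source.setChannelProcessor(new ChannelProcessor(selector));
        source.start();

        // Sink configured like a1.sinks.k1
        MySink sink = new MySink();
        Context sinkCtx = new Context();
        sinkCtx.put("prefix", "KM");
        sinkCtx.put("suffix", "BH");
        Configurables.configure(sink, sinkCtx);
        sink.setChannel(channel);
        sink.start();

        // One source poll puts an event into the channel, one sink poll logs it
        source.process();
        sink.process();

        source.stop();
        sink.stop();
        channel.stop();
    }
}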
Package the code into a jar and upload it to Flume's lib directory.
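Assuming a Maven build, the packaging and upload could look roughly like this (the jar file name and FLUME_HOME are placeholders for your own project and installation):
mvn clean package
cp target/flume-custom-components-1.0.jar $FLUME_HOME/lib/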
Configuration file:
a1.sources = r1
a1.channels = c1
a1.sinks = k1
a1.sources.r1.type = com.arc.flume.source.MySource
a1.sources.r1.prefix = MS
a1.sources.r1.isUpper = true
a1.sources.r1.length = 8
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.sinks.k1.type = com.arc.flume.sink.MySink
a1.sinks.k1.prefix = KM
a1.sinks.k1.suffix = BH
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
Start Flume (if console logging is not already enabled in log4j.properties, add -Dflume.root.logger=INFO,console):
[admin@adp-01 ~]$ flume-ng agent -n a1 -c conf -f a1.conf
2023-04-27 12:45:05,440 INFO sink.MySink: KM:::{}:::MS:::1C67ED04:::BH
2023-04-27 12:45:06,443 INFO sink.MySink: KM:::{}:::MS:::4A857A87:::BH
2023-04-27 12:45:07,445 INFO sink.MySink: KM:::{}:::MS:::2F02208E:::BH
2023-04-27 12:45:08,453 INFO sink.MySink: KM:::{}:::MS:::15348712:::BH