本文已参与「新人创作礼」活动,一起开启掘金创作之路。
一 自定义Interceptor
1 需求分析
2 实现步骤
2.1 创建Maven项目,添加依赖
<dependencies>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>1.9.0</version>
</dependency>
</dependencies>
2.2 定义CustomInterceptor类并实现Interceptor接口
/**
 * Custom interceptor: inspects the first byte of each event body and tags the
 * event headers so a multiplexing channel selector can route the event.
 *   - body starts with a lowercase letter -> header type=letter
 *   - body starts with a digit            -> header type=number
 * Note: the header key is "type", which must match the selector.header
 * property in the agent configuration.
 */
public class CustomInterceptor implements Interceptor {
    @Override
    public void initialize() {
        // No resources to set up.
    }

    /**
     * Tags a single event; the ChannelProcessor calls this for every event.
     * Events with an empty body, or a body starting with any other character,
     * are returned untagged and fall through to the selector's default routing.
     */
    @Override
    public Event intercept(Event event) {
        byte[] body = event.getBody();
        // Guard against a null/empty body (e.g. a blank netcat line) to avoid
        // an ArrayIndexOutOfBoundsException on body[0].
        if (body == null || body.length == 0) {
            return event;
        }
        byte first = body[0];
        if (first >= 'a' && first <= 'z') {
            event.getHeaders().put("type", "letter");
        } else if (first >= '0' && first <= '9') {
            event.getHeaders().put("type", "number");
        }
        return event;
    }

    // Batch variant: delegates to the single-event intercept for each event.
    @Override
    public List<Event> intercept(List<Event> events) {
        for (Event event : events) {
            intercept(event);
        }
        return events;
    }

    @Override
    public void close() {
        // No resources to release.
    }

    /** Static builder; Flume instantiates the interceptor through this class. */
    public static class Builder implements Interceptor.Builder {
        // Returns a new instance of the interceptor.
        @Override
        public Interceptor build() {
            return new CustomInterceptor();
        }

        @Override
        public void configure(Context context) {
            // No configuration needed.
        }
    }
}
2.3 编辑flume配置文件
为hadoop101上的Flume1配置1个netcat source和2个avro sink(分别绑定到c1、c2两个channel),并配置相应的ChannelSelector(multiplexing)和interceptor。
# agent1 (hadoop101): netcat source -> custom interceptor -> multiplexing selector -> 2 avro sinks
a1.sources = r1
a1.channels = c1 c2
a1.sinks = k1 k2
a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop101
a1.sources.r1.port = 11111
# Multiplexing selector: routes by the "type" header set by the interceptor
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = type
a1.sources.r1.selector.mapping.letter = c1
a1.sources.r1.selector.mapping.number = c2
# Custom interceptor
a1.sources.r1.interceptors = i1
# Note: the type is the interceptor's inner Builder class, and the class name
# must match the CustomInterceptor class defined above
a1.sources.r1.interceptors.i1.type = com.hike.demo.CustomInterceptor$Builder
a1.channels.c1.type = memory
a1.channels.c2.type = memory
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = hadoop102
a1.sinks.k1.port = 22222
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = hadoop103
a1.sinks.k2.port = 33333
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2
-----------------------------------------------------------------
# agent2 (hadoop102): receives "letter" events from agent1's k1 sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1
a1.sources.r1.type = avro
# Must match agent1's k1 sink target (hadoop102:22222)
a1.sources.r1.bind = hadoop102
a1.sources.r1.port = 22222
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.sinks.k1.type = logger
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
-----------------------------------------------------------------
# agent3 (hadoop103): receives "number" events from agent1's k2 sink
a1.sources = r1
a1.channels = c1
a1.sinks = k1
a1.sources.r1.type = avro
# Must match agent1's k2 sink target (hadoop103:33333)
a1.sources.r1.bind = hadoop103
a1.sources.r1.port = 33333
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.sinks.k1.type = logger
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
分别在hadoop101,hadoop102,hadoop103上启动flume进程,先启动agent2和agent3(avro source端),后启动agent1。
在hadoop101使用netcat向hadoop101:11111发送字母和数字。
观察hadoop102和hadoop103打印的日志。
二 自定义Source
1 需求分析
使用flume接收数据,并给每条数据添加前缀,输出到控制台。前缀可从flume配置文件中配置。
需要实现相应方法:
getBackOffSleepIncrement()//暂不用
getMaxBackOffSleepInterval()//暂不用
configure(Context context)//初始化context(读取配置文件内容)
process()//获取数据封装成event并写入channel,这个方法将被循环调用。
使用场景:读取MySQL数据或者其他文件系统。
2 java代码
/**
 * Custom source that generates batches of prefixed events and writes them to
 * the channel. The body prefix ("field") and the delay between events are
 * read from the agent configuration file.
 */
public class MySource extends AbstractSource implements Configurable, PollableSource {

    // Values read from the agent configuration.
    private Long delay;
    private String field;

    /**
     * Reads configuration properties for this source.
     * delay: pause (ms) between generated events; defaults to 2000 so a
     *        missing property no longer causes an NPE in Thread.sleep.
     * field: event-body prefix; defaults to "Hello!".
     */
    @Override
    public void configure(Context context) {
        delay = context.getLong("delay", 2000L);
        field = context.getString("field", "Hello!");
    }

    /**
     * Core method, called in a loop by Flume: builds events and hands them to
     * the ChannelProcessor. Returns READY on success, BACKOFF on failure so
     * the framework backs off before calling again.
     */
    @Override
    public Status process() throws EventDeliveryException {
        try {
            HashMap<String, String> headerMap = new HashMap<>();
            for (int i = 0; i < 5; i++) {
                // Create a fresh event per iteration: reusing a single event
                // object and mutating its body after it has been handed to the
                // channel would corrupt events already queued downstream.
                SimpleEvent event = new SimpleEvent();
                event.setHeaders(headerMap);
                event.setBody((field + i).getBytes());
                // Hand the event to the channel via the ChannelProcessor.
                getChannelProcessor().processEvent(event);
                Thread.sleep(delay);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the framework can stop this source.
            Thread.currentThread().interrupt();
            return Status.BACKOFF;
        } catch (Exception e) {
            e.printStackTrace();
            return Status.BACKOFF;
        }
        return Status.READY;
    }

    // Back-off increment (ms) applied each time the source reports BACKOFF.
    @Override
    public long getBackOffSleepIncrement() {
        return 2000L;
    }

    // Maximum back-off sleep (ms) between process() calls.
    @Override
    public long getMaxBackOffSleepInterval() {
        return 5000L;
    }
}
将写好的代码打包,并放到flume的lib目录(/opt/module/flume/lib)下。
3 配置文件
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
# Custom source: fully-qualified class name of the MySource class above
a1.sources.r1.type = com.hike.MySource
# Delay (ms) between generated events, read by MySource.configure()
a1.sources.r1.delay = 1000
# Optional body prefix (MySource falls back to "Hello!" when not set)
#a1.sources.r1.field = hike
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
执行任务
flume-ng agent -c conf/ -f job/mysource.conf -n a1 -Dflume.root.logger=INFO,console
三 自定义Sink
1 需求分析
使用flume接收数据,并在Sink端给每条数据添加前缀和后缀,输出到控制台。前后缀可在flume任务配置文件中配置。
2 java代码
/**
 * Custom sink that takes events from the channel and logs each event body
 * with a configurable prefix and suffix.
 */
public class MySink extends AbstractSink implements Configurable {

    // Log against the concrete class so log lines are attributed to MySink,
    // not to AbstractSink.
    private static final Logger LOG = LoggerFactory.getLogger(MySink.class);

    private String prefix;
    private String suffix;

    /**
     * Core sink logic: takes one event from the channel inside a transaction
     * and logs it. Returns READY when an event was processed, BACKOFF when
     * the channel was empty or the transaction failed — backing off lets the
     * framework sleep instead of busy-spinning on an empty channel.
     */
    @Override
    public Status process() throws EventDeliveryException {
        Status status;
        // Channel this sink is bound to.
        Channel ch = getChannel();
        Transaction txn = ch.getTransaction();
        txn.begin();
        try {
            // take() is inside the try so a failure here is rolled back too.
            Event event = ch.take();
            if (event == null) {
                // Channel empty: commit the empty transaction and back off
                // rather than looping until data arrives.
                txn.commit();
                return Status.BACKOFF;
            }
            // Process the event (print it via the logger).
            LOG.info(prefix + new String(event.getBody()) + suffix);
            txn.commit();
            status = Status.READY;
        } catch (Exception e) {
            // Roll back on any failure and report it instead of swallowing.
            LOG.error("Failed to process event", e);
            txn.rollback();
            status = Status.BACKOFF;
        } finally {
            // Always close the transaction.
            txn.close();
        }
        return status;
    }

    /**
     * Reads prefix/suffix from the agent configuration.
     * prefix defaults to "hello:"; suffix has no default (null when unset).
     */
    @Override
    public void configure(Context context) {
        prefix = context.getString("prefix", "hello:");
        suffix = context.getString("suffix");
    }
}
3 配置文件
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink: fully-qualified class name of the MySink class above
a1.sinks.k1.type = com.hike.MySink
# Body prefix (read by MySink.configure(), default "hello:")
a1.sinks.k1.prefix = hike:
# Body suffix (no default)
a1.sinks.k1.suffix = :hike
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
执行任务
flume-ng agent -c conf/ -f job/mysink.conf -n a1 -Dflume.root.logger=INFO,console
nc localhost 44444
输入一些内容