数据流中存在 3 种数据:
- 以 "AT" 开头的数据
- 以 "MO" 开头的数据
- 其他数据
要求:
- 以 "AT" 开头的数据,写入 Sink 1
- 以 "MO" 开头的数据,写入 Sink 2
- 其他数据写入 Sink 3
首先自定义拦截器,项目中引入 flume 的依赖:
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
<version>${flume.version}</version>
</dependency>
自定义的类要实现org.apache.flume.interceptor.Interceptor接口:
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
 * Custom interceptor that tags each event with a "title" header based on
 * the event body prefix, so a multiplexing channel selector can route it:
 * 1. Body starts with "AT" (case-insensitive) -> header title=AT -> Channel c1
 * 2. Body starts with "MO" (case-insensitive) -> header title=MO -> Channel c2
 * 3. Anything else gets no "title" header and falls through to the
 *    selector's default channel (c3).
 */
public class MyInterceptor implements Interceptor {
    @Override
    public void initialize() {
        // No resources to set up.
    }

    /**
     * Inspects a single event's body and stamps the routing header.
     *
     * Uses {@code regionMatches} instead of {@code substring(0, 2)} so that
     * bodies shorter than two characters (e.g. an empty netcat line) do not
     * throw {@link StringIndexOutOfBoundsException}; they simply match
     * neither prefix and are left untagged.
     *
     * @param event the event to inspect; its header map is mutated in place
     * @return the same event instance, with the "title" header set on a match
     */
    @Override
    public Event intercept(Event event) {
        String body = new String(event.getBody(), StandardCharsets.UTF_8);
        if (body.regionMatches(true, 0, "AT", 0, 2)) {
            event.getHeaders().put("title", "AT");
        } else if (body.regionMatches(true, 0, "MO", 0, 2)) {
            event.getHeaders().put("title", "MO");
        }
        return event;
    }

    /**
     * Batch variant: tags every event in the list in place.
     *
     * @param list events to tag
     * @return the same list, with headers updated
     */
    @Override
    public List<Event> intercept(List<Event> list) {
        for (Event event : list) {
            intercept(event);
        }
        return list;
    }

    @Override
    public void close() {
        // No resources to release.
    }

    /** Builder referenced from the agent config as {@code MyInterceptor$Builder}. */
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new MyInterceptor();
        }

        @Override
        public void configure(Context context) {
            // No configurable properties.
        }
    }
}
将代码打成 jar 包,上传到 flume 的 lib 目录下。
Flume 1(adp-01) 配置文件:
a1.sources = r1
a1.sinks = k1 k2 k3
a1.channels = c1 c2 c3
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
a1.sources.r1.interceptors = i1
# 设置为自定义拦截器的全限定类名 + $Builder
a1.sources.r1.interceptors.i1.type = com.arc.flume.interceptor.MyInterceptor$Builder
a1.sources.r1.selector.type = multiplexing
# 设置为要过滤的 header 中的 key
a1.sources.r1.selector.header = title
# title = AT 的发送到 c1
a1.sources.r1.selector.mapping.AT = c1
# title = MO 的发送到 c2
a1.sources.r1.selector.mapping.MO = c2
# 其他的 event 发送到 c3
a1.sources.r1.selector.default = c3
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100
a1.channels.c3.type = memory
a1.channels.c3.capacity = 1000
a1.channels.c3.transactionCapacity = 100
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = adp-01
a1.sinks.k1.port = 30000
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = adp-02
a1.sinks.k2.port = 30000
a1.sinks.k3.type = avro
a1.sinks.k3.hostname = adp-03
a1.sinks.k3.port = 30000
a1.sources.r1.channels = c1 c2 c3
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2
a1.sinks.k3.channel = c3
Flume 2(adp-01) 配置文件:
a2.sources = r1
a2.channels = c1
a2.sinks = k1
a2.sources.r1.type = avro
a2.sources.r1.bind = adp-01
a2.sources.r1.port = 30000
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100
a2.sinks.k1.type = logger
a2.sources.r1.channels = c1
a2.sinks.k1.channel = c1
Flume 3(adp-02) 配置文件:
a3.sources = r1
a3.channels = c1
a3.sinks = k1
a3.sources.r1.type = avro
a3.sources.r1.bind = adp-02
a3.sources.r1.port = 30000
a3.channels.c1.type = memory
a3.channels.c1.capacity = 1000
a3.channels.c1.transactionCapacity = 100
a3.sinks.k1.type = logger
a3.sources.r1.channels = c1
a3.sinks.k1.channel = c1
Flume 4(adp-03) 配置文件:
a4.sources = r1
a4.channels = c1
a4.sinks = k1
a4.sources.r1.type = avro
a4.sources.r1.bind = adp-03
a4.sources.r1.port = 30000
a4.channels.c1.type = memory
a4.channels.c1.capacity = 1000
a4.channels.c1.transactionCapacity = 100
a4.sinks.k1.type = logger
a4.sources.r1.channels = c1
a4.sinks.k1.channel = c1
启动 Flume:
# 先启动 3 个用于接收 Avro 数据源的 agent
# adp-01
[admin@adp-01 ~]$ flume-ng agent -n a2 -c conf -f a2.conf
# adp-02
[admin@adp-02 ~]$ flume-ng agent -n a3 -c conf -f a3.conf
# adp-03
[admin@adp-03 ~]$ flume-ng agent -n a4 -c conf -f a4.conf
# adp-01
[admin@adp-01 ~]$ flume-ng agent -n a1 -c conf -f a1.conf
发送数据(adp-01)
[admin@adp-01 ~]$ nc localhost 44444
AT_ABC
MO_ABC
XXX
# adp-01
2023-04-27 06:15:29,002 INFO sink.LoggerSink: Event: { headers:{title=AT} body: 41 54 5F 41 42 43 AT_ABC }
# adp-02
2023-04-27 06:15:23,890 INFO sink.LoggerSink: Event: { headers:{title=MO} body: 4D 4F 5F 41 42 43 MO_ABC }
# adp-03
2023-04-27 06:15:31,943 INFO sink.LoggerSink: Event: { headers:{} body: 58 58 58 XXX }