- 小知识,大挑战!本文正在参与“程序员必备小知识”创作活动。
CEP
- 复杂事件处理(Complex Event Processing,CEP)
- Flink CEP是在 Flink 中实现的复杂事件处理(CEP)库
- CEP 允许在无休止的事件流中检测事件模式,让我们有机会掌握数据中重要的部分
- 一个或多个由简单事件构成的事件流通过一定的规则匹配,然后输出用户想得到的数据 —— 满足规则的复杂事件
CDC
- Flink CDC连接器是Apache Flink的一组源连接器,使用变更数据捕获(change data capture,CDC)接收来自不同数据库的变更。
- Flink CDC连接器将Debezium集成为引擎,以捕获数据更改。
- 可以充分利用Debezium的能力。
工程实现
pom:依赖版本需与实际集群的 Flink 版本保持一致
cep
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-cep-scala_2.11</artifactId>
<version>1.12.1</version>
</dependency>
cdc
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>1.2.0</version>
</dependency>
环境初始化
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
mysql 设置
建表语句
CREATE TABLE `login_in_result_test` (
`user_id` bigint(20) DEFAULT NULL,
`login_in_result` varchar(255) DEFAULT NULL,
`login_in_time` bigint(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
测试sql
truncate table `user`.login_in_result_test;
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905210);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905215);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905220);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905227);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905230);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905231);
cdc 设置
DebeziumSourceFunction<String> sourceFunction = MySQLSource.<String>builder()
.hostname("127.0.0.1")
.port(3307)
.databaseList("user")
.tableList("user.login_in_result_test")
.startupOptions(StartupOptions.latest())
.username("root")
.password("123456")
.deserializer(new MyDeserializationSchema())
.build();
自定义 MyDeserializationSchema
public class MyDeserializationSchema implements DebeziumDeserializationSchema<String> {
@Override
public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
//获取主题信息,提取数据库和表名
String topic = sourceRecord.topic();
String[] fields = topic.split("\.");
String db = fields[1];
String tableName = fields[2];
//获取操作类型
Envelope.Operation operation = Envelope.operationFor(sourceRecord);
//获取value信息,提取数据本身
Struct value = (Struct) sourceRecord.value();
Struct dataValue = null;
if (Envelope.Operation.DELETE.code().equals(operation.code())) {
dataValue = value.getStruct("before");
} else {
dataValue = value.getStruct("after");
}
JSONObject jsonObject = new JSONObject();
for (Field field : dataValue.schema().fields()) {
Object o = dataValue.get(field);
jsonObject.put(field.name(), o);
}
//创建结果json
JSONObject result = new JSONObject();
result.put("dataBase", db);
result.put("tableName", tableName);
result.put("data", jsonObject);
result.put("op", operation);
//输出数据
collector.collect(result.toJSONString());
}
@Override
public TypeInformation<String> getProducedType() {
return BasicTypeInfo.STRING_TYPE_INFO;
}
}
source 预处理
// Keep only CDC inserts ("CREATE"), convert each record to a LogInEvent POJO,
// attach event-time watermarks (5 s bounded out-of-orderness, epoch-second
// timestamps scaled to milliseconds) and key the stream by user id.
DataStream<LogInEvent> source = env.addSource(sourceFunction).filter(new FilterFunction<String>() {
    @Override
    public boolean filter(String record) throws Exception {
        JSONObject json = JSONObject.parseObject(record);
        return "CREATE".equals(json.getString("op"));
    }
}).map(new MapFunction<String, LogInEvent>() {
    @Override
    public LogInEvent map(String record) throws Exception {
        JSONObject payload = JSONObject.parseObject(record).getJSONObject("data");
        return new LogInEvent(payload.getLong("user_id"), payload.getString("login_in_result"), payload.getLong("login_in_time"));
    }
}).assignTimestampsAndWatermarks(WatermarkStrategy.<LogInEvent>forBoundedOutOfOrderness(Duration.ofSeconds(5))
        .withTimestampAssigner((event, previousTimestamp) -> event.getTime() * 1000))
        .keyBy(LogInEvent::getUserId);
实体类
/**
 * POJO representing one login attempt parsed from the CDC stream.
 *
 * <p>Flink requires a public no-arg constructor plus getters/setters for POJO
 * serialization, so this class keeps the classic JavaBean shape.
 */
public class LogInEvent implements Serializable {
    private Long userId;          // user performing the login
    private String loginInResult; // e.g. "fail"
    private Long time;            // epoch time in seconds

    public LogInEvent() {
    }

    public LogInEvent(Long userId, String loginInResult, Long time) {
        this.userId = userId;
        this.loginInResult = loginInResult;
        this.time = time;
    }

    public Long getUserId() {
        return userId;
    }

    public void setUserId(Long userId) {
        this.userId = userId;
    }

    public String getLoginInResult() {
        return loginInResult;
    }

    public void setLoginInResult(String loginInResult) {
        this.loginInResult = loginInResult;
    }

    public Long getTime() {
        return time;
    }

    public void setTime(Long time) {
        this.time = time;
    }

    @Override
    public String toString() {
        // Fix: the closing quote after loginInResult must be the escaped char
        // literal '\'' — the original ''' is not valid Java and does not compile.
        return "LogInEvent{" +
                "userId=" + userId +
                ", loginInResult='" + loginInResult + '\'' +
                ", time=" + time +
                '}';
    }
}
设置匹配模式 Pattern
// Pattern: two strictly consecutive failed logins within 5 seconds.
// Use the parameterized Pattern<LogInEvent, LogInEvent> instead of the raw
// Pattern type to avoid unchecked-conversion warnings and keep type safety.
Pattern<LogInEvent, LogInEvent> pattern = Pattern.<LogInEvent>begin("start").where(new SimpleCondition<LogInEvent>() {
    @Override
    public boolean filter(LogInEvent logInEvent) throws Exception {
        return logInEvent.getLoginInResult().equals("fail");
    }
}).next("next").where(new SimpleCondition<LogInEvent>() { // next() = strict contiguity
    @Override
    public boolean filter(LogInEvent logInEvent) throws Exception {
        return logInEvent.getLoginInResult().equals("fail");
    }
}).within(Time.seconds(5)); // both events must occur within a 5-second window
模式应用及任务提交
// Apply the CEP pattern to the keyed stream and emit one line per full match.
PatternStream<LogInEvent> patternStream = CEP.pattern(source, pattern);
SingleOutputStreamOperator<String> process = patternStream.process(new PatternProcessFunction<LogInEvent, String>() {
    @Override
    public void processMatch(Map<String, List<LogInEvent>> match, Context ctx, Collector<String> out) throws Exception {
        // "match" maps each pattern stage name to the events it matched.
        out.collect("start:" + match.get("start") + ",next:" + match.get("next"));
    }
});
process.print().setParallelism(1);
env.execute();