Flink CEP + CDC 工程实现

852 阅读3分钟

CEP

  1. 复杂事件处理(Complex Event Processing,CEP)
  2. Flink CEP是在 Flink 中实现的复杂事件处理(CEP)库
  3. CEP 允许在无休止的事件流中检测事件模式,让我们有机会掌握数据中重要的部分
  4. 一个或多个由简单事件构成的事件流通过一定的规则匹配,然后输出用户想得到的数据 —— 满足规则的复杂事件

CDC

  1. Flink CDC连接器是Apache Flink的一组源连接器,使用变更数据捕获(change data capture,CDC)接收来自不同数据库的变更。
  2. Flink CDC连接器将Debezium集成为引擎,以捕获数据更改。
  3. 因此,Flink CDC 可以充分利用 Debezium 的能力。

工程实现

pom:依赖版本需与实际集群的 Flink 版本保持一致

cep

<!-- Flink CEP library (Scala 2.11 build), version 1.12.1. Keep the version aligned with the Flink version of the target cluster. -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-cep-scala_2.11</artifactId>
    <version>1.12.1</version>
</dependency>

cdc

截屏2021-10-12 下午5.48.41.png

<!-- MySQL CDC source connector (com.alibaba.ververica), version 1.2.0; provides the MySQL source used by the job. -->
<dependency>
    <groupId>com.alibaba.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>1.2.0</version>
</dependency>

环境初始化

// Obtain the execution environment and run the whole job with a parallelism of 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

// Process by event time; the job assigns timestamps and watermarks on the source stream.
// NOTE(review): this setter is reported as deprecated on newer Flink versions - confirm against the cluster version.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

mysql 设置

建表语句

-- Login attempts consumed by the CDC source; every column is nullable.
CREATE TABLE `login_in_result_test` (
  `user_id` bigint(20) DEFAULT NULL,            -- user identifier; the job keys the stream by this value
  `login_in_result` varchar(255) DEFAULT NULL,  -- login outcome; the CEP pattern matches the value 'fail'
  `login_in_time` bigint(20) DEFAULT NULL       -- login time; the job multiplies it by 1000 to obtain the event timestamp
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

测试sql

-- Reset the test table, then insert six failed logins for user 1, one statement per row.
-- Gaps between consecutive login_in_time values: 5, 5, 7, 3 and 1.
-- NOTE(review): the job's pattern uses within(Time.seconds(5)); presumably only the closely spaced
-- pairs are meant to match - confirm how the exact 5-unit gaps are treated at the window boundary.
truncate table `user`.login_in_result_test;
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905210);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905215);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905220);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905227);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905230);
INSERT INTO `user`.`login_in_result_test`(`user_id`, `login_in_result`, `login_in_time`) VALUES (1, 'fail', 1597905231);

cdc 设置

// Build the MySQL CDC source. Each change record is handed to MyDeserializationSchema,
// which emits it as a String.
DebeziumSourceFunction<String> sourceFunction = MySQLSource.<String>builder()
        // Connection settings.
        .hostname("127.0.0.1")
        .port(3307)
        .username("root")
        .password("123456")
        // Capture scope: a single table of the `user` database.
        .databaseList("user")
        .tableList("user.login_in_result_test")
        // NOTE(review): latest() presumably skips the initial snapshot and reads only new changes - confirm.
        .startupOptions(StartupOptions.latest())
        // Record conversion.
        .deserializer(new MyDeserializationSchema())
        .build();

自定义 MyDeserializationSchema

public class MyDeserializationSchema implements DebeziumDeserializationSchema<String> {
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {

        //获取主题信息,提取数据库和表名
        String topic = sourceRecord.topic();
        String[] fields = topic.split("\.");
        String db = fields[1];
        String tableName = fields[2];

        //获取操作类型
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);

        //获取value信息,提取数据本身
        Struct value = (Struct) sourceRecord.value();
        Struct dataValue = null;
        if (Envelope.Operation.DELETE.code().equals(operation.code())) {
            dataValue = value.getStruct("before");
        } else {
            dataValue = value.getStruct("after");
        }
        JSONObject jsonObject = new JSONObject();

        for (Field field : dataValue.schema().fields()) {
            Object o = dataValue.get(field);
            jsonObject.put(field.name(), o);
        }

        //创建结果json
        JSONObject result = new JSONObject();
        result.put("dataBase", db);
        result.put("tableName", tableName);
        result.put("data", jsonObject);
        result.put("op", operation);

        //输出数据
        collector.collect(result.toJSONString());
    }

    @Override
    public TypeInformation<String> getProducedType() {
        return BasicTypeInfo.STRING_TYPE_INFO;
    }
}

source 预处理

// Raw CDC output: one JSON string per change record.
DataStream<String> changeRecords = env.addSource(sourceFunction);

// Keep only the records whose "op" field equals "CREATE".
DataStream<String> createdRecords = changeRecords.filter(new FilterFunction<String>() {
    @Override
    public boolean filter(String json) throws Exception {
        return "CREATE".equals(JSONObject.parseObject(json).getString("op"));
    }
});

// Turn the "data" object of each record into a LogInEvent.
DataStream<LogInEvent> loginEvents = createdRecords.map(new MapFunction<String, LogInEvent>() {
    @Override
    public LogInEvent map(String json) throws Exception {
        JSONObject row = JSONObject.parseObject(json).getJSONObject("data");
        Long userId = row.getLong("user_id");
        String result = row.getString("login_in_result");
        Long loginTime = row.getLong("login_in_time");
        return new LogInEvent(userId, result, loginTime);
    }
});

// Event timestamp is the login time multiplied by 1000; events may arrive up to 5 seconds out of order.
WatermarkStrategy<LogInEvent> watermarkStrategy = WatermarkStrategy
        .<LogInEvent>forBoundedOutOfOrderness(Duration.ofSeconds(5))
        .withTimestampAssigner((event, recordTimestamp) -> event.getTime() * 1000);

// Key by user id so the stream is partitioned per user.
DataStream<LogInEvent> source = loginEvents
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .keyBy(event -> event.getUserId());

实体类

/**
 * A single login attempt read from the {@code login_in_result_test} table.
 *
 * <p>The class is a plain mutable bean: it has a public no-argument constructor
 * and a getter/setter pair for every field.
 */
public class LogInEvent implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Identifier of the user who attempted to log in. */
    private Long userId;
    /** Outcome of the attempt, e.g. {@code "fail"}. */
    private String loginInResult;
    /** Login time as stored in the {@code login_in_time} column. */
    private Long time;

    /** Creates an event with all fields unset ({@code null}). */
    public LogInEvent() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param userId        identifier of the user
     * @param loginInResult outcome of the login attempt
     * @param time          login time
     */
    public LogInEvent(Long userId, String loginInResult, Long time) {
        this.userId = userId;
        this.loginInResult = loginInResult;
        this.time = time;
    }

    /** @return the user identifier, possibly {@code null} */
    public Long getUserId() {
        return userId;
    }

    /** @param userId the user identifier */
    public void setUserId(Long userId) {
        this.userId = userId;
    }

    /** @return the login outcome, possibly {@code null} */
    public String getLoginInResult() {
        return loginInResult;
    }

    /** @param loginInResult the login outcome */
    public void setLoginInResult(String loginInResult) {
        this.loginInResult = loginInResult;
    }

    /** @return the login time, possibly {@code null} */
    public Long getTime() {
        return time;
    }

    /** @param time the login time */
    public void setTime(Long time) {
        this.time = time;
    }

    /**
     * @return a debug representation such as
     *         {@code LogInEvent{userId=1, loginInResult='fail', time=5}}
     */
    @Override
    public String toString() {
        // The single quote must be written as the escaped char literal '\''.
        return "LogInEvent{" +
                "userId=" + userId +
                ", loginInResult='" + loginInResult + '\'' +
                ", time=" + time +
                '}';
    }
}

设置匹配模式 Pattern

// Condition shared by both pattern steps: the event is a failed login.
SimpleCondition<LogInEvent> failedLogin = new SimpleCondition<LogInEvent>() {
    @Override
    public boolean filter(LogInEvent logInEvent) throws Exception {
        // Constant-first equals: a null login result (the column is nullable) counts as
        // "not a failure" instead of throwing a NullPointerException.
        return "fail".equals(logInEvent.getLoginInResult());
    }
};

// A failed login ("start") followed by another failed login ("next"), bounded by a 5 second
// window. The variable is fully parameterized rather than a raw Pattern, so applying it
// does not need an unchecked conversion.
Pattern<LogInEvent, LogInEvent> pattern = Pattern.<LogInEvent>begin("start")
        .where(failedLogin)
        .next("next")
        .where(failedLogin)
        .within(Time.seconds(5));

模式应用及任务提交

// Apply the pattern to the source stream.
PatternStream<LogInEvent> matchedLogins = CEP.pattern(source, pattern);

// Render every match as one line containing the events bound to "start" and "next".
SingleOutputStreamOperator<String> alerts = matchedLogins.process(new PatternProcessFunction<LogInEvent, String>() {
    @Override
    public void processMatch(Map<String, List<LogInEvent>> match, Context ctx, Collector<String> out) throws Exception {
        out.collect("start:" + match.get("start") + ",next:" + match.get("next"));
    }
});

// Print the matches with a single sink subtask.
alerts.print().setParallelism(1);

// Submit the job.
env.execute();